[BACK]Return to kern_exec.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/kern_exec.c, Revision 1.306

1.306   ! pooka       1: /*     $NetBSD: kern_exec.c,v 1.305 2011/01/18 08:21:03 matt Exp $     */
1.277     ad          2:
                      3: /*-
                      4:  * Copyright (c) 2008 The NetBSD Foundation, Inc.
                      5:  * All rights reserved.
                      6:  *
                      7:  * Redistribution and use in source and binary forms, with or without
                      8:  * modification, are permitted provided that the following conditions
                      9:  * are met:
                     10:  * 1. Redistributions of source code must retain the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer.
                     12:  * 2. Redistributions in binary form must reproduce the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer in the
                     14:  *    documentation and/or other materials provided with the distribution.
                     15:  *
                     16:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     17:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     18:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     19:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     20:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     21:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     22:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     23:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     24:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     25:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     26:  * POSSIBILITY OF SUCH DAMAGE.
                     27:  */
1.55      cgd        28:
                     29: /*-
1.77      cgd        30:  * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
1.55      cgd        31:  * Copyright (C) 1992 Wolfgang Solfrank.
                     32:  * Copyright (C) 1992 TooLs GmbH.
                     33:  * All rights reserved.
                     34:  *
                     35:  * Redistribution and use in source and binary forms, with or without
                     36:  * modification, are permitted provided that the following conditions
                     37:  * are met:
                     38:  * 1. Redistributions of source code must retain the above copyright
                     39:  *    notice, this list of conditions and the following disclaimer.
                     40:  * 2. Redistributions in binary form must reproduce the above copyright
                     41:  *    notice, this list of conditions and the following disclaimer in the
                     42:  *    documentation and/or other materials provided with the distribution.
                     43:  * 3. All advertising materials mentioning features or use of this software
                     44:  *    must display the following acknowledgement:
                     45:  *     This product includes software developed by TooLs GmbH.
                     46:  * 4. The name of TooLs GmbH may not be used to endorse or promote products
                     47:  *    derived from this software without specific prior written permission.
                     48:  *
                     49:  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
                     50:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
                     51:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
                     52:  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
                     53:  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
                     54:  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
                     55:  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
                     56:  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
                     57:  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
                     58:  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                     59:  */
1.146     lukem      60:
                     61: #include <sys/cdefs.h>
1.306   ! pooka      62: __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.305 2011/01/18 08:21:03 matt Exp $");
1.89      mrg        63:
1.92      thorpej    64: #include "opt_ktrace.h"
1.285     apb        65: #include "opt_modular.h"
1.124     jdolecek   66: #include "opt_syscall_debug.h"
1.226     dogcow     67: #include "veriexec.h"
1.232     elad       68: #include "opt_pax.h"
1.279     wrstuden   69: #include "opt_sa.h"
1.55      cgd        70:
                     71: #include <sys/param.h>
                     72: #include <sys/systm.h>
                     73: #include <sys/filedesc.h>
                     74: #include <sys/kernel.h>
                     75: #include <sys/proc.h>
                     76: #include <sys/mount.h>
                     77: #include <sys/malloc.h>
1.265     yamt       78: #include <sys/kmem.h>
1.55      cgd        79: #include <sys/namei.h>
                     80: #include <sys/vnode.h>
                     81: #include <sys/file.h>
                     82: #include <sys/acct.h>
                     83: #include <sys/exec.h>
                     84: #include <sys/ktrace.h>
1.278     pooka      85: #include <sys/uidinfo.h>
1.55      cgd        86: #include <sys/wait.h>
                     87: #include <sys/mman.h>
1.155     gmcgarry   88: #include <sys/ras.h>
1.55      cgd        89: #include <sys/signalvar.h>
                     90: #include <sys/stat.h>
1.124     jdolecek   91: #include <sys/syscall.h>
1.218     elad       92: #include <sys/kauth.h>
1.253     ad         93: #include <sys/lwpctl.h>
1.260     christos   94: #include <sys/pax.h>
1.263     ad         95: #include <sys/cpu.h>
1.282     ad         96: #include <sys/module.h>
1.279     wrstuden   97: #include <sys/sa.h>
                     98: #include <sys/savar.h>
1.289     pooka      99: #include <sys/syscallvar.h>
1.56      cgd       100: #include <sys/syscallargs.h>
1.222     elad      101: #if NVERIEXEC > 0
1.197     blymn     102: #include <sys/verified_exec.h>
1.222     elad      103: #endif /* NVERIEXEC > 0 */
1.294     darran    104: #include <sys/sdt.h>
1.55      cgd       105:
1.88      mrg       106: #include <uvm/uvm_extern.h>
                    107:
1.55      cgd       108: #include <machine/reg.h>
                    109:
1.244     dsl       110: #include <compat/common/compat_util.h>
                    111:
1.171     chs       112: static int exec_sigcode_map(struct proc *, const struct emul *);
                    113:
/*
 * DPRINTF() takes a single, fully parenthesised printf argument list,
 * e.g. DPRINTF(("fmt %d\n", val));  It expands to a printf call when
 * DEBUG_EXEC is defined and to nothing otherwise.
 */
1.143     christos  114: #ifdef DEBUG_EXEC
1.305     matt      115: #define DPRINTF(a) printf a
1.143     christos  116: #else
                    117: #define DPRINTF(a)
                    118: #endif /* DEBUG_EXEC */
1.165     thorpej   119:
1.130     jdolecek  120: /*
1.294     darran    121:  * DTrace SDT provider definitions
                          *
                          * Three probes are defined with "proc" as the first macro argument
                          * (presumably the provider name -- confirm against <sys/sdt.h>):
                          * exec, exec_success and exec_failure.  The first two list "char *"
                          * and the last lists "int" as the type of their first argument.
                    122:  */
                    123: SDT_PROBE_DEFINE(proc,,,exec,
                    124:            "char *", NULL,
                    125:            NULL, NULL, NULL, NULL,
                    126:            NULL, NULL, NULL, NULL);
                    127: SDT_PROBE_DEFINE(proc,,,exec_success,
                    128:            "char *", NULL,
                    129:            NULL, NULL, NULL, NULL,
                    130:            NULL, NULL, NULL, NULL);
                    131: SDT_PROBE_DEFINE(proc,,,exec_failure,
                    132:            "int", NULL,
                    133:            NULL, NULL, NULL, NULL,
                    134:            NULL, NULL, NULL, NULL);
                    135:
                    136: /*
1.130     jdolecek  137:  * Exec function switch:
                    138:  *
                    139:  * Note that each makecmds function is responsible for loading the
                    140:  * exec package with the necessary functions for any exec-type-specific
                    141:  * handling.
                    142:  *
                    143:  * Functions for specific exec types should be defined in their own
                    144:  * header file.
                          *
                          * execsw[] holds nexecs entries; check_exec() tries them in index
                          * order by calling each entry's es_makecmds hook.
                    145:  */
1.138     lukem     146: static const struct execsw     **execsw = NULL;
                    147: static int                     nexecs;
                    148:
1.282     ad        149: u_int  exec_maxhdrsz;   /* must not be static - used by netbsd32 */
1.130     jdolecek  150:
                    151: /* list of dynamically loaded execsw entries */
1.282     ad        152: static LIST_HEAD(execlist_head, exec_entry) ex_head =
                    153:     LIST_HEAD_INITIALIZER(ex_head);
1.130     jdolecek  154: struct exec_entry {
1.138     lukem     155:        LIST_ENTRY(exec_entry)  ex_list;
1.282     ad        156:        SLIST_ENTRY(exec_entry) ex_slist;
                    157:        const struct execsw     *ex_sw;
1.130     jdolecek  158: };
                    159:
1.203     christos  160: #ifndef __HAVE_SYSCALL_INTERN
                    161: void   syscall(void);
                    162: #endif
                    163:
1.280     matt      164: #ifdef KERN_SA
/*
 * SA hooks for the native emulation; emul_netbsd.e_sa points here when
 * KERN_SA is defined.  The initializer is positional, so the entries
 * must stay in struct sa_emul's member order (that struct is declared
 * outside this excerpt -- check it before reordering or adding entries).
 */
1.282     ad        165: static struct sa_emul saemul_netbsd = {
1.279     wrstuden  166:        sizeof(ucontext_t),
                    167:        sizeof(struct sa_t),
                    168:        sizeof(struct sa_t *),
                    169:        NULL,
                    170:        NULL,
                    171:        cpu_upcall,
                    172:        (void (*)(struct lwp *, void *))getucontext_sa,
                    173:        sa_ucsp
                    174: };
1.280     matt      175: #endif /* KERN_SA */
1.279     wrstuden  176:
1.173     christos  177: /* NetBSD emul struct */
/*
 * Emulation descriptor named "netbsd".  Which members are initialized
 * depends on __HAVE_MINIMAL_EMUL, SYSCALL_DEBUG, __HAVE_SYSCALL_INTERN
 * and KERN_SA; hooks set to NULL are simply not supplied here.
 */
1.282     ad        178: struct emul emul_netbsd = {
1.291     rmind     179:        .e_name =               "netbsd",
                    180:        .e_path =               NULL,
1.133     mycroft   181: #ifndef __HAVE_MINIMAL_EMUL
1.291     rmind     182:        .e_flags =              EMUL_HAS_SYS___syscall,
                    183:        .e_errno =              NULL,
                    184:        .e_nosys =              SYS_syscall,
                    185:        .e_nsysent =            SYS_NSYSENT,
1.133     mycroft   186: #endif
1.291     rmind     187:        .e_sysent =             sysent,
1.124     jdolecek  188: #ifdef SYSCALL_DEBUG
1.291     rmind     189:        .e_syscallnames =       syscallnames,
1.124     jdolecek  190: #else
1.291     rmind     191:        .e_syscallnames =       NULL,
1.124     jdolecek  192: #endif
1.291     rmind     193:        .e_sendsig =            sendsig,
                    194:        .e_trapsignal =         trapsignal,
                    195:        .e_tracesig =           NULL,
                    196:        .e_sigcode =            NULL,
                    197:        .e_esigcode =           NULL,
                    198:        .e_sigobject =          NULL,
                    199:        .e_setregs =            setregs,
                    200:        .e_proc_exec =          NULL,
                    201:        .e_proc_fork =          NULL,
                    202:        .e_proc_exit =          NULL,
                    203:        .e_lwp_fork =           NULL,
                    204:        .e_lwp_exit =           NULL,
1.133     mycroft   205: #ifdef __HAVE_SYSCALL_INTERN
1.291     rmind     206:        .e_syscall_intern =     syscall_intern,
1.133     mycroft   207: #else
1.291     rmind     208:        .e_syscall =            syscall,
1.133     mycroft   209: #endif
1.291     rmind     210:        .e_sysctlovly =         NULL,
                    211:        .e_fault =              NULL,
                    212:        .e_vm_default_addr =    uvm_default_mapaddr,
                    213:        .e_usertrap =           NULL,
1.280     matt      214: #ifdef KERN_SA
1.291     rmind     215:        .e_sa =                 &saemul_netbsd,
1.280     matt      216: #else
1.291     rmind     217:        .e_sa =                 NULL,
1.280     matt      218: #endif
1.291     rmind     219:        .e_ucsize =             sizeof(ucontext_t),
                    220:        .e_startlwp =           startlwp
1.124     jdolecek  221: };
                    222:
1.55      cgd       223: /*
1.130     jdolecek  224:  * Exec lock. Used to control access to execsw[] structures.
                    225:  * This must not be static so that netbsd32 can access it, too.
                    226:  */
1.237     ad        227: krwlock_t exec_lock;
1.183     junyoung  228:
/*
 * NOTE(review): sigobject_lock has no users in this excerpt; presumably
 * it serializes setup of an emulation's e_sigobject (see
 * exec_sigcode_map()) -- confirm against the rest of the file.
 */
1.259     ad        229: static kmutex_t sigobject_lock;
                    230:
1.277     ad        231: static void *
                    232: exec_pool_alloc(struct pool *pp, int flags)
                    233: {
                    234:
                    235:        return (void *)uvm_km_alloc(kernel_map, NCARGS, 0,
                    236:            UVM_KMF_PAGEABLE | UVM_KMF_WAITVA);
                    237: }
                    238:
                    239: static void
                    240: exec_pool_free(struct pool *pp, void *addr)
                    241: {
                    242:
                    243:        uvm_km_free(kernel_map, (vaddr_t)addr, NCARGS, UVM_KMF_PAGEABLE);
                    244: }
                    245:
/*
 * exec_pool and its backing allocator.  exec_palloc obtains and releases
 * NCARGS-byte "pages" through exec_pool_alloc()/exec_pool_free(); the
 * code that initializes exec_pool with it is outside this excerpt.
 */
                    246: static struct pool exec_pool;
                    247:
                    248: static struct pool_allocator exec_palloc = {
                    249:        .pa_alloc = exec_pool_alloc,
                    250:        .pa_free = exec_pool_free,
                    251:        .pa_pagesz = NCARGS
                    252: };
                    253:
1.130     jdolecek  254: /*
1.55      cgd       255:  * check exec:
                    256:  * given an "executable" described in the exec package's namei info,
                    257:  * see what we can do with it.
                    258:  *
                    259:  * ON ENTRY:
                    260:  *     exec package with appropriate namei info
1.212     christos  261:  *     lwp pointer of exec'ing lwp
1.55      cgd       262:  *     NO SELF-LOCKED VNODES
                    263:  *
                    264:  * ON EXIT:
                    265:  *     error:  nothing held, etc.  exec header still allocated.
1.77      cgd       266:  *     ok:     filled exec package, executable's vnode (unlocked).
1.55      cgd       267:  *
                    268:  * EXEC SWITCH ENTRY:
                    269:  *     Locked vnode to check, exec package, proc.
                    270:  *
                    271:  * EXEC SWITCH EXIT:
1.77      cgd       272:  *     ok:     return 0, filled exec package, executable's vnode (unlocked).
1.55      cgd       273:  *     error:  destructive:
                    274:  *                     everything deallocated execept exec header.
1.76      cgd       275:  *             non-destructive:
1.77      cgd       276:  *                     error code, executable's vnode (unlocked),
1.76      cgd       277:  *                     exec header unmodified.
1.55      cgd       278:  */
                    279: int
1.205     christos  280: /*ARGSUSED*/
1.301     dholland  281: check_exec(struct lwp *l, struct exec_package *epp, struct pathbuf *pb)
1.55      cgd       282: {
1.138     lukem     283:        int             error, i;
                    284:        struct vnode    *vp;
1.295     dholland  285:        struct nameidata nd;
1.138     lukem     286:        size_t          resid;
1.55      cgd       287:
1.303     dholland  288:        NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1.295     dholland  289:
1.55      cgd       290:        /* first get the vnode */
1.295     dholland  291:        if ((error = namei(&nd)) != 0)
1.55      cgd       292:                return error;
1.295     dholland  293:        epp->ep_vp = vp = nd.ni_vp;
                    294:        /* this cannot overflow as both are size PATH_MAX */
1.302     dholland  295:        strcpy(epp->ep_resolvedname, nd.ni_pnbuf);
1.295     dholland  296:
1.296     dholland  297: #ifdef DIAGNOSTIC
                    298:        /* paranoia (take this out once namei stuff stabilizes) */
1.302     dholland  299:        memset(nd.ni_pnbuf, '~', PATH_MAX);
1.295     dholland  300: #endif
1.55      cgd       301:
1.84      mycroft   302:        /* check access and type */
1.55      cgd       303:        if (vp->v_type != VREG) {
1.81      kleink    304:                error = EACCES;
1.55      cgd       305:                goto bad1;
                    306:        }
1.254     pooka     307:        if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0)
1.84      mycroft   308:                goto bad1;
1.55      cgd       309:
                    310:        /* get attributes */
1.254     pooka     311:        if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred)) != 0)
1.55      cgd       312:                goto bad1;
                    313:
                    314:        /* Check mount point */
                    315:        if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
                    316:                error = EACCES;
                    317:                goto bad1;
                    318:        }
1.141     thorpej   319:        if (vp->v_mount->mnt_flag & MNT_NOSUID)
1.83      mycroft   320:                epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);
1.55      cgd       321:
                    322:        /* try to open it */
1.254     pooka     323:        if ((error = VOP_OPEN(vp, FREAD, l->l_cred)) != 0)
1.55      cgd       324:                goto bad1;
                    325:
1.99      wrstuden  326:        /* unlock vp, since we need it unlocked from here on out. */
1.298     hannken   327:        VOP_UNLOCK(vp);
1.77      cgd       328:
1.222     elad      329: #if NVERIEXEC > 0
1.295     dholland  330:        error = veriexec_verify(l, vp, epp->ep_resolvedname,
1.233     elad      331:            epp->ep_flags & EXEC_INDIR ? VERIEXEC_INDIRECT : VERIEXEC_DIRECT,
1.236     elad      332:            NULL);
                    333:        if (error)
1.234     elad      334:                goto bad2;
1.222     elad      335: #endif /* NVERIEXEC > 0 */
1.160     blymn     336:
1.232     elad      337: #ifdef PAX_SEGVGUARD
1.295     dholland  338:        error = pax_segvguard(l, vp, epp->ep_resolvedname, false);
1.234     elad      339:        if (error)
                    340:                goto bad2;
1.232     elad      341: #endif /* PAX_SEGVGUARD */
                    342:
1.55      cgd       343:        /* now we have the file, get the exec header */
1.74      christos  344:        error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
1.223     ad        345:                        UIO_SYSSPACE, 0, l->l_cred, &resid, NULL);
1.74      christos  346:        if (error)
1.55      cgd       347:                goto bad2;
                    348:        epp->ep_hdrvalid = epp->ep_hdrlen - resid;
                    349:
                    350:        /*
1.136     eeh       351:         * Set up default address space limits.  Can be overridden
                    352:         * by individual exec packages.
1.183     junyoung  353:         *
1.235     rillig    354:         * XXX probably should be all done in the exec packages.
1.136     eeh       355:         */
                    356:        epp->ep_vm_minaddr = VM_MIN_ADDRESS;
                    357:        epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;
                    358:        /*
1.55      cgd       359:         * set up the vmcmds for creation of the process
                    360:         * address space
                    361:         */
                    362:        error = ENOEXEC;
1.244     dsl       363:        for (i = 0; i < nexecs; i++) {
1.68      cgd       364:                int newerror;
                    365:
1.130     jdolecek  366:                epp->ep_esch = execsw[i];
1.212     christos  367:                newerror = (*execsw[i]->es_makecmds)(l, epp);
1.244     dsl       368:
                    369:                if (!newerror) {
                    370:                        /* Seems ok: check that entry point is sane */
                    371:                        if (epp->ep_entry > VM_MAXUSER_ADDRESS) {
                    372:                                error = ENOEXEC;
                    373:                                break;
                    374:                        }
                    375:
                    376:                        /* check limits */
                    377:                        if ((epp->ep_tsize > MAXTSIZ) ||
                    378:                            (epp->ep_dsize > (u_quad_t)l->l_proc->p_rlimit
                    379:                                                    [RLIMIT_DATA].rlim_cur)) {
                    380:                                error = ENOMEM;
                    381:                                break;
                    382:                        }
                    383:                        return 0;
                    384:                }
                    385:
                    386:                if (epp->ep_emul_root != NULL) {
                    387:                        vrele(epp->ep_emul_root);
                    388:                        epp->ep_emul_root = NULL;
                    389:                }
                    390:                if (epp->ep_interp != NULL) {
                    391:                        vrele(epp->ep_interp);
                    392:                        epp->ep_interp = NULL;
                    393:                }
                    394:
1.68      cgd       395:                /* make sure the first "interesting" error code is saved. */
1.244     dsl       396:                if (error == ENOEXEC)
1.68      cgd       397:                        error = newerror;
1.124     jdolecek  398:
1.244     dsl       399:                if (epp->ep_flags & EXEC_DESTR)
                    400:                        /* Error from "#!" code, tidied up by recursive call */
1.55      cgd       401:                        return error;
                    402:        }
                    403:
1.249     pooka     404:        /* not found, error */
                    405:
1.55      cgd       406:        /*
                    407:         * free any vmspace-creation commands,
                    408:         * and release their references
                    409:         */
                    410:        kill_vmcmds(&epp->ep_vmcmds);
                    411:
                    412: bad2:
                    413:        /*
1.99      wrstuden  414:         * close and release the vnode, restore the old one, free the
1.55      cgd       415:         * pathname buf, and punt.
                    416:         */
1.99      wrstuden  417:        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.254     pooka     418:        VOP_CLOSE(vp, FREAD, l->l_cred);
1.99      wrstuden  419:        vput(vp);
1.55      cgd       420:        return error;
                    421:
                    422: bad1:
                    423:        /*
                    424:         * free the namei pathname buffer, and put the vnode
                    425:         * (which we don't yet have open).
                    426:         */
1.77      cgd       427:        vput(vp);                               /* was still locked */
1.55      cgd       428:        return error;
                    429: }
                    430:
/*
 * STACK_PTHREADSPACE is NBPG on machines whose stack grows up and 0
 * everywhere else.  NOTE(review): its users are outside this excerpt.
 */
1.188     chs       431: #ifdef __MACHINE_STACK_GROWS_UP
                    432: #define STACK_PTHREADSPACE NBPG
                    433: #else
                    434: #define STACK_PTHREADSPACE 0
                    435: #endif
                    436:
/*
 * execve_fetch_element:
 *	Copy in the index'th pointer of the vector `array' and store it
 *	in *value.  Returns whatever copyin() returns.  This is the
 *	fetch routine sys_execve() hands to execve1().
 */
static int
execve_fetch_element(char * const *array, size_t index, char **value)
{
	char * const *slot = &array[index];

	return copyin(slot, value, sizeof(*value));
}
                    442:
1.55      cgd       443: /*
                    444:  * exec system call
                    445:  */
                    446: /* ARGSUSED */
1.75      christos  447: int
1.258     dsl       448: sys_execve(struct lwp *l, const struct sys_execve_args *uap, register_t *retval)
1.71      thorpej   449: {
1.258     dsl       450:        /* {
1.138     lukem     451:                syscallarg(const char *)        path;
                    452:                syscallarg(char * const *)      argp;
                    453:                syscallarg(char * const *)      envp;
1.258     dsl       454:        } */
1.204     cube      455:
                    456:        return execve1(l, SCARG(uap, path), SCARG(uap, argp),
                    457:            SCARG(uap, envp), execve_fetch_element);
                    458: }
                    459:
/*
 * exec_autoload:
 *	Load modules to try and execute an image that we do not
 *	understand.  When there are no execsw entries at all, only the
 *	modules likely to be needed for native images are requested;
 *	otherwise every module that could possibly let us run the binary
 *	is requested.  XXX lame
 *
 *	A non-zero return from module_autoload() is ignored; yield() is
 *	called after each call that returns 0.  Without MODULAR this
 *	function does nothing.
 */
static void
exec_autoload(void)
{
#ifdef MODULAR
	static const char * const native[] = {
		"exec_elf32",
		"exec_elf64",
		"exec_script",
		NULL
	};
	static const char * const compat[] = {
		"exec_elf32",
		"exec_elf64",
		"exec_script",
		"exec_aout",
		"exec_coff",
		"exec_ecoff",
		"compat_aoutm68k",
		"compat_freebsd",
		"compat_ibcs2",
		"compat_irix",
		"compat_linux",
		"compat_linux32",
		"compat_netbsd32",
		"compat_sunos",
		"compat_sunos32",
		"compat_svr4",
		"compat_svr4_32",
		"compat_ultrix",
		NULL
	};
	const char * const *modname;

	modname = (nexecs != 0) ? compat : native;
	while (*modname != NULL) {
		if (module_autoload(*modname, MODULE_CLASS_MISC) == 0)
			yield();
		modname++;
	}
#endif
}
                    509:
1.204     cube      510: int
                    511: execve1(struct lwp *l, const char *path, char * const *args,
                    512:     char * const *envs, execve_fetch_element_t fetch_element)
                    513: {
1.153     thorpej   514:        int                     error;
1.138     lukem     515:        struct exec_package     pack;
1.301     dholland  516:        struct pathbuf          *pb;
1.138     lukem     517:        struct vattr            attr;
1.164     thorpej   518:        struct proc             *p;
1.138     lukem     519:        char                    *argp;
                    520:        char                    *dp, *sp;
                    521:        long                    argc, envc;
1.248     christos  522:        size_t                  i, len;
1.138     lukem     523:        char                    *stack;
                    524:        struct ps_strings       arginfo;
1.213     manu      525:        struct ps_strings       *aip = &arginfo;
1.138     lukem     526:        struct vmspace          *vm;
1.265     yamt      527:        struct exec_fakearg     *tmpfap;
1.138     lukem     528:        int                     szsigcode;
                    529:        struct exec_vmcmd       *base_vcp;
1.279     wrstuden  530:        int                     oldlwpflags;
1.237     ad        531:        ksiginfo_t              ksi;
                    532:        ksiginfoq_t             kq;
1.301     dholland  533:        const char              *pathstring;
1.295     dholland  534:        char                    *resolvedpathbuf;
                    535:        const char              *commandname;
1.282     ad        536:        u_int                   modgen;
1.55      cgd       537:
1.237     ad        538:        p = l->l_proc;
1.282     ad        539:        modgen = 0;
1.164     thorpej   540:
1.294     darran    541:        SDT_PROBE(proc,,,exec, path, 0, 0, 0, 0);
                    542:
1.149     christos  543:        /*
1.269     christos  544:         * Check if we have exceeded our number of processes limit.
                    545:         * This is so that we handle the case where a root daemon
                    546:         * forked, ran setuid to become the desired user and is trying
                    547:         * to exec. The obvious place to do the reference counting check
                    548:         * is setuid(), but we don't do the reference counting check there
                    549:         * like other OS's do because then all the programs that use setuid()
                    550:         * must be modified to check the return code of setuid() and exit().
                    551:         * It is dangerous to make setuid() fail, because it fails open and
                    552:         * the program will continue to run as root. If we make it succeed
                    553:         * and return an error code, again we are not enforcing the limit.
                    554:         * The best place to enforce the limit is here, when the process tries
                    555:         * to execute a new image, because eventually the process will need
                    556:         * to call exec in order to do something useful.
                    557:         */
1.282     ad        558:  retry:
1.287     christos  559:        if ((p->p_flag & PK_SUGID) && kauth_authorize_generic(l->l_cred,
                    560:            KAUTH_GENERIC_ISSUSER, NULL) != 0 && chgproccnt(kauth_cred_getuid(
                    561:            l->l_cred), 0) > p->p_rlimit[RLIMIT_NPROC].rlim_cur)
1.269     christos  562:                return EAGAIN;
                    563:
1.279     wrstuden  564:        oldlwpflags = l->l_flag & (LW_SA | LW_SA_UPCALL);
                    565:        if (l->l_flag & LW_SA) {
                    566:                lwp_lock(l);
                    567:                l->l_flag &= ~(LW_SA | LW_SA_UPCALL);
                    568:                lwp_unlock(l);
                    569:        }
                    570:
1.269     christos  571:        /*
1.237     ad        572:         * Drain existing references and forbid new ones.  The process
                    573:         * should be left alone until we're done here.  This is necessary
                    574:         * to avoid race conditions - e.g. in ptrace() - that might allow
                    575:         * a local user to illicitly obtain elevated privileges.
                    576:         */
1.252     ad        577:        rw_enter(&p->p_reflock, RW_WRITER);
1.149     christos  578:
1.138     lukem     579:        base_vcp = NULL;
1.55      cgd       580:        /*
1.129     jdolecek  581:         * Init the namei data to point to the user's program file name.
                    582:         * This is done here rather than in check_exec(), so that it's
                    583:         * possible to override these settings if any of makecmd/probe
                    584:         * functions call check_exec() recursively - for example,
                    585:         * see exec_script_makecmds().
                    586:         */
1.301     dholland  587:        error = pathbuf_copyin(path, &pb);
1.248     christos  588:        if (error) {
1.305     matt      589:                DPRINTF(("execve: pathbuf_copyin path @%p %d\n", path, error));
1.200     elad      590:                goto clrflg;
1.248     christos  591:        }
1.301     dholland  592:        pathstring = pathbuf_stringcopy_get(pb);
1.295     dholland  593:        resolvedpathbuf = PNBUF_GET();
                    594: #ifdef DIAGNOSTIC
                    595:        strcpy(resolvedpathbuf, "/wrong");
                    596: #endif
1.55      cgd       597:
                    598:        /*
                    599:         * initialize the fields of the exec package.
                    600:         */
1.204     cube      601:        pack.ep_name = path;
1.301     dholland  602:        pack.ep_kname = pathstring;
1.295     dholland  603:        pack.ep_resolvedname = resolvedpathbuf;
1.265     yamt      604:        pack.ep_hdr = kmem_alloc(exec_maxhdrsz, KM_SLEEP);
1.55      cgd       605:        pack.ep_hdrlen = exec_maxhdrsz;
                    606:        pack.ep_hdrvalid = 0;
1.67      christos  607:        pack.ep_emul_arg = NULL;
1.55      cgd       608:        pack.ep_vmcmds.evs_cnt = 0;
                    609:        pack.ep_vmcmds.evs_used = 0;
                    610:        pack.ep_vap = &attr;
                    611:        pack.ep_flags = 0;
1.244     dsl       612:        pack.ep_emul_root = NULL;
                    613:        pack.ep_interp = NULL;
                    614:        pack.ep_esch = NULL;
1.273     ad        615:        pack.ep_pax_flags = 0;
1.55      cgd       616:
1.237     ad        617:        rw_enter(&exec_lock, RW_READER);
1.130     jdolecek  618:
1.55      cgd       619:        /* see if we can run it. */
1.301     dholland  620:        if ((error = check_exec(l, &pack, pb)) != 0) {
1.261     xtraeme   621:                if (error != ENOENT) {
1.260     christos  622:                        DPRINTF(("execve: check exec failed %d\n", error));
1.261     xtraeme   623:                }
1.55      cgd       624:                goto freehdr;
1.248     christos  625:        }
1.55      cgd       626:
                    627:        /* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */
                    628:
                    629:        /* allocate an argument buffer */
1.277     ad        630:        argp = pool_get(&exec_pool, PR_WAITOK);
                    631:        KASSERT(argp != NULL);
1.55      cgd       632:        dp = argp;
                    633:        argc = 0;
                    634:
                    635:        /* copy the fake args list, if there's one, freeing it as we go */
                    636:        if (pack.ep_flags & EXEC_HASARGL) {
                    637:                tmpfap = pack.ep_fa;
1.265     yamt      638:                while (tmpfap->fa_arg != NULL) {
                    639:                        const char *cp;
1.55      cgd       640:
1.265     yamt      641:                        cp = tmpfap->fa_arg;
1.55      cgd       642:                        while (*cp)
                    643:                                *dp++ = *cp++;
1.276     ad        644:                        *dp++ = '\0';
1.290     dsl       645:                        ktrexecarg(tmpfap->fa_arg, cp - tmpfap->fa_arg);
1.55      cgd       646:
1.265     yamt      647:                        kmem_free(tmpfap->fa_arg, tmpfap->fa_len);
1.55      cgd       648:                        tmpfap++; argc++;
                    649:                }
1.265     yamt      650:                kmem_free(pack.ep_fa, pack.ep_fa_len);
1.55      cgd       651:                pack.ep_flags &= ~EXEC_HASARGL;
                    652:        }
                    653:
                    654:        /* Now get argv & environment */
1.204     cube      655:        if (args == NULL) {
1.248     christos  656:                DPRINTF(("execve: null args\n"));
1.55      cgd       657:                error = EINVAL;
                    658:                goto bad;
                    659:        }
1.204     cube      660:        /* 'i' will index the argp/envp element to be retrieved */
                    661:        i = 0;
1.55      cgd       662:        if (pack.ep_flags & EXEC_SKIPARG)
1.204     cube      663:                i++;
1.55      cgd       664:
                    665:        while (1) {
                    666:                len = argp + ARG_MAX - dp;
1.248     christos  667:                if ((error = (*fetch_element)(args, i, &sp)) != 0) {
                    668:                        DPRINTF(("execve: fetch_element args %d\n", error));
1.55      cgd       669:                        goto bad;
1.248     christos  670:                }
1.55      cgd       671:                if (!sp)
                    672:                        break;
1.74      christos  673:                if ((error = copyinstr(sp, dp, len, &len)) != 0) {
1.248     christos  674:                        DPRINTF(("execve: copyinstr args %d\n", error));
1.55      cgd       675:                        if (error == ENAMETOOLONG)
                    676:                                error = E2BIG;
                    677:                        goto bad;
                    678:                }
1.247     ad        679:                ktrexecarg(dp, len - 1);
1.55      cgd       680:                dp += len;
1.204     cube      681:                i++;
1.55      cgd       682:                argc++;
                    683:        }
                    684:
                    685:        envc = 0;
1.74      christos  686:        /* environment need not be there */
1.204     cube      687:        if (envs != NULL) {
                    688:                i = 0;
1.55      cgd       689:                while (1) {
                    690:                        len = argp + ARG_MAX - dp;
1.248     christos  691:                        if ((error = (*fetch_element)(envs, i, &sp)) != 0) {
                    692:                                DPRINTF(("execve: fetch_element env %d\n", error));
1.55      cgd       693:                                goto bad;
1.248     christos  694:                        }
1.55      cgd       695:                        if (!sp)
                    696:                                break;
1.74      christos  697:                        if ((error = copyinstr(sp, dp, len, &len)) != 0) {
1.248     christos  698:                                DPRINTF(("execve: copyinstr env %d\n", error));
1.55      cgd       699:                                if (error == ENAMETOOLONG)
                    700:                                        error = E2BIG;
                    701:                                goto bad;
                    702:                        }
1.247     ad        703:                        ktrexecenv(dp, len - 1);
1.55      cgd       704:                        dp += len;
1.204     cube      705:                        i++;
1.55      cgd       706:                        envc++;
                    707:                }
                    708:        }
1.61      mycroft   709:
                    710:        dp = (char *) ALIGN(dp);
1.55      cgd       711:
1.244     dsl       712:        szsigcode = pack.ep_esch->es_emul->e_esigcode -
                    713:            pack.ep_esch->es_emul->e_sigcode;
1.65      fvdl      714:
1.267     dsl       715: #ifdef __MACHINE_STACK_GROWS_UP
                    716: /* See big comment lower down */
                    717: #define        RTLD_GAP        32
                    718: #else
                    719: #define        RTLD_GAP        0
                    720: #endif
                    721:
1.55      cgd       722:        /* Now check if args & environ fit into new stack */
1.105     eeh       723:        if (pack.ep_flags & EXEC_32)
1.244     dsl       724:                len = ((argc + envc + 2 + pack.ep_esch->es_arglen) *
1.267     dsl       725:                    sizeof(int) + sizeof(int) + dp + RTLD_GAP +
1.188     chs       726:                    szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
                    727:                    - argp;
1.105     eeh       728:        else
1.244     dsl       729:                len = ((argc + envc + 2 + pack.ep_esch->es_arglen) *
1.267     dsl       730:                    sizeof(char *) + sizeof(int) + dp + RTLD_GAP +
1.188     chs       731:                    szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
                    732:                    - argp;
1.67      christos  733:
1.262     elad      734: #ifdef PAX_ASLR
                    735:        if (pax_aslr_active(l))
                    736:                len += (arc4random() % PAGE_SIZE);
                    737: #endif /* PAX_ASLR */
                    738:
1.243     matt      739: #ifdef STACKLALIGN     /* arm, etc. */
                    740:        len = STACKALIGN(len);  /* make the stack "safely" aligned */
                    741: #else
1.55      cgd       742:        len = ALIGN(len);       /* make the stack "safely" aligned */
1.243     matt      743: #endif
1.55      cgd       744:
                    745:        if (len > pack.ep_ssize) { /* in effect, compare to initial limit */
1.248     christos  746:                DPRINTF(("execve: stack limit exceeded %zu\n", len));
1.55      cgd       747:                error = ENOMEM;
                    748:                goto bad;
                    749:        }
                    750:
1.237     ad        751:        /* Get rid of other LWPs. */
1.279     wrstuden  752:        if (p->p_sa || p->p_nlwps > 1) {
1.272     ad        753:                mutex_enter(p->p_lock);
1.237     ad        754:                exit_lwps(l);
1.272     ad        755:                mutex_exit(p->p_lock);
1.237     ad        756:        }
1.164     thorpej   757:        KDASSERT(p->p_nlwps == 1);
                    758:
1.253     ad        759:        /* Destroy any lwpctl info. */
                    760:        if (p->p_lwpctl != NULL)
                    761:                lwp_ctl_exit();
                    762:
1.279     wrstuden  763: #ifdef KERN_SA
                    764:        /* Release any SA state. */
                    765:        if (p->p_sa)
                    766:                sa_release(p);
                    767: #endif /* KERN_SA */
                    768:
1.164     thorpej   769:        /* Remove POSIX timers */
                    770:        timers_free(p, TIMERS_POSIX);
                    771:
1.55      cgd       772:        /* adjust "active stack depth" for process VSZ */
                    773:        pack.ep_ssize = len;    /* maybe should go elsewhere, but... */
                    774:
1.86      thorpej   775:        /*
                    776:         * Do whatever is necessary to prepare the address space
                    777:         * for remapping.  Note that this might replace the current
                    778:         * vmspace with another!
                    779:         */
1.164     thorpej   780:        uvmspace_exec(l, pack.ep_vm_minaddr, pack.ep_vm_maxaddr);
1.55      cgd       781:
1.186     chs       782:        /* record proc's vnode, for use by procfs and others */
                    783:         if (p->p_textvp)
                    784:                 vrele(p->p_textvp);
1.293     pooka     785:        vref(pack.ep_vp);
1.186     chs       786:        p->p_textvp = pack.ep_vp;
                    787:
1.55      cgd       788:        /* Now map address space */
1.86      thorpej   789:        vm = p->p_vmspace;
1.241     dogcow    790:        vm->vm_taddr = (void *)pack.ep_taddr;
1.55      cgd       791:        vm->vm_tsize = btoc(pack.ep_tsize);
1.241     dogcow    792:        vm->vm_daddr = (void*)pack.ep_daddr;
1.55      cgd       793:        vm->vm_dsize = btoc(pack.ep_dsize);
                    794:        vm->vm_ssize = btoc(pack.ep_ssize);
1.288     mrg       795:        vm->vm_issize = 0;
1.241     dogcow    796:        vm->vm_maxsaddr = (void *)pack.ep_maxsaddr;
                    797:        vm->vm_minsaddr = (void *)pack.ep_minsaddr;
1.55      cgd       798:
1.260     christos  799: #ifdef PAX_ASLR
                    800:        pax_aslr_init(l, vm);
                    801: #endif /* PAX_ASLR */
                    802:
1.55      cgd       803:        /* create the new process's VM space by running the vmcmds */
                    804: #ifdef DIAGNOSTIC
                    805:        if (pack.ep_vmcmds.evs_used == 0)
                    806:                panic("execve: no vmcmds");
                    807: #endif
                    808:        for (i = 0; i < pack.ep_vmcmds.evs_used && !error; i++) {
                    809:                struct exec_vmcmd *vcp;
                    810:
                    811:                vcp = &pack.ep_vmcmds.evs_cmds[i];
1.114     matt      812:                if (vcp->ev_flags & VMCMD_RELATIVE) {
                    813: #ifdef DIAGNOSTIC
                    814:                        if (base_vcp == NULL)
                    815:                                panic("execve: relative vmcmd with no base");
                    816:                        if (vcp->ev_flags & VMCMD_BASE)
                    817:                                panic("execve: illegal base & relative vmcmd");
                    818: #endif
                    819:                        vcp->ev_addr += base_vcp->ev_addr;
                    820:                }
1.212     christos  821:                error = (*vcp->ev_proc)(l, vcp);
1.143     christos  822: #ifdef DEBUG_EXEC
1.111     matt      823:                if (error) {
1.248     christos  824:                        size_t j;
1.143     christos  825:                        struct exec_vmcmd *vp = &pack.ep_vmcmds.evs_cmds[0];
                    826:                        for (j = 0; j <= i; j++)
                    827:                                uprintf(
1.305     matt      828:                        "vmcmd[%zu] = %#"PRIxVADDR"/%#"PRIxVSIZE" fd@%#"PRIxVSIZE" prot=0%o flags=%d\n",
1.143     christos  829:                                    j, vp[j].ev_addr, vp[j].ev_len,
                    830:                                    vp[j].ev_offset, vp[j].ev_prot,
                    831:                                    vp[j].ev_flags);
1.111     matt      832:                }
1.143     christos  833: #endif /* DEBUG_EXEC */
1.114     matt      834:                if (vcp->ev_flags & VMCMD_BASE)
                    835:                        base_vcp = vcp;
1.55      cgd       836:        }
                    837:
                    838:        /* free the vmspace-creation commands, and release their references */
                    839:        kill_vmcmds(&pack.ep_vmcmds);
                    840:
1.186     chs       841:        vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
1.254     pooka     842:        VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred);
1.186     chs       843:        vput(pack.ep_vp);
                    844:
1.55      cgd       845:        /* if an error happened, deallocate and punt */
1.111     matt      846:        if (error) {
1.248     christos  847:                DPRINTF(("execve: vmcmd %zu failed: %d\n", i - 1, error));
1.55      cgd       848:                goto exec_abort;
1.111     matt      849:        }
1.55      cgd       850:
                    851:        /* remember information about the process */
                    852:        arginfo.ps_nargvstr = argc;
                    853:        arginfo.ps_nenvstr = envc;
                    854:
1.255     christos  855:        /* set command name & other accounting info */
1.295     dholland  856:        commandname = strrchr(pack.ep_resolvedname, '/');
                    857:        if (commandname != NULL) {
                    858:                commandname++;
                    859:        } else {
                    860:                commandname = pack.ep_resolvedname;
                    861:        }
                    862:        i = min(strlen(commandname), MAXCOMLEN);
                    863:        (void)memcpy(p->p_comm, commandname, i);
1.255     christos  864:        p->p_comm[i] = '\0';
                    865:
                    866:        dp = PNBUF_GET();
                    867:        /*
                    868:         * If the path starts with /, we don't need to do any work.
                    869:         * This handles the majority of the cases.
                    870:         * In the future perhaps we could canonicalize it?
                    871:         */
1.301     dholland  872:        if (pathstring[0] == '/')
                    873:                (void)strlcpy(pack.ep_path = dp, pathstring, MAXPATHLEN);
1.255     christos  874: #ifdef notyet
                    875:        /*
                    876:         * Although this works most of the time [since the entry was just
                    877:         * entered in the cache] we don't use it because it theoretically
                    878:         * can fail and it is not the cleanest interface, because there
                    879:         * could be races. When the namei cache is re-written, this can
                    880:         * be changed to use the appropriate function.
                    881:         */
                    882:        else if (!(error = vnode_to_path(dp, MAXPATHLEN, p->p_textvp, l, p)))
                    883:                pack.ep_path = dp;
                    884: #endif
                    885:        else {
1.256     christos  886: #ifdef notyet
1.255     christos  887:                printf("Cannot get path for pid %d [%s] (error %d)",
                    888:                    (int)p->p_pid, p->p_comm, error);
                    889: #endif
                    890:                pack.ep_path = NULL;
                    891:                PNBUF_PUT(dp);
                    892:        }
                    893:
1.163     chs       894:        stack = (char *)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
1.188     chs       895:                STACK_PTHREADSPACE + sizeof(struct ps_strings) + szsigcode),
1.163     chs       896:                len - (sizeof(struct ps_strings) + szsigcode));
1.267     dsl       897:
1.163     chs       898: #ifdef __MACHINE_STACK_GROWS_UP
                    899:        /*
                    900:         * The copyargs call always copies into lower addresses
                    901:         * first, moving towards higher addresses, starting with
1.183     junyoung  902:         * the stack pointer that we give.  When the stack grows
                    903:         * down, this puts argc/argv/envp very shallow on the
1.267     dsl       904:         * stack, right at the first user stack pointer.
                    905:         * When the stack grows up, the situation is reversed.
1.163     chs       906:         *
                    907:         * Normally, this is no big deal.  But the ld_elf.so _rtld()
1.183     junyoung  908:         * function expects to be called with a single pointer to
                    909:         * a region that has a few words it can stash values into,
1.163     chs       910:         * followed by argc/argv/envp.  When the stack grows down,
                    911:         * it's easy to decrement the stack pointer a little bit to
                    912:         * allocate the space for these few words and pass the new
                    913:         * stack pointer to _rtld.  When the stack grows up, however,
1.171     chs       914:         * a few words before argc is part of the signal trampoline, XXX
1.163     chs       915:         * so we have a problem.
                    916:         *
1.183     junyoung  917:         * Instead of changing how _rtld works, we take the easy way
1.267     dsl       918:         * out and steal 32 bytes before we call copyargs.
                    919:         * This extra space was allowed for when 'len' was calculated.
1.163     chs       920:         */
1.267     dsl       921:        stack += RTLD_GAP;
1.163     chs       922: #endif /* __MACHINE_STACK_GROWS_UP */
                    923:
1.55      cgd       924:        /* Now copy argc, args & environ to new stack */
1.244     dsl       925:        error = (*pack.ep_esch->es_copyargs)(l, &pack, &arginfo, &stack, argp);
1.255     christos  926:        if (pack.ep_path) {
                    927:                PNBUF_PUT(pack.ep_path);
                    928:                pack.ep_path = NULL;
                    929:        }
1.144     christos  930:        if (error) {
                    931:                DPRINTF(("execve: copyargs failed %d\n", error));
1.55      cgd       932:                goto exec_abort;
1.111     matt      933:        }
1.144     christos  934:        /* Move the stack back to original point */
1.163     chs       935:        stack = (char *)STACK_GROW(vm->vm_minsaddr, len);
1.55      cgd       936:
1.121     eeh       937:        /* fill process ps_strings info */
1.188     chs       938:        p->p_psstr = (struct ps_strings *)
                    939:            STACK_ALLOC(STACK_GROW(vm->vm_minsaddr, STACK_PTHREADSPACE),
1.163     chs       940:            sizeof(struct ps_strings));
1.121     eeh       941:        p->p_psargv = offsetof(struct ps_strings, ps_argvstr);
                    942:        p->p_psnargv = offsetof(struct ps_strings, ps_nargvstr);
                    943:        p->p_psenv = offsetof(struct ps_strings, ps_envstr);
                    944:        p->p_psnenv = offsetof(struct ps_strings, ps_nenvstr);
                    945:
1.55      cgd       946:        /* copy out the process's ps_strings structure */
1.213     manu      947:        if ((error = copyout(aip, (char *)p->p_psstr,
1.144     christos  948:            sizeof(arginfo))) != 0) {
1.143     christos  949:                DPRINTF(("execve: ps_strings copyout %p->%p size %ld failed\n",
1.213     manu      950:                       aip, (char *)p->p_psstr, (long)sizeof(arginfo)));
1.55      cgd       951:                goto exec_abort;
1.111     matt      952:        }
1.109     simonb    953:
1.306   ! pooka     954:        cwdexec();
1.270     ad        955:        fd_closeexec();         /* handle close on exec */
1.55      cgd       956:        execsigs(p);            /* reset caught signals */
1.183     junyoung  957:
1.164     thorpej   958:        l->l_ctxlink = NULL;    /* reset ucontext link */
1.55      cgd       959:
1.255     christos  960:
1.55      cgd       961:        p->p_acflag &= ~AFORK;
1.272     ad        962:        mutex_enter(p->p_lock);
1.238     pavel     963:        p->p_flag |= PK_EXEC;
1.272     ad        964:        mutex_exit(p->p_lock);
1.237     ad        965:
                    966:        /*
                    967:         * Stop profiling.
                    968:         */
                    969:        if ((p->p_stflag & PST_PROFIL) != 0) {
                    970:                mutex_spin_enter(&p->p_stmutex);
                    971:                stopprofclock(p);
                    972:                mutex_spin_exit(&p->p_stmutex);
                    973:        }
                    974:
                    975:        /*
1.275     ad        976:         * It's OK to test PL_PPWAIT unlocked here, as other LWPs have
1.237     ad        977:         * exited and exec()/exit() are the only places it will be cleared.
                    978:         */
1.275     ad        979:        if ((p->p_lflag & PL_PPWAIT) != 0) {
1.271     ad        980:                mutex_enter(proc_lock);
1.275     ad        981:                p->p_lflag &= ~PL_PPWAIT;
1.237     ad        982:                cv_broadcast(&p->p_pptr->p_waitcv);
1.271     ad        983:                mutex_exit(proc_lock);
1.55      cgd       984:        }
                    985:
                    986:        /*
1.237     ad        987:         * Deal with set[ug]id.  MNT_NOSUID has already been used to disable
                    988:         * s[ug]id.  It's OK to check for PSL_TRACED here as we have blocked
                    989:         * out additional references on the process for the moment.
1.55      cgd       990:         */
1.237     ad        991:        if ((p->p_slflag & PSL_TRACED) == 0 &&
1.141     thorpej   992:
                    993:            (((attr.va_mode & S_ISUID) != 0 &&
1.221     ad        994:              kauth_cred_geteuid(l->l_cred) != attr.va_uid) ||
1.141     thorpej   995:
                    996:             ((attr.va_mode & S_ISGID) != 0 &&
1.221     ad        997:              kauth_cred_getegid(l->l_cred) != attr.va_gid))) {
1.141     thorpej   998:                /*
                    999:                 * Mark the process as SUGID before we do
                   1000:                 * anything that might block.
                   1001:                 */
1.237     ad       1002:                proc_crmod_enter();
1.240     thorpej  1003:                proc_crmod_leave(NULL, NULL, true);
1.152     christos 1004:
                   1005:                /* Make sure file descriptors 0..2 are in use. */
1.270     ad       1006:                if ((error = fd_checkstd()) != 0) {
1.209     christos 1007:                        DPRINTF(("execve: fdcheckstd failed %d\n", error));
1.152     christos 1008:                        goto exec_abort;
1.209     christos 1009:                }
1.141     thorpej  1010:
1.220     ad       1011:                /*
                   1012:                 * Copy the credential so other references don't see our
                   1013:                 * changes.
                   1014:                 */
1.221     ad       1015:                l->l_cred = kauth_cred_copy(l->l_cred);
1.55      cgd      1016: #ifdef KTRACE
                   1017:                /*
1.268     elad     1018:                 * If the persistent trace flag isn't set, turn off.
1.55      cgd      1019:                 */
1.237     ad       1020:                if (p->p_tracep) {
1.247     ad       1021:                        mutex_enter(&ktrace_lock);
1.268     elad     1022:                        if (!(p->p_traceflag & KTRFAC_PERSISTENT))
1.237     ad       1023:                                ktrderef(p);
1.247     ad       1024:                        mutex_exit(&ktrace_lock);
1.237     ad       1025:                }
1.55      cgd      1026: #endif
1.83      mycroft  1027:                if (attr.va_mode & S_ISUID)
1.221     ad       1028:                        kauth_cred_seteuid(l->l_cred, attr.va_uid);
1.83      mycroft  1029:                if (attr.va_mode & S_ISGID)
1.221     ad       1030:                        kauth_cred_setegid(l->l_cred, attr.va_gid);
1.210     christos 1031:        } else {
1.221     ad       1032:                if (kauth_cred_geteuid(l->l_cred) ==
                   1033:                    kauth_cred_getuid(l->l_cred) &&
                   1034:                    kauth_cred_getegid(l->l_cred) ==
                   1035:                    kauth_cred_getgid(l->l_cred))
1.238     pavel    1036:                        p->p_flag &= ~PK_SUGID;
1.210     christos 1037:        }
1.220     ad       1038:
                   1039:        /*
                   1040:         * Copy the credential so other references don't see our changes.
                   1041:         * Test to see if this is necessary first, since in the common case
                   1042:         * we won't need a private reference.
                   1043:         */
1.221     ad       1044:        if (kauth_cred_geteuid(l->l_cred) != kauth_cred_getsvuid(l->l_cred) ||
                   1045:            kauth_cred_getegid(l->l_cred) != kauth_cred_getsvgid(l->l_cred)) {
                   1046:                l->l_cred = kauth_cred_copy(l->l_cred);
                   1047:                kauth_cred_setsvuid(l->l_cred, kauth_cred_geteuid(l->l_cred));
                   1048:                kauth_cred_setsvgid(l->l_cred, kauth_cred_getegid(l->l_cred));
1.220     ad       1049:        }
1.155     gmcgarry 1050:
1.221     ad       1051:        /* Update the master credentials. */
1.227     ad       1052:        if (l->l_cred != p->p_cred) {
                   1053:                kauth_cred_t ocred;
                   1054:
                   1055:                kauth_cred_hold(l->l_cred);
1.272     ad       1056:                mutex_enter(p->p_lock);
1.227     ad       1057:                ocred = p->p_cred;
                   1058:                p->p_cred = l->l_cred;
1.272     ad       1059:                mutex_exit(p->p_lock);
1.227     ad       1060:                kauth_cred_free(ocred);
                   1061:        }
1.221     ad       1062:
1.155     gmcgarry 1063: #if defined(__HAVE_RAS)
                   1064:        /*
                   1065:         * Remove all RASs from the address space.
                   1066:         */
1.251     ad       1067:        ras_purgeall();
1.155     gmcgarry 1068: #endif
1.107     fvdl     1069:
                   1070:        doexechooks(p);
1.55      cgd      1071:
                   1072:        /* setup new registers and do misc. setup. */
1.292     matt     1073:        (*pack.ep_esch->es_emul->e_setregs)(l, &pack, (vaddr_t)stack);
1.244     dsl      1074:        if (pack.ep_esch->es_setregs)
1.292     matt     1075:                (*pack.ep_esch->es_setregs)(l, &pack, (vaddr_t)stack);
1.55      cgd      1076:
1.171     chs      1077:        /* map the process's signal trampoline code */
1.305     matt     1078:        if ((error = exec_sigcode_map(p, pack.ep_esch->es_emul)) != 0) {
1.209     christos 1079:                DPRINTF(("execve: map sigcode failed %d\n", error));
1.171     chs      1080:                goto exec_abort;
1.209     christos 1081:        }
1.171     chs      1082:
1.277     ad       1083:        pool_put(&exec_pool, argp);
1.276     ad       1084:
                   1085:        /* notify others that we exec'd */
                   1086:        KNOTE(&p->p_klist, NOTE_EXEC);
                   1087:
1.265     yamt     1088:        kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1.122     jdolecek 1089:
1.294     darran   1090:        SDT_PROBE(proc,,,exec_success, path, 0, 0, 0, 0);
                   1091:
1.244     dsl      1092:        /* The emulation root will usually have been found when we looked
                   1093:         * for the elf interpreter (or similar), if not look now. */
                   1094:        if (pack.ep_esch->es_emul->e_path != NULL && pack.ep_emul_root == NULL)
                   1095:                emul_find_root(l, &pack);
                   1096:
                   1097:        /* Any old emulation root got removed by fdcloseexec */
1.259     ad       1098:        rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER);
1.244     dsl      1099:        p->p_cwdi->cwdi_edir = pack.ep_emul_root;
1.259     ad       1100:        rw_exit(&p->p_cwdi->cwdi_lock);
1.244     dsl      1101:        pack.ep_emul_root = NULL;
                   1102:        if (pack.ep_interp != NULL)
                   1103:                vrele(pack.ep_interp);
                   1104:
1.122     jdolecek 1105:        /*
1.194     peter    1106:         * Call emulation specific exec hook. This can setup per-process
1.122     jdolecek 1107:         * p->p_emuldata or do any other per-process stuff an emulation needs.
                   1108:         *
                   1109:         * If we are executing process of different emulation than the
                   1110:         * original forked process, call e_proc_exit() of the old emulation
                   1111:         * first, then e_proc_exec() of new emulation. If the emulation is
                   1112:         * same, the exec hook code should deallocate any old emulation
                   1113:         * resources held previously by this process.
                   1114:         */
1.124     jdolecek 1115:        if (p->p_emul && p->p_emul->e_proc_exit
1.244     dsl      1116:            && p->p_emul != pack.ep_esch->es_emul)
1.122     jdolecek 1117:                (*p->p_emul->e_proc_exit)(p);
                   1118:
1.123     jdolecek 1119:        /*
1.299     chs      1120:         * This is now LWP 1.
                   1121:         */
                   1122:        mutex_enter(p->p_lock);
                   1123:        p->p_nlwpid = 1;
                   1124:        l->l_lid = 1;
                   1125:        mutex_exit(p->p_lock);
                   1126:
                   1127:        /*
1.123     jdolecek 1128:         * Call exec hook. Emulation code may NOT store reference to anything
                   1129:         * from &pack.
                   1130:         */
1.244     dsl      1131:         if (pack.ep_esch->es_emul->e_proc_exec)
                   1132:                 (*pack.ep_esch->es_emul->e_proc_exec)(p, &pack);
1.122     jdolecek 1133:
                   1134:        /* update p_emul, the old value is no longer needed */
1.244     dsl      1135:        p->p_emul = pack.ep_esch->es_emul;
1.148     thorpej  1136:
                   1137:        /* ...and the same for p_execsw */
1.244     dsl      1138:        p->p_execsw = pack.ep_esch;
1.148     thorpej  1139:
1.133     mycroft  1140: #ifdef __HAVE_SYSCALL_INTERN
                   1141:        (*p->p_emul->e_syscall_intern)(p);
                   1142: #endif
1.247     ad       1143:        ktremul();
1.85      mycroft  1144:
1.252     ad       1145:        /* Allow new references from the debugger/procfs. */
                   1146:        rw_exit(&p->p_reflock);
1.237     ad       1147:        rw_exit(&exec_lock);
1.162     manu     1148:
1.271     ad       1149:        mutex_enter(proc_lock);
1.237     ad       1150:
                   1151:        if ((p->p_slflag & (PSL_TRACED|PSL_SYSCALL)) == PSL_TRACED) {
                   1152:                KSI_INIT_EMPTY(&ksi);
                   1153:                ksi.ksi_signo = SIGTRAP;
                   1154:                ksi.ksi_lid = l->l_lid;
                   1155:                kpsignal(p, &ksi, NULL);
                   1156:        }
1.162     manu     1157:
1.237     ad       1158:        if (p->p_sflag & PS_STOPEXEC) {
                   1159:                KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
1.175     dsl      1160:                p->p_pptr->p_nstopchild++;
1.237     ad       1161:                p->p_pptr->p_waited = 0;
1.272     ad       1162:                mutex_enter(p->p_lock);
1.237     ad       1163:                ksiginfo_queue_init(&kq);
                   1164:                sigclearall(p, &contsigmask, &kq);
                   1165:                lwp_lock(l);
                   1166:                l->l_stat = LSSTOP;
1.162     manu     1167:                p->p_stat = SSTOP;
1.164     thorpej  1168:                p->p_nrlwps--;
1.304     rmind    1169:                lwp_unlock(l);
1.272     ad       1170:                mutex_exit(p->p_lock);
1.271     ad       1171:                mutex_exit(proc_lock);
1.304     rmind    1172:                lwp_lock(l);
1.245     yamt     1173:                mi_switch(l);
1.237     ad       1174:                ksiginfo_queue_drain(&kq);
                   1175:                KERNEL_LOCK(l->l_biglocks, l);
                   1176:        } else {
1.271     ad       1177:                mutex_exit(proc_lock);
1.162     manu     1178:        }
                   1179:
1.301     dholland 1180:        pathbuf_stringcopy_put(pb, pathstring);
                   1181:        pathbuf_destroy(pb);
1.295     dholland 1182:        PNBUF_PUT(resolvedpathbuf);
1.85      mycroft  1183:        return (EJUSTRETURN);
1.55      cgd      1184:
1.138     lukem    1185:  bad:
1.55      cgd      1186:        /* free the vmspace-creation commands, and release their references */
                   1187:        kill_vmcmds(&pack.ep_vmcmds);
                   1188:        /* kill any opened file descriptor, if necessary */
                   1189:        if (pack.ep_flags & EXEC_HASFD) {
                   1190:                pack.ep_flags &= ~EXEC_HASFD;
1.270     ad       1191:                fd_close(pack.ep_fd);
1.55      cgd      1192:        }
                   1193:        /* close and put the exec'd file */
1.99      wrstuden 1194:        vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
1.254     pooka    1195:        VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred);
1.99      wrstuden 1196:        vput(pack.ep_vp);
1.277     ad       1197:        pool_put(&exec_pool, argp);
1.55      cgd      1198:
1.138     lukem    1199:  freehdr:
1.265     yamt     1200:        kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1.244     dsl      1201:        if (pack.ep_emul_root != NULL)
                   1202:                vrele(pack.ep_emul_root);
                   1203:        if (pack.ep_interp != NULL)
                   1204:                vrele(pack.ep_interp);
1.200     elad     1205:
1.274     ad       1206:        rw_exit(&exec_lock);
                   1207:
1.301     dholland 1208:        pathbuf_stringcopy_put(pb, pathstring);
                   1209:        pathbuf_destroy(pb);
1.295     dholland 1210:        PNBUF_PUT(resolvedpathbuf);
                   1211:
1.200     elad     1212:  clrflg:
1.279     wrstuden 1213:        lwp_lock(l);
                   1214:        l->l_flag |= oldlwpflags;
                   1215:        lwp_unlock(l);
1.297     rmind    1216:        rw_exit(&p->p_reflock);
                   1217:
1.282     ad       1218:        if (modgen != module_gen && error == ENOEXEC) {
                   1219:                modgen = module_gen;
                   1220:                exec_autoload();
                   1221:                goto retry;
                   1222:        }
                   1223:
1.294     darran   1224:        SDT_PROBE(proc,,,exec_failure, error, 0, 0, 0, 0);
1.55      cgd      1225:        return error;
                   1226:
1.138     lukem    1227:  exec_abort:
1.294     darran   1228:        SDT_PROBE(proc,,,exec_failure, error, 0, 0, 0, 0);
1.297     rmind    1229:        rw_exit(&p->p_reflock);
                   1230:        rw_exit(&exec_lock);
                   1231:
1.301     dholland 1232:        pathbuf_stringcopy_put(pb, pathstring);
                   1233:        pathbuf_destroy(pb);
1.295     dholland 1234:        PNBUF_PUT(resolvedpathbuf);
1.130     jdolecek 1235:
1.55      cgd      1236:        /*
                   1237:         * the old process doesn't exist anymore.  exit gracefully.
                   1238:         * get rid of the (new) address space we have created, if any, get rid
                   1239:         * of our namei data and vnode, and exit noting failure
                   1240:         */
1.88      mrg      1241:        uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
                   1242:                VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
1.73      mycroft  1243:        if (pack.ep_emul_arg)
1.284     cegger   1244:                free(pack.ep_emul_arg, M_TEMP);
1.277     ad       1245:        pool_put(&exec_pool, argp);
1.265     yamt     1246:        kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1.244     dsl      1247:        if (pack.ep_emul_root != NULL)
                   1248:                vrele(pack.ep_emul_root);
                   1249:        if (pack.ep_interp != NULL)
                   1250:                vrele(pack.ep_interp);
1.237     ad       1251:
1.252     ad       1252:        /* Acquire the sched-state mutex (exit1() will release it). */
1.272     ad       1253:        mutex_enter(p->p_lock);
1.164     thorpej  1254:        exit1(l, W_EXITCODE(error, SIGABRT));
1.55      cgd      1255:
                   1256:        /* NOTREACHED */
                   1257:        return 0;
1.67      christos 1258: }
                   1259:
                   1260:
1.144     christos 1261: int
1.231     yamt     1262: copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo,
                   1263:     char **stackp, void *argp)
1.67      christos 1264: {
1.138     lukem    1265:        char    **cpp, *dp, *sp;
                   1266:        size_t  len;
                   1267:        void    *nullp;
                   1268:        long    argc, envc;
1.144     christos 1269:        int     error;
1.138     lukem    1270:
1.144     christos 1271:        cpp = (char **)*stackp;
1.138     lukem    1272:        nullp = NULL;
                   1273:        argc = arginfo->ps_nargvstr;
                   1274:        envc = arginfo->ps_nenvstr;
1.305     matt     1275:        if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0) {
                   1276:                DPRINTF(("copyargs:%d copyout @%p %zu\n", __LINE__, cpp-1, sizeof(argc)));
1.144     christos 1277:                return error;
1.305     matt     1278:        }
1.67      christos 1279:
1.244     dsl      1280:        dp = (char *) (cpp + argc + envc + 2 + pack->ep_esch->es_arglen);
1.67      christos 1281:        sp = argp;
                   1282:
                   1283:        /* XXX don't copy them out, remap them! */
1.69      mycroft  1284:        arginfo->ps_argvstr = cpp; /* remember location of argv for later */
1.67      christos 1285:
1.305     matt     1286:        for (; --argc >= 0; sp += len, dp += len) {
                   1287:                if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
                   1288:                        DPRINTF(("copyargs:%d copyout @%p %zu\n", __LINE__, cpp-1, sizeof(dp)));
                   1289:                        return error;
                   1290:                }
                   1291:                if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
                   1292:                        DPRINTF(("copyargs:%d copyoutstr @%p %u\n", __LINE__, dp, ARG_MAX));
1.144     christos 1293:                        return error;
1.305     matt     1294:                }
                   1295:        }
1.67      christos 1296:
1.305     matt     1297:        if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
                   1298:                DPRINTF(("copyargs:%d copyout @%p %zu\n", __LINE__, cpp-1, sizeof(nullp)));
1.144     christos 1299:                return error;
1.305     matt     1300:        }
1.67      christos 1301:
1.69      mycroft  1302:        arginfo->ps_envstr = cpp; /* remember location of envp for later */
1.67      christos 1303:
1.305     matt     1304:        for (; --envc >= 0; sp += len, dp += len) {
                   1305:                if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
                   1306:                        DPRINTF(("copyargs:%d copyout @%p %zu\n", __LINE__, cpp-1, sizeof(dp)));
1.144     christos 1307:                        return error;
1.305     matt     1308:                }
                   1309:                if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
                   1310:                        DPRINTF(("copyargs:%d copyoutstr @%p %u\n", __LINE__, dp, ARG_MAX));
                   1311:                        return error;
                   1312:                }
                   1313:        }
1.67      christos 1314:
1.305     matt     1315:        if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
                   1316:                DPRINTF(("copyargs:%d copyout @%p %zu\n", __LINE__, cpp-1, sizeof(nullp)));
1.144     christos 1317:                return error;
1.305     matt     1318:        }
1.67      christos 1319:
1.144     christos 1320:        *stackp = (char *)cpp;
                   1321:        return 0;
1.55      cgd      1322: }
1.130     jdolecek 1323:
                   1324:
                   1325: /*
1.282     ad       1326:  * Add execsw[] entries.
1.130     jdolecek 1327:  */
                   1328: int
1.282     ad       1329: exec_add(struct execsw *esp, int count)
1.130     jdolecek 1330: {
1.282     ad       1331:        struct exec_entry       *it;
                   1332:        int                     i;
1.130     jdolecek 1333:
1.283     ad       1334:        if (count == 0) {
                   1335:                return 0;
                   1336:        }
1.130     jdolecek 1337:
1.282     ad       1338:        /* Check for duplicates. */
1.237     ad       1339:        rw_enter(&exec_lock, RW_WRITER);
1.282     ad       1340:        for (i = 0; i < count; i++) {
                   1341:                LIST_FOREACH(it, &ex_head, ex_list) {
                   1342:                        /* assume unique (makecmds, probe_func, emulation) */
                   1343:                        if (it->ex_sw->es_makecmds == esp[i].es_makecmds &&
                   1344:                            it->ex_sw->u.elf_probe_func ==
                   1345:                            esp[i].u.elf_probe_func &&
                   1346:                            it->ex_sw->es_emul == esp[i].es_emul) {
                   1347:                                rw_exit(&exec_lock);
                   1348:                                return EEXIST;
1.130     jdolecek 1349:                        }
                   1350:                }
                   1351:        }
                   1352:
1.282     ad       1353:        /* Allocate new entries. */
                   1354:        for (i = 0; i < count; i++) {
                   1355:                it = kmem_alloc(sizeof(*it), KM_SLEEP);
                   1356:                it->ex_sw = &esp[i];
                   1357:                LIST_INSERT_HEAD(&ex_head, it, ex_list);
1.130     jdolecek 1358:        }
                   1359:
                   1360:        /* update execsw[] */
                   1361:        exec_init(0);
1.237     ad       1362:        rw_exit(&exec_lock);
1.282     ad       1363:        return 0;
1.130     jdolecek 1364: }
                   1365:
                   1366: /*
                   1367:  * Remove execsw[] entry.
                   1368:  */
                   1369: int
1.282     ad       1370: exec_remove(struct execsw *esp, int count)
1.130     jdolecek 1371: {
1.282     ad       1372:        struct exec_entry       *it, *next;
                   1373:        int                     i;
                   1374:        const struct proclist_desc *pd;
                   1375:        proc_t                  *p;
                   1376:
1.283     ad       1377:        if (count == 0) {
                   1378:                return 0;
                   1379:        }
1.130     jdolecek 1380:
1.282     ad       1381:        /* Abort if any are busy. */
1.237     ad       1382:        rw_enter(&exec_lock, RW_WRITER);
1.282     ad       1383:        for (i = 0; i < count; i++) {
                   1384:                mutex_enter(proc_lock);
                   1385:                for (pd = proclists; pd->pd_list != NULL; pd++) {
                   1386:                        PROCLIST_FOREACH(p, pd->pd_list) {
                   1387:                                if (p->p_execsw == &esp[i]) {
                   1388:                                        mutex_exit(proc_lock);
                   1389:                                        rw_exit(&exec_lock);
                   1390:                                        return EBUSY;
                   1391:                                }
                   1392:                        }
                   1393:                }
                   1394:                mutex_exit(proc_lock);
                   1395:        }
1.130     jdolecek 1396:
1.282     ad       1397:        /* None are busy, so remove them all. */
                   1398:        for (i = 0; i < count; i++) {
                   1399:                for (it = LIST_FIRST(&ex_head); it != NULL; it = next) {
                   1400:                        next = LIST_NEXT(it, ex_list);
                   1401:                        if (it->ex_sw == &esp[i]) {
                   1402:                                LIST_REMOVE(it, ex_list);
                   1403:                                kmem_free(it, sizeof(*it));
                   1404:                                break;
                   1405:                        }
                   1406:                }
1.130     jdolecek 1407:        }
                   1408:
                   1409:        /* update execsw[] */
                   1410:        exec_init(0);
1.237     ad       1411:        rw_exit(&exec_lock);
1.282     ad       1412:        return 0;
1.130     jdolecek 1413: }
                   1414:
                   1415: /*
                   1416:  * Initialize exec structures. If init_boot is true, also does necessary
                   1417:  * one-time initialization (it's called from main() that way).
1.147     jdolecek 1418:  * Once system is multiuser, this should be called with exec_lock held,
1.130     jdolecek 1419:  * i.e. via exec_{add|remove}().
                   1420:  */
                   1421: int
1.138     lukem    1422: exec_init(int init_boot)
1.130     jdolecek 1423: {
1.282     ad       1424:        const struct execsw     **sw;
                   1425:        struct exec_entry       *ex;
                   1426:        SLIST_HEAD(,exec_entry) first;
                   1427:        SLIST_HEAD(,exec_entry) any;
                   1428:        SLIST_HEAD(,exec_entry) last;
                   1429:        int                     i, sz;
1.130     jdolecek 1430:
                   1431:        if (init_boot) {
                   1432:                /* do one-time initializations */
1.237     ad       1433:                rw_init(&exec_lock);
1.259     ad       1434:                mutex_init(&sigobject_lock, MUTEX_DEFAULT, IPL_NONE);
1.277     ad       1435:                pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
                   1436:                    "execargs", &exec_palloc, IPL_NONE);
                   1437:                pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);
1.282     ad       1438:        } else {
                   1439:                KASSERT(rw_write_held(&exec_lock));
                   1440:        }
1.130     jdolecek 1441:
1.282     ad       1442:        /* Sort each entry onto the appropriate queue. */
                   1443:        SLIST_INIT(&first);
                   1444:        SLIST_INIT(&any);
                   1445:        SLIST_INIT(&last);
                   1446:        sz = 0;
                   1447:        LIST_FOREACH(ex, &ex_head, ex_list) {
                   1448:                switch(ex->ex_sw->es_prio) {
                   1449:                case EXECSW_PRIO_FIRST:
                   1450:                        SLIST_INSERT_HEAD(&first, ex, ex_slist);
                   1451:                        break;
                   1452:                case EXECSW_PRIO_ANY:
                   1453:                        SLIST_INSERT_HEAD(&any, ex, ex_slist);
                   1454:                        break;
                   1455:                case EXECSW_PRIO_LAST:
                   1456:                        SLIST_INSERT_HEAD(&last, ex, ex_slist);
                   1457:                        break;
                   1458:                default:
                   1459:                        panic("exec_init");
                   1460:                        break;
1.130     jdolecek 1461:                }
1.282     ad       1462:                sz++;
1.130     jdolecek 1463:        }
                   1464:
                   1465:        /*
1.282     ad       1466:         * Create new execsw[].  Ensure we do not try a zero-sized
                   1467:         * allocation.
1.130     jdolecek 1468:         */
1.282     ad       1469:        sw = kmem_alloc(sz * sizeof(struct execsw *) + 1, KM_SLEEP);
                   1470:        i = 0;
                   1471:        SLIST_FOREACH(ex, &first, ex_slist) {
                   1472:                sw[i++] = ex->ex_sw;
                   1473:        }
                   1474:        SLIST_FOREACH(ex, &any, ex_slist) {
                   1475:                sw[i++] = ex->ex_sw;
                   1476:        }
                   1477:        SLIST_FOREACH(ex, &last, ex_slist) {
                   1478:                sw[i++] = ex->ex_sw;
1.130     jdolecek 1479:        }
1.183     junyoung 1480:
1.282     ad       1481:        /* Replace old execsw[] and free used memory. */
                   1482:        if (execsw != NULL) {
                   1483:                kmem_free(__UNCONST(execsw),
                   1484:                    nexecs * sizeof(struct execsw *) + 1);
1.130     jdolecek 1485:        }
1.282     ad       1486:        execsw = sw;
                   1487:        nexecs = sz;
1.130     jdolecek 1488:
1.282     ad       1489:        /* Figure out the maximum size of an exec header. */
                   1490:        exec_maxhdrsz = sizeof(int);
1.130     jdolecek 1491:        for (i = 0; i < nexecs; i++) {
                   1492:                if (execsw[i]->es_hdrsz > exec_maxhdrsz)
                   1493:                        exec_maxhdrsz = execsw[i]->es_hdrsz;
                   1494:        }
                   1495:
                   1496:        return 0;
                   1497: }
1.171     chs      1498:
                   1499: static int
                   1500: exec_sigcode_map(struct proc *p, const struct emul *e)
                   1501: {
                   1502:        vaddr_t va;
                   1503:        vsize_t sz;
                   1504:        int error;
                   1505:        struct uvm_object *uobj;
                   1506:
1.184     drochner 1507:        sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;
                   1508:
                   1509:        if (e->e_sigobject == NULL || sz == 0) {
1.171     chs      1510:                return 0;
                   1511:        }
                   1512:
                   1513:        /*
                   1514:         * If we don't have a sigobject for this emulation, create one.
                   1515:         *
                   1516:         * sigobject is an anonymous memory object (just like SYSV shared
                   1517:         * memory) that we keep a permanent reference to and that we map
                   1518:         * in all processes that need this sigcode. The creation is simple,
                   1519:         * we create an object, add a permanent reference to it, map it in
                   1520:         * kernel space, copy out the sigcode to it and unmap it.
1.189     jdolecek 1521:         * We map it with PROT_READ|PROT_EXEC into the process just
                   1522:         * the way sys_mmap() would map it.
1.171     chs      1523:         */
                   1524:
                   1525:        uobj = *e->e_sigobject;
                   1526:        if (uobj == NULL) {
1.259     ad       1527:                mutex_enter(&sigobject_lock);
                   1528:                if ((uobj = *e->e_sigobject) == NULL) {
                   1529:                        uobj = uao_create(sz, 0);
                   1530:                        (*uobj->pgops->pgo_reference)(uobj);
                   1531:                        va = vm_map_min(kernel_map);
                   1532:                        if ((error = uvm_map(kernel_map, &va, round_page(sz),
                   1533:                            uobj, 0, 0,
                   1534:                            UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
                   1535:                            UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
                   1536:                                printf("kernel mapping failed %d\n", error);
                   1537:                                (*uobj->pgops->pgo_detach)(uobj);
                   1538:                                mutex_exit(&sigobject_lock);
                   1539:                                return (error);
                   1540:                        }
                   1541:                        memcpy((void *)va, e->e_sigcode, sz);
1.171     chs      1542: #ifdef PMAP_NEED_PROCWR
1.259     ad       1543:                        pmap_procwr(&proc0, va, sz);
1.171     chs      1544: #endif
1.259     ad       1545:                        uvm_unmap(kernel_map, va, va + round_page(sz));
                   1546:                        *e->e_sigobject = uobj;
                   1547:                }
                   1548:                mutex_exit(&sigobject_lock);
1.171     chs      1549:        }
                   1550:
1.172     enami    1551:        /* Just a hint to uvm_map where to put it. */
1.195     fvdl     1552:        va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
                   1553:            round_page(sz));
1.187     chs      1554:
                   1555: #ifdef __alpha__
                   1556:        /*
                   1557:         * Tru64 puts /sbin/loader at the end of user virtual memory,
                   1558:         * which causes the above calculation to put the sigcode at
                   1559:         * an invalid address.  Put it just below the text instead.
                   1560:         */
1.193     jmc      1561:        if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
1.187     chs      1562:                va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
                   1563:        }
                   1564: #endif
                   1565:
1.171     chs      1566:        (*uobj->pgops->pgo_reference)(uobj);
                   1567:        error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
                   1568:                        uobj, 0, 0,
                   1569:                        UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
                   1570:                                    UVM_ADV_RANDOM, 0));
                   1571:        if (error) {
1.305     matt     1572:                DPRINTF(("exec_sigcode_map:%d map %p "
                   1573:                    "uvm_map %#"PRIxVSIZE"@%#"PRIxVADDR" failed %d\n",
                   1574:                    __LINE__, &p->p_vmspace->vm_map, round_page(sz), va,
                   1575:                    error));
1.171     chs      1576:                (*uobj->pgops->pgo_detach)(uobj);
                   1577:                return (error);
                   1578:        }
                   1579:        p->p_sigctx.ps_sigcode = (void *)va;
                   1580:        return (0);
                   1581: }

CVSweb <webmaster@jp.NetBSD.org>