[BACK]Return to kern_exec.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/kern_exec.c, Revision 1.294.2.5

1.294.2.3  rmind       1: /*     $NetBSD$        */
1.277     ad          2:
                      3: /*-
                      4:  * Copyright (c) 2008 The NetBSD Foundation, Inc.
                      5:  * All rights reserved.
                      6:  *
                      7:  * Redistribution and use in source and binary forms, with or without
                      8:  * modification, are permitted provided that the following conditions
                      9:  * are met:
                     10:  * 1. Redistributions of source code must retain the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer.
                     12:  * 2. Redistributions in binary form must reproduce the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer in the
                     14:  *    documentation and/or other materials provided with the distribution.
                     15:  *
                     16:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     17:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     18:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     19:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     20:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     21:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     22:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     23:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     24:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     25:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     26:  * POSSIBILITY OF SUCH DAMAGE.
                     27:  */
1.55      cgd        28:
                     29: /*-
1.77      cgd        30:  * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
1.55      cgd        31:  * Copyright (C) 1992 Wolfgang Solfrank.
                     32:  * Copyright (C) 1992 TooLs GmbH.
                     33:  * All rights reserved.
                     34:  *
                     35:  * Redistribution and use in source and binary forms, with or without
                     36:  * modification, are permitted provided that the following conditions
                     37:  * are met:
                     38:  * 1. Redistributions of source code must retain the above copyright
                     39:  *    notice, this list of conditions and the following disclaimer.
                     40:  * 2. Redistributions in binary form must reproduce the above copyright
                     41:  *    notice, this list of conditions and the following disclaimer in the
                     42:  *    documentation and/or other materials provided with the distribution.
                     43:  * 3. All advertising materials mentioning features or use of this software
                     44:  *    must display the following acknowledgement:
                     45:  *     This product includes software developed by TooLs GmbH.
                     46:  * 4. The name of TooLs GmbH may not be used to endorse or promote products
                     47:  *    derived from this software without specific prior written permission.
                     48:  *
                     49:  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
                     50:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
                     51:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
                     52:  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
                     53:  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
                     54:  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
                     55:  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
                     56:  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
                     57:  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
                     58:  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                     59:  */
1.146     lukem      60:
                     61: #include <sys/cdefs.h>
1.294.2.3  rmind      62: __KERNEL_RCSID(0, "$NetBSD$");
1.89      mrg        63:
1.92      thorpej    64: #include "opt_ktrace.h"
1.285     apb        65: #include "opt_modular.h"
1.124     jdolecek   66: #include "opt_syscall_debug.h"
1.226     dogcow     67: #include "veriexec.h"
1.232     elad       68: #include "opt_pax.h"
1.279     wrstuden   69: #include "opt_sa.h"
1.55      cgd        70:
                     71: #include <sys/param.h>
                     72: #include <sys/systm.h>
                     73: #include <sys/filedesc.h>
                     74: #include <sys/kernel.h>
                     75: #include <sys/proc.h>
                     76: #include <sys/mount.h>
                     77: #include <sys/malloc.h>
1.265     yamt       78: #include <sys/kmem.h>
1.55      cgd        79: #include <sys/namei.h>
                     80: #include <sys/vnode.h>
                     81: #include <sys/file.h>
                     82: #include <sys/acct.h>
                     83: #include <sys/exec.h>
                     84: #include <sys/ktrace.h>
1.278     pooka      85: #include <sys/uidinfo.h>
1.55      cgd        86: #include <sys/wait.h>
                     87: #include <sys/mman.h>
1.155     gmcgarry   88: #include <sys/ras.h>
1.55      cgd        89: #include <sys/signalvar.h>
                     90: #include <sys/stat.h>
1.124     jdolecek   91: #include <sys/syscall.h>
1.218     elad       92: #include <sys/kauth.h>
1.253     ad         93: #include <sys/lwpctl.h>
1.260     christos   94: #include <sys/pax.h>
1.263     ad         95: #include <sys/cpu.h>
1.282     ad         96: #include <sys/module.h>
1.279     wrstuden   97: #include <sys/sa.h>
                     98: #include <sys/savar.h>
1.289     pooka      99: #include <sys/syscallvar.h>
1.56      cgd       100: #include <sys/syscallargs.h>
1.222     elad      101: #if NVERIEXEC > 0
1.197     blymn     102: #include <sys/verified_exec.h>
1.222     elad      103: #endif /* NVERIEXEC > 0 */
1.294     darran    104: #include <sys/sdt.h>
1.55      cgd       105:
1.88      mrg       106: #include <uvm/uvm_extern.h>
                    107:
1.55      cgd       108: #include <machine/reg.h>
                    109:
1.244     dsl       110: #include <compat/common/compat_util.h>
                    111:
1.171     chs       112: static int exec_sigcode_map(struct proc *, const struct emul *);
                    113:
/*
 * Exec debugging output.  Both macros expand to nothing unless the
 * kernel is built with DEBUG_EXEC.
 *
 * DPRINTF(a):          `a' is a complete, parenthesized printf argument list.
 * COPYPRINTF(s, a, b): report a copyout; `s' is a suffix string, `a' the
 *                      address (%p) and `b' the size (%zu).
 */
#ifndef DEBUG_EXEC
#define DPRINTF(a)
#define COPYPRINTF(s, a, b)
#else /* DEBUG_EXEC */
#define DPRINTF(a)	printf a
#define COPYPRINTF(s, a, b)						\
	printf("%s, %d: copyout%s @%p %zu\n", __func__,			\
	    __LINE__, (s), (a), (b))
#endif /* !DEBUG_EXEC */
1.165     thorpej   122:
1.130     jdolecek  123: /*
1.294     darran    124:  * DTrace SDT provider definitions
                    125:  */
                    126: SDT_PROBE_DEFINE(proc,,,exec,
                    127:            "char *", NULL,
                    128:            NULL, NULL, NULL, NULL,
                    129:            NULL, NULL, NULL, NULL);
                    130: SDT_PROBE_DEFINE(proc,,,exec_success,
                    131:            "char *", NULL,
                    132:            NULL, NULL, NULL, NULL,
                    133:            NULL, NULL, NULL, NULL);
                    134: SDT_PROBE_DEFINE(proc,,,exec_failure,
                    135:            "int", NULL,
                    136:            NULL, NULL, NULL, NULL,
                    137:            NULL, NULL, NULL, NULL);
                    138:
                    139: /*
1.130     jdolecek  140:  * Exec function switch:
                    141:  *
                    142:  * Note that each makecmds function is responsible for loading the
                    143:  * exec package with the necessary functions for any exec-type-specific
                    144:  * handling.
                    145:  *
                    146:  * Functions for specific exec types should be defined in their own
                    147:  * header file.
                    148:  */
1.138     lukem     149: static const struct execsw     **execsw = NULL;
                    150: static int                     nexecs;
                    151:
1.282     ad        152: u_int  exec_maxhdrsz;   /* must not be static - used by netbsd32 */
1.130     jdolecek  153:
                    154: /* list of dynamically loaded execsw entries */
1.282     ad        155: static LIST_HEAD(execlist_head, exec_entry) ex_head =
                    156:     LIST_HEAD_INITIALIZER(ex_head);
1.130     jdolecek  157: struct exec_entry {
1.138     lukem     158:        LIST_ENTRY(exec_entry)  ex_list;
1.282     ad        159:        SLIST_ENTRY(exec_entry) ex_slist;
                    160:        const struct execsw     *ex_sw;
1.130     jdolecek  161: };
                    162:
1.203     christos  163: #ifndef __HAVE_SYSCALL_INTERN
                    164: void   syscall(void);
                    165: #endif
                    166:
1.280     matt      167: #ifdef KERN_SA
1.282     ad        168: static struct sa_emul saemul_netbsd = {
1.279     wrstuden  169:        sizeof(ucontext_t),
                    170:        sizeof(struct sa_t),
                    171:        sizeof(struct sa_t *),
                    172:        NULL,
                    173:        NULL,
                    174:        cpu_upcall,
                    175:        (void (*)(struct lwp *, void *))getucontext_sa,
                    176:        sa_ucsp
                    177: };
1.280     matt      178: #endif /* KERN_SA */
1.279     wrstuden  179:
1.173     christos  180: /* NetBSD emul struct */
1.282     ad        181: struct emul emul_netbsd = {
1.291     rmind     182:        .e_name =               "netbsd",
                    183:        .e_path =               NULL,
1.133     mycroft   184: #ifndef __HAVE_MINIMAL_EMUL
1.291     rmind     185:        .e_flags =              EMUL_HAS_SYS___syscall,
                    186:        .e_errno =              NULL,
                    187:        .e_nosys =              SYS_syscall,
                    188:        .e_nsysent =            SYS_NSYSENT,
1.133     mycroft   189: #endif
1.291     rmind     190:        .e_sysent =             sysent,
1.124     jdolecek  191: #ifdef SYSCALL_DEBUG
1.291     rmind     192:        .e_syscallnames =       syscallnames,
1.124     jdolecek  193: #else
1.291     rmind     194:        .e_syscallnames =       NULL,
1.124     jdolecek  195: #endif
1.291     rmind     196:        .e_sendsig =            sendsig,
                    197:        .e_trapsignal =         trapsignal,
                    198:        .e_tracesig =           NULL,
                    199:        .e_sigcode =            NULL,
                    200:        .e_esigcode =           NULL,
                    201:        .e_sigobject =          NULL,
                    202:        .e_setregs =            setregs,
                    203:        .e_proc_exec =          NULL,
                    204:        .e_proc_fork =          NULL,
                    205:        .e_proc_exit =          NULL,
                    206:        .e_lwp_fork =           NULL,
                    207:        .e_lwp_exit =           NULL,
1.133     mycroft   208: #ifdef __HAVE_SYSCALL_INTERN
1.291     rmind     209:        .e_syscall_intern =     syscall_intern,
1.133     mycroft   210: #else
1.291     rmind     211:        .e_syscall =            syscall,
1.133     mycroft   212: #endif
1.291     rmind     213:        .e_sysctlovly =         NULL,
                    214:        .e_fault =              NULL,
                    215:        .e_vm_default_addr =    uvm_default_mapaddr,
                    216:        .e_usertrap =           NULL,
1.280     matt      217: #ifdef KERN_SA
1.291     rmind     218:        .e_sa =                 &saemul_netbsd,
1.280     matt      219: #else
1.291     rmind     220:        .e_sa =                 NULL,
1.280     matt      221: #endif
1.291     rmind     222:        .e_ucsize =             sizeof(ucontext_t),
                    223:        .e_startlwp =           startlwp
1.124     jdolecek  224: };
                    225:
1.55      cgd       226: /*
1.130     jdolecek  227:  * Exec lock. Used to control access to execsw[] structures.
                    228:  * This must not be static so that netbsd32 can access it, too.
                    229:  */
1.237     ad        230: krwlock_t exec_lock;
1.183     junyoung  231:
1.259     ad        232: static kmutex_t sigobject_lock;
                    233:
1.277     ad        234: static void *
                    235: exec_pool_alloc(struct pool *pp, int flags)
                    236: {
                    237:
                    238:        return (void *)uvm_km_alloc(kernel_map, NCARGS, 0,
                    239:            UVM_KMF_PAGEABLE | UVM_KMF_WAITVA);
                    240: }
                    241:
                    242: static void
                    243: exec_pool_free(struct pool *pp, void *addr)
                    244: {
                    245:
                    246:        uvm_km_free(kernel_map, (vaddr_t)addr, NCARGS, UVM_KMF_PAGEABLE);
                    247: }
                    248:
                    249: static struct pool exec_pool;
                    250:
                    251: static struct pool_allocator exec_palloc = {
                    252:        .pa_alloc = exec_pool_alloc,
                    253:        .pa_free = exec_pool_free,
                    254:        .pa_pagesz = NCARGS
                    255: };
                    256:
1.130     jdolecek  257: /*
1.55      cgd       258:  * check exec:
                    259:  * given an "executable" described in the exec package's namei info,
                    260:  * see what we can do with it.
                    261:  *
                    262:  * ON ENTRY:
                    263:  *     exec package with appropriate namei info
1.212     christos  264:  *     lwp pointer of exec'ing lwp
1.55      cgd       265:  *     NO SELF-LOCKED VNODES
                    266:  *
                    267:  * ON EXIT:
                    268:  *     error:  nothing held, etc.  exec header still allocated.
1.77      cgd       269:  *     ok:     filled exec package, executable's vnode (unlocked).
1.55      cgd       270:  *
                    271:  * EXEC SWITCH ENTRY:
                    272:  *     Locked vnode to check, exec package, proc.
                    273:  *
                    274:  * EXEC SWITCH EXIT:
1.77      cgd       275:  *     ok:     return 0, filled exec package, executable's vnode (unlocked).
1.55      cgd       276:  *     error:  destructive:
                    277:  *                     everything deallocated execept exec header.
1.76      cgd       278:  *             non-destructive:
1.77      cgd       279:  *                     error code, executable's vnode (unlocked),
1.76      cgd       280:  *                     exec header unmodified.
1.55      cgd       281:  */
                    282: int
1.205     christos  283: /*ARGSUSED*/
1.294.2.3  rmind     284: check_exec(struct lwp *l, struct exec_package *epp, struct pathbuf *pb)
1.55      cgd       285: {
1.138     lukem     286:        int             error, i;
                    287:        struct vnode    *vp;
1.294.2.1  rmind     288:        struct nameidata nd;
1.138     lukem     289:        size_t          resid;
1.55      cgd       290:
1.294.2.3  rmind     291:        NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1.294.2.1  rmind     292:
1.55      cgd       293:        /* first get the vnode */
1.294.2.1  rmind     294:        if ((error = namei(&nd)) != 0)
1.55      cgd       295:                return error;
1.294.2.1  rmind     296:        epp->ep_vp = vp = nd.ni_vp;
                    297:        /* this cannot overflow as both are size PATH_MAX */
1.294.2.3  rmind     298:        strcpy(epp->ep_resolvedname, nd.ni_pnbuf);
1.294.2.1  rmind     299:
                    300: #ifdef DIAGNOSTIC
                    301:        /* paranoia (take this out once namei stuff stabilizes) */
1.294.2.3  rmind     302:        memset(nd.ni_pnbuf, '~', PATH_MAX);
1.294.2.1  rmind     303: #endif
1.55      cgd       304:
1.84      mycroft   305:        /* check access and type */
1.55      cgd       306:        if (vp->v_type != VREG) {
1.81      kleink    307:                error = EACCES;
1.55      cgd       308:                goto bad1;
                    309:        }
1.254     pooka     310:        if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0)
1.84      mycroft   311:                goto bad1;
1.55      cgd       312:
                    313:        /* get attributes */
1.254     pooka     314:        if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred)) != 0)
1.55      cgd       315:                goto bad1;
                    316:
                    317:        /* Check mount point */
                    318:        if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
                    319:                error = EACCES;
                    320:                goto bad1;
                    321:        }
1.141     thorpej   322:        if (vp->v_mount->mnt_flag & MNT_NOSUID)
1.83      mycroft   323:                epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);
1.55      cgd       324:
                    325:        /* try to open it */
1.254     pooka     326:        if ((error = VOP_OPEN(vp, FREAD, l->l_cred)) != 0)
1.55      cgd       327:                goto bad1;
                    328:
1.99      wrstuden  329:        /* unlock vp, since we need it unlocked from here on out. */
1.294.2.2  rmind     330:        VOP_UNLOCK(vp);
1.77      cgd       331:
1.222     elad      332: #if NVERIEXEC > 0
1.294.2.1  rmind     333:        error = veriexec_verify(l, vp, epp->ep_resolvedname,
1.233     elad      334:            epp->ep_flags & EXEC_INDIR ? VERIEXEC_INDIRECT : VERIEXEC_DIRECT,
1.236     elad      335:            NULL);
                    336:        if (error)
1.234     elad      337:                goto bad2;
1.222     elad      338: #endif /* NVERIEXEC > 0 */
1.160     blymn     339:
1.232     elad      340: #ifdef PAX_SEGVGUARD
1.294.2.1  rmind     341:        error = pax_segvguard(l, vp, epp->ep_resolvedname, false);
1.234     elad      342:        if (error)
                    343:                goto bad2;
1.232     elad      344: #endif /* PAX_SEGVGUARD */
                    345:
1.55      cgd       346:        /* now we have the file, get the exec header */
1.74      christos  347:        error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
1.223     ad        348:                        UIO_SYSSPACE, 0, l->l_cred, &resid, NULL);
1.74      christos  349:        if (error)
1.55      cgd       350:                goto bad2;
                    351:        epp->ep_hdrvalid = epp->ep_hdrlen - resid;
                    352:
                    353:        /*
1.136     eeh       354:         * Set up default address space limits.  Can be overridden
                    355:         * by individual exec packages.
1.183     junyoung  356:         *
1.235     rillig    357:         * XXX probably should be all done in the exec packages.
1.136     eeh       358:         */
                    359:        epp->ep_vm_minaddr = VM_MIN_ADDRESS;
                    360:        epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;
                    361:        /*
1.55      cgd       362:         * set up the vmcmds for creation of the process
                    363:         * address space
                    364:         */
                    365:        error = ENOEXEC;
1.244     dsl       366:        for (i = 0; i < nexecs; i++) {
1.68      cgd       367:                int newerror;
                    368:
1.130     jdolecek  369:                epp->ep_esch = execsw[i];
1.212     christos  370:                newerror = (*execsw[i]->es_makecmds)(l, epp);
1.244     dsl       371:
                    372:                if (!newerror) {
                    373:                        /* Seems ok: check that entry point is sane */
                    374:                        if (epp->ep_entry > VM_MAXUSER_ADDRESS) {
                    375:                                error = ENOEXEC;
                    376:                                break;
                    377:                        }
                    378:
                    379:                        /* check limits */
                    380:                        if ((epp->ep_tsize > MAXTSIZ) ||
                    381:                            (epp->ep_dsize > (u_quad_t)l->l_proc->p_rlimit
                    382:                                                    [RLIMIT_DATA].rlim_cur)) {
                    383:                                error = ENOMEM;
                    384:                                break;
                    385:                        }
                    386:                        return 0;
                    387:                }
                    388:
                    389:                if (epp->ep_emul_root != NULL) {
                    390:                        vrele(epp->ep_emul_root);
                    391:                        epp->ep_emul_root = NULL;
                    392:                }
                    393:                if (epp->ep_interp != NULL) {
                    394:                        vrele(epp->ep_interp);
                    395:                        epp->ep_interp = NULL;
                    396:                }
                    397:
1.68      cgd       398:                /* make sure the first "interesting" error code is saved. */
1.244     dsl       399:                if (error == ENOEXEC)
1.68      cgd       400:                        error = newerror;
1.124     jdolecek  401:
1.244     dsl       402:                if (epp->ep_flags & EXEC_DESTR)
                    403:                        /* Error from "#!" code, tidied up by recursive call */
1.55      cgd       404:                        return error;
                    405:        }
                    406:
1.249     pooka     407:        /* not found, error */
                    408:
1.55      cgd       409:        /*
                    410:         * free any vmspace-creation commands,
                    411:         * and release their references
                    412:         */
                    413:        kill_vmcmds(&epp->ep_vmcmds);
                    414:
                    415: bad2:
                    416:        /*
1.99      wrstuden  417:         * close and release the vnode, restore the old one, free the
1.55      cgd       418:         * pathname buf, and punt.
                    419:         */
1.99      wrstuden  420:        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.254     pooka     421:        VOP_CLOSE(vp, FREAD, l->l_cred);
1.99      wrstuden  422:        vput(vp);
1.55      cgd       423:        return error;
                    424:
                    425: bad1:
                    426:        /*
                    427:         * free the namei pathname buffer, and put the vnode
                    428:         * (which we don't yet have open).
                    429:         */
1.77      cgd       430:        vput(vp);                               /* was still locked */
1.55      cgd       431:        return error;
                    432: }
                    433:
/*
 * STACK_PTHREADSPACE is NBPG on machines whose stack grows up and 0
 * everywhere else.
 */
#ifndef __MACHINE_STACK_GROWS_UP
#define STACK_PTHREADSPACE	0
#else
#define STACK_PTHREADSPACE	NBPG
#endif /* !__MACHINE_STACK_GROWS_UP */
                    439:
1.204     cube      440: static int
                    441: execve_fetch_element(char * const *array, size_t index, char **value)
                    442: {
                    443:        return copyin(array + index, value, sizeof(*value));
                    444: }
                    445:
1.55      cgd       446: /*
                    447:  * exec system call
                    448:  */
                    449: /* ARGSUSED */
1.75      christos  450: int
1.258     dsl       451: sys_execve(struct lwp *l, const struct sys_execve_args *uap, register_t *retval)
1.71      thorpej   452: {
1.258     dsl       453:        /* {
1.138     lukem     454:                syscallarg(const char *)        path;
                    455:                syscallarg(char * const *)      argp;
                    456:                syscallarg(char * const *)      envp;
1.258     dsl       457:        } */
1.204     cube      458:
                    459:        return execve1(l, SCARG(uap, path), SCARG(uap, argp),
                    460:            SCARG(uap, envp), execve_fetch_element);
                    461: }
                    462:
/*
 * exec_autoload:
 *	Try to autoload modules that might let us execute an image we do
 *	not understand.  With an empty exec switch (nexecs == 0) only the
 *	modules likely to be needed for native images are tried; otherwise
 *	every module that could possibly help is tried.  XXX lame
 *
 *	Does nothing in kernels built without MODULAR.
 */
static void
exec_autoload(void)
{
#ifdef MODULAR
	static const char * const native[] = {
		"exec_elf32",
		"exec_elf64",
		"exec_script",
		NULL
	};
	static const char * const compat[] = {
		"exec_elf32",
		"exec_elf64",
		"exec_script",
		"exec_aout",
		"exec_coff",
		"exec_ecoff",
		"compat_aoutm68k",
		"compat_freebsd",
		"compat_ibcs2",
		"compat_linux",
		"compat_linux32",
		"compat_netbsd32",
		"compat_sunos",
		"compat_sunos32",
		"compat_svr4",
		"compat_svr4_32",
		"compat_ultrix",
		NULL
	};
	char const * const *modp;

	modp = (nexecs != 0) ? compat : native;
	for (; *modp != NULL; modp++) {
		/* Failed loads are ignored; yield only after a success. */
		if (module_autoload(*modp, MODULE_CLASS_MISC) == 0)
			yield();
	}
#endif
}
                    511:
1.204     cube      512: int
                    513: execve1(struct lwp *l, const char *path, char * const *args,
                    514:     char * const *envs, execve_fetch_element_t fetch_element)
                    515: {
1.153     thorpej   516:        int                     error;
1.138     lukem     517:        struct exec_package     pack;
1.294.2.3  rmind     518:        struct pathbuf          *pb;
1.138     lukem     519:        struct vattr            attr;
1.164     thorpej   520:        struct proc             *p;
1.138     lukem     521:        char                    *argp;
                    522:        char                    *dp, *sp;
                    523:        long                    argc, envc;
1.248     christos  524:        size_t                  i, len;
1.138     lukem     525:        char                    *stack;
                    526:        struct ps_strings       arginfo;
1.294.2.3  rmind     527:        struct ps_strings32     arginfo32;
                    528:        void                    *aip;
1.138     lukem     529:        struct vmspace          *vm;
1.265     yamt      530:        struct exec_fakearg     *tmpfap;
1.138     lukem     531:        int                     szsigcode;
                    532:        struct exec_vmcmd       *base_vcp;
1.279     wrstuden  533:        int                     oldlwpflags;
1.237     ad        534:        ksiginfo_t              ksi;
                    535:        ksiginfoq_t             kq;
1.294.2.3  rmind     536:        const char              *pathstring;
1.294.2.1  rmind     537:        char                    *resolvedpathbuf;
                    538:        const char              *commandname;
1.282     ad        539:        u_int                   modgen;
1.294.2.3  rmind     540:        size_t                  ps_strings_sz;
1.55      cgd       541:
1.237     ad        542:        p = l->l_proc;
1.282     ad        543:        modgen = 0;
1.164     thorpej   544:
1.294     darran    545:        SDT_PROBE(proc,,,exec, path, 0, 0, 0, 0);
                    546:
1.149     christos  547:        /*
1.269     christos  548:         * Check if we have exceeded our number of processes limit.
                    549:         * This is so that we handle the case where a root daemon
                    550:         * forked, ran setuid to become the desired user and is trying
                    551:         * to exec. The obvious place to do the reference counting check
                    552:         * is setuid(), but we don't do the reference counting check there
                    553:         * like other OS's do because then all the programs that use setuid()
                    554:         * must be modified to check the return code of setuid() and exit().
                    555:         * It is dangerous to make setuid() fail, because it fails open and
                    556:         * the program will continue to run as root. If we make it succeed
                    557:         * and return an error code, again we are not enforcing the limit.
                    558:         * The best place to enforce the limit is here, when the process tries
                    559:         * to execute a new image, because eventually the process will need
                    560:         * to call exec in order to do something useful.
                    561:         */
1.282     ad        562:  retry:
1.287     christos  563:        if ((p->p_flag & PK_SUGID) && kauth_authorize_generic(l->l_cred,
                    564:            KAUTH_GENERIC_ISSUSER, NULL) != 0 && chgproccnt(kauth_cred_getuid(
                    565:            l->l_cred), 0) > p->p_rlimit[RLIMIT_NPROC].rlim_cur)
1.269     christos  566:                return EAGAIN;
                    567:
1.279     wrstuden  568:        oldlwpflags = l->l_flag & (LW_SA | LW_SA_UPCALL);
                    569:        if (l->l_flag & LW_SA) {
                    570:                lwp_lock(l);
                    571:                l->l_flag &= ~(LW_SA | LW_SA_UPCALL);
                    572:                lwp_unlock(l);
                    573:        }
                    574:
1.269     christos  575:        /*
1.237     ad        576:         * Drain existing references and forbid new ones.  The process
                    577:         * should be left alone until we're done here.  This is necessary
                    578:         * to avoid race conditions - e.g. in ptrace() - that might allow
                    579:         * a local user to illicitly obtain elevated privileges.
                    580:         */
1.252     ad        581:        rw_enter(&p->p_reflock, RW_WRITER);
1.149     christos  582:
1.138     lukem     583:        base_vcp = NULL;
1.55      cgd       584:        /*
1.129     jdolecek  585:         * Init the namei data to point to the user's program file name.
                    586:         * This is done here rather than in check_exec(), so that it's
                    587:         * possible to override these settings if any of makecmd/probe
                    588:         * functions call check_exec() recursively - for example,
                    589:         * see exec_script_makecmds().
                    590:         */
1.294.2.3  rmind     591:        error = pathbuf_copyin(path, &pb);
1.248     christos  592:        if (error) {
1.294.2.4  rmind     593:                DPRINTF(("%s: pathbuf_copyin path @%p %d\n", __func__,
                    594:                    path, error));
1.200     elad      595:                goto clrflg;
1.248     christos  596:        }
1.294.2.3  rmind     597:        pathstring = pathbuf_stringcopy_get(pb);
1.294.2.1  rmind     598:        resolvedpathbuf = PNBUF_GET();
                    599: #ifdef DIAGNOSTIC
                    600:        strcpy(resolvedpathbuf, "/wrong");
                    601: #endif
1.55      cgd       602:
                    603:        /*
                    604:         * initialize the fields of the exec package.
                    605:         */
1.204     cube      606:        pack.ep_name = path;
1.294.2.3  rmind     607:        pack.ep_kname = pathstring;
1.294.2.1  rmind     608:        pack.ep_resolvedname = resolvedpathbuf;
1.265     yamt      609:        pack.ep_hdr = kmem_alloc(exec_maxhdrsz, KM_SLEEP);
1.55      cgd       610:        pack.ep_hdrlen = exec_maxhdrsz;
                    611:        pack.ep_hdrvalid = 0;
1.67      christos  612:        pack.ep_emul_arg = NULL;
1.55      cgd       613:        pack.ep_vmcmds.evs_cnt = 0;
                    614:        pack.ep_vmcmds.evs_used = 0;
                    615:        pack.ep_vap = &attr;
                    616:        pack.ep_flags = 0;
1.244     dsl       617:        pack.ep_emul_root = NULL;
                    618:        pack.ep_interp = NULL;
                    619:        pack.ep_esch = NULL;
1.273     ad        620:        pack.ep_pax_flags = 0;
1.55      cgd       621:
1.237     ad        622:        rw_enter(&exec_lock, RW_READER);
1.130     jdolecek  623:
1.55      cgd       624:        /* see if we can run it. */
1.294.2.3  rmind     625:        if ((error = check_exec(l, &pack, pb)) != 0) {
1.261     xtraeme   626:                if (error != ENOENT) {
1.294.2.4  rmind     627:                        DPRINTF(("%s: check exec failed %d\n",
                    628:                            __func__, error));
1.261     xtraeme   629:                }
1.55      cgd       630:                goto freehdr;
1.248     christos  631:        }
1.55      cgd       632:
                    633:        /* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */
                    634:
                    635:        /* allocate an argument buffer */
1.277     ad        636:        argp = pool_get(&exec_pool, PR_WAITOK);
                    637:        KASSERT(argp != NULL);
1.55      cgd       638:        dp = argp;
                    639:        argc = 0;
                    640:
                    641:        /* copy the fake args list, if there's one, freeing it as we go */
                    642:        if (pack.ep_flags & EXEC_HASARGL) {
                    643:                tmpfap = pack.ep_fa;
1.265     yamt      644:                while (tmpfap->fa_arg != NULL) {
                    645:                        const char *cp;
1.55      cgd       646:
1.265     yamt      647:                        cp = tmpfap->fa_arg;
1.55      cgd       648:                        while (*cp)
                    649:                                *dp++ = *cp++;
1.276     ad        650:                        *dp++ = '\0';
1.290     dsl       651:                        ktrexecarg(tmpfap->fa_arg, cp - tmpfap->fa_arg);
1.55      cgd       652:
1.265     yamt      653:                        kmem_free(tmpfap->fa_arg, tmpfap->fa_len);
1.55      cgd       654:                        tmpfap++; argc++;
                    655:                }
1.265     yamt      656:                kmem_free(pack.ep_fa, pack.ep_fa_len);
1.55      cgd       657:                pack.ep_flags &= ~EXEC_HASARGL;
                    658:        }
                    659:
                    660:        /* Now get argv & environment */
1.204     cube      661:        if (args == NULL) {
1.294.2.4  rmind     662:                DPRINTF(("%s: null args\n", __func__));
1.55      cgd       663:                error = EINVAL;
                    664:                goto bad;
                    665:        }
1.204     cube      666:        /* 'i' will index the argp/envp element to be retrieved */
                    667:        i = 0;
1.55      cgd       668:        if (pack.ep_flags & EXEC_SKIPARG)
1.204     cube      669:                i++;
1.55      cgd       670:
                    671:        while (1) {
                    672:                len = argp + ARG_MAX - dp;
1.248     christos  673:                if ((error = (*fetch_element)(args, i, &sp)) != 0) {
1.294.2.4  rmind     674:                        DPRINTF(("%s: fetch_element args %d\n",
                    675:                            __func__, error));
1.55      cgd       676:                        goto bad;
1.248     christos  677:                }
1.55      cgd       678:                if (!sp)
                    679:                        break;
1.74      christos  680:                if ((error = copyinstr(sp, dp, len, &len)) != 0) {
1.294.2.4  rmind     681:                        DPRINTF(("%s: copyinstr args %d\n", __func__, error));
1.55      cgd       682:                        if (error == ENAMETOOLONG)
                    683:                                error = E2BIG;
                    684:                        goto bad;
                    685:                }
1.247     ad        686:                ktrexecarg(dp, len - 1);
1.55      cgd       687:                dp += len;
1.204     cube      688:                i++;
1.55      cgd       689:                argc++;
                    690:        }
                    691:
                    692:        envc = 0;
1.74      christos  693:        /* environment need not be there */
1.204     cube      694:        if (envs != NULL) {
                    695:                i = 0;
1.55      cgd       696:                while (1) {
                    697:                        len = argp + ARG_MAX - dp;
1.248     christos  698:                        if ((error = (*fetch_element)(envs, i, &sp)) != 0) {
1.294.2.4  rmind     699:                                DPRINTF(("%s: fetch_element env %d\n",
                    700:                                    __func__, error));
1.55      cgd       701:                                goto bad;
1.248     christos  702:                        }
1.55      cgd       703:                        if (!sp)
                    704:                                break;
1.74      christos  705:                        if ((error = copyinstr(sp, dp, len, &len)) != 0) {
1.294.2.4  rmind     706:                                DPRINTF(("%s: copyinstr env %d\n",
                    707:                                    __func__, error));
1.55      cgd       708:                                if (error == ENAMETOOLONG)
                    709:                                        error = E2BIG;
                    710:                                goto bad;
                    711:                        }
1.247     ad        712:                        ktrexecenv(dp, len - 1);
1.55      cgd       713:                        dp += len;
1.204     cube      714:                        i++;
1.55      cgd       715:                        envc++;
                    716:                }
                    717:        }
1.61      mycroft   718:
                    719:        dp = (char *) ALIGN(dp);
1.55      cgd       720:
1.244     dsl       721:        szsigcode = pack.ep_esch->es_emul->e_esigcode -
                    722:            pack.ep_esch->es_emul->e_sigcode;
1.65      fvdl      723:
1.267     dsl       724: #ifdef __MACHINE_STACK_GROWS_UP
                    725: /* See big comment lower down */
                    726: #define        RTLD_GAP        32
                    727: #else
                    728: #define        RTLD_GAP        0
                    729: #endif
                    730:
1.55      cgd       731:        /* Now check if args & environ fit into new stack */
1.294.2.3  rmind     732:        if (pack.ep_flags & EXEC_32) {
                    733:                aip = &arginfo32;
                    734:                ps_strings_sz = sizeof(struct ps_strings32);
1.244     dsl       735:                len = ((argc + envc + 2 + pack.ep_esch->es_arglen) *
1.267     dsl       736:                    sizeof(int) + sizeof(int) + dp + RTLD_GAP +
1.294.2.3  rmind     737:                    szsigcode + ps_strings_sz + STACK_PTHREADSPACE)
1.188     chs       738:                    - argp;
1.294.2.3  rmind     739:        } else {
                    740:                aip = &arginfo;
                    741:                ps_strings_sz = sizeof(struct ps_strings);
1.244     dsl       742:                len = ((argc + envc + 2 + pack.ep_esch->es_arglen) *
1.267     dsl       743:                    sizeof(char *) + sizeof(int) + dp + RTLD_GAP +
1.294.2.3  rmind     744:                    szsigcode + ps_strings_sz + STACK_PTHREADSPACE)
1.188     chs       745:                    - argp;
1.294.2.3  rmind     746:        }
1.67      christos  747:
1.262     elad      748: #ifdef PAX_ASLR
                    749:        if (pax_aslr_active(l))
                    750:                len += (arc4random() % PAGE_SIZE);
                    751: #endif /* PAX_ASLR */
                    752:
1.243     matt      753: #ifdef STACKLALIGN     /* arm, etc. */
                    754:        len = STACKALIGN(len);  /* make the stack "safely" aligned */
                    755: #else
1.55      cgd       756:        len = ALIGN(len);       /* make the stack "safely" aligned */
1.243     matt      757: #endif
1.55      cgd       758:
                    759:        if (len > pack.ep_ssize) { /* in effect, compare to initial limit */
1.294.2.4  rmind     760:                DPRINTF(("%s: stack limit exceeded %zu\n", __func__, len));
1.55      cgd       761:                error = ENOMEM;
                    762:                goto bad;
                    763:        }
                    764:
1.237     ad        765:        /* Get rid of other LWPs. */
1.279     wrstuden  766:        if (p->p_sa || p->p_nlwps > 1) {
1.272     ad        767:                mutex_enter(p->p_lock);
1.237     ad        768:                exit_lwps(l);
1.272     ad        769:                mutex_exit(p->p_lock);
1.237     ad        770:        }
1.164     thorpej   771:        KDASSERT(p->p_nlwps == 1);
                    772:
1.253     ad        773:        /* Destroy any lwpctl info. */
                    774:        if (p->p_lwpctl != NULL)
                    775:                lwp_ctl_exit();
                    776:
1.279     wrstuden  777: #ifdef KERN_SA
                    778:        /* Release any SA state. */
                    779:        if (p->p_sa)
                    780:                sa_release(p);
                    781: #endif /* KERN_SA */
                    782:
1.164     thorpej   783:        /* Remove POSIX timers */
                    784:        timers_free(p, TIMERS_POSIX);
                    785:
1.55      cgd       786:        /* adjust "active stack depth" for process VSZ */
                    787:        pack.ep_ssize = len;    /* maybe should go elsewhere, but... */
                    788:
1.86      thorpej   789:        /*
                    790:         * Do whatever is necessary to prepare the address space
                    791:         * for remapping.  Note that this might replace the current
                    792:         * vmspace with another!
                    793:         */
1.164     thorpej   794:        uvmspace_exec(l, pack.ep_vm_minaddr, pack.ep_vm_maxaddr);
1.55      cgd       795:
1.186     chs       796:        /* record proc's vnode, for use by procfs and others */
                    797:         if (p->p_textvp)
                    798:                 vrele(p->p_textvp);
1.293     pooka     799:        vref(pack.ep_vp);
1.186     chs       800:        p->p_textvp = pack.ep_vp;
                    801:
1.55      cgd       802:        /* Now map address space */
1.86      thorpej   803:        vm = p->p_vmspace;
1.241     dogcow    804:        vm->vm_taddr = (void *)pack.ep_taddr;
1.55      cgd       805:        vm->vm_tsize = btoc(pack.ep_tsize);
1.241     dogcow    806:        vm->vm_daddr = (void*)pack.ep_daddr;
1.55      cgd       807:        vm->vm_dsize = btoc(pack.ep_dsize);
                    808:        vm->vm_ssize = btoc(pack.ep_ssize);
1.288     mrg       809:        vm->vm_issize = 0;
1.241     dogcow    810:        vm->vm_maxsaddr = (void *)pack.ep_maxsaddr;
                    811:        vm->vm_minsaddr = (void *)pack.ep_minsaddr;
1.55      cgd       812:
1.260     christos  813: #ifdef PAX_ASLR
                    814:        pax_aslr_init(l, vm);
                    815: #endif /* PAX_ASLR */
                    816:
1.55      cgd       817:        /* create the new process's VM space by running the vmcmds */
                    818: #ifdef DIAGNOSTIC
                    819:        if (pack.ep_vmcmds.evs_used == 0)
1.294.2.4  rmind     820:                panic("%s: no vmcmds", __func__);
1.55      cgd       821: #endif
                    822:        for (i = 0; i < pack.ep_vmcmds.evs_used && !error; i++) {
                    823:                struct exec_vmcmd *vcp;
                    824:
                    825:                vcp = &pack.ep_vmcmds.evs_cmds[i];
1.114     matt      826:                if (vcp->ev_flags & VMCMD_RELATIVE) {
                    827: #ifdef DIAGNOSTIC
                    828:                        if (base_vcp == NULL)
1.294.2.4  rmind     829:                                panic("%s: relative vmcmd with no base",
                    830:                                    __func__);
1.114     matt      831:                        if (vcp->ev_flags & VMCMD_BASE)
1.294.2.4  rmind     832:                                panic("%s: illegal base & relative vmcmd",
                    833:                                    __func__);
1.114     matt      834: #endif
                    835:                        vcp->ev_addr += base_vcp->ev_addr;
                    836:                }
1.212     christos  837:                error = (*vcp->ev_proc)(l, vcp);
1.143     christos  838: #ifdef DEBUG_EXEC
1.111     matt      839:                if (error) {
1.248     christos  840:                        size_t j;
1.143     christos  841:                        struct exec_vmcmd *vp = &pack.ep_vmcmds.evs_cmds[0];
1.294.2.3  rmind     842:                        uprintf("vmcmds %zu/%u, error %d\n", i,
                    843:                            pack.ep_vmcmds.evs_used, error);
1.143     christos  844:                        for (j = 0; j <= i; j++)
1.294.2.3  rmind     845:                                uprintf("vmcmd[%zu] = vmcmd_map_%s %#"
                    846:                                    PRIxVADDR"/%#"PRIxVSIZE" fd@%#"
                    847:                                    PRIxVSIZE" prot=0%o flags=%d\n", j,
                    848:                                    vp[j].ev_proc == vmcmd_map_pagedvn ?
                    849:                                    "pagedvn" :
                    850:                                    vp[j].ev_proc == vmcmd_map_readvn ?
                    851:                                    "readvn" :
                    852:                                    vp[j].ev_proc == vmcmd_map_zero ?
                    853:                                    "zero" : "*unknown*",
                    854:                                    vp[j].ev_addr, vp[j].ev_len,
1.143     christos  855:                                    vp[j].ev_offset, vp[j].ev_prot,
                    856:                                    vp[j].ev_flags);
1.111     matt      857:                }
1.143     christos  858: #endif /* DEBUG_EXEC */
1.114     matt      859:                if (vcp->ev_flags & VMCMD_BASE)
                    860:                        base_vcp = vcp;
1.55      cgd       861:        }
                    862:
                    863:        /* free the vmspace-creation commands, and release their references */
                    864:        kill_vmcmds(&pack.ep_vmcmds);
                    865:
1.186     chs       866:        vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
1.254     pooka     867:        VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred);
1.186     chs       868:        vput(pack.ep_vp);
                    869:
1.55      cgd       870:        /* if an error happened, deallocate and punt */
1.111     matt      871:        if (error) {
1.294.2.4  rmind     872:                DPRINTF(("%s: vmcmd %zu failed: %d\n", __func__, i - 1, error));
1.55      cgd       873:                goto exec_abort;
1.111     matt      874:        }
1.55      cgd       875:
                    876:        /* remember information about the process */
                    877:        arginfo.ps_nargvstr = argc;
                    878:        arginfo.ps_nenvstr = envc;
                    879:
1.255     christos  880:        /* set command name & other accounting info */
1.294.2.1  rmind     881:        commandname = strrchr(pack.ep_resolvedname, '/');
                    882:        if (commandname != NULL) {
                    883:                commandname++;
                    884:        } else {
                    885:                commandname = pack.ep_resolvedname;
                    886:        }
                    887:        i = min(strlen(commandname), MAXCOMLEN);
                    888:        (void)memcpy(p->p_comm, commandname, i);
1.255     christos  889:        p->p_comm[i] = '\0';
                    890:
                    891:        dp = PNBUF_GET();
                    892:        /*
                    893:         * If the path starts with /, we don't need to do any work.
                    894:         * This handles the majority of the cases.
                    895:         * In the future perhaps we could canonicalize it?
                    896:         */
1.294.2.3  rmind     897:        if (pathstring[0] == '/')
                    898:                (void)strlcpy(pack.ep_path = dp, pathstring, MAXPATHLEN);
1.255     christos  899: #ifdef notyet
                    900:        /*
                    901:         * Although this works most of the time [since the entry was just
                    902:         * entered in the cache] we don't use it because it theoretically
                    903:         * can fail and it is not the cleanest interface, because there
                    904:         * could be races. When the namei cache is re-written, this can
                    905:         * be changed to use the appropriate function.
                    906:         */
                    907:        else if (!(error = vnode_to_path(dp, MAXPATHLEN, p->p_textvp, l, p)))
                    908:                pack.ep_path = dp;
                    909: #endif
                    910:        else {
1.256     christos  911: #ifdef notyet
1.255     christos  912:                printf("Cannot get path for pid %d [%s] (error %d)",
                    913:                    (int)p->p_pid, p->p_comm, error);
                    914: #endif
                    915:                pack.ep_path = NULL;
                    916:                PNBUF_PUT(dp);
                    917:        }
                    918:
1.163     chs       919:        stack = (char *)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
1.294.2.3  rmind     920:                STACK_PTHREADSPACE + ps_strings_sz + szsigcode),
                    921:                len - (ps_strings_sz + szsigcode));
1.267     dsl       922:
1.163     chs       923: #ifdef __MACHINE_STACK_GROWS_UP
                    924:        /*
                    925:         * The copyargs call always copies into lower addresses
                    926:         * first, moving towards higher addresses, starting with
1.183     junyoung  927:         * the stack pointer that we give.  When the stack grows
                    928:         * down, this puts argc/argv/envp very shallow on the
1.267     dsl       929:         * stack, right at the first user stack pointer.
                    930:         * When the stack grows up, the situation is reversed.
1.163     chs       931:         *
                    932:         * Normally, this is no big deal.  But the ld_elf.so _rtld()
1.183     junyoung  933:         * function expects to be called with a single pointer to
                    934:         * a region that has a few words it can stash values into,
1.163     chs       935:         * followed by argc/argv/envp.  When the stack grows down,
                    936:         * it's easy to decrement the stack pointer a little bit to
                    937:         * allocate the space for these few words and pass the new
                    938:         * stack pointer to _rtld.  When the stack grows up, however,
1.171     chs       939:         * a few words before argc is part of the signal trampoline, XXX
1.163     chs       940:         * so we have a problem.
                    941:         *
1.183     junyoung  942:         * Instead of changing how _rtld works, we take the easy way
1.267     dsl       943:         * out and steal 32 bytes before we call copyargs.
                    944:         * This extra space was allowed for when 'len' was calculated.
1.163     chs       945:         */
1.267     dsl       946:        stack += RTLD_GAP;
1.163     chs       947: #endif /* __MACHINE_STACK_GROWS_UP */
                    948:
1.55      cgd       949:        /* Now copy argc, args & environ to new stack */
1.244     dsl       950:        error = (*pack.ep_esch->es_copyargs)(l, &pack, &arginfo, &stack, argp);
1.255     christos  951:        if (pack.ep_path) {
                    952:                PNBUF_PUT(pack.ep_path);
                    953:                pack.ep_path = NULL;
                    954:        }
1.144     christos  955:        if (error) {
1.294.2.4  rmind     956:                DPRINTF(("%s: copyargs failed %d\n", __func__, error));
1.55      cgd       957:                goto exec_abort;
1.111     matt      958:        }
1.144     christos  959:        /* Move the stack back to original point */
1.163     chs       960:        stack = (char *)STACK_GROW(vm->vm_minsaddr, len);
1.55      cgd       961:
1.121     eeh       962:        /* fill process ps_strings info */
1.294.2.3  rmind     963:        p->p_psstrp = (vaddr_t)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
                    964:            STACK_PTHREADSPACE), ps_strings_sz);
                    965:
                    966:        if (pack.ep_flags & EXEC_32) {
                    967:                arginfo32.ps_argvstr = (vaddr_t)arginfo.ps_argvstr;
                    968:                arginfo32.ps_nargvstr = arginfo.ps_nargvstr;
                    969:                arginfo32.ps_envstr = (vaddr_t)arginfo.ps_envstr;
                    970:                arginfo32.ps_nenvstr = arginfo.ps_nenvstr;
                    971:        }
1.121     eeh       972:
1.55      cgd       973:        /* copy out the process's ps_strings structure */
1.294.2.3  rmind     974:        if ((error = copyout(aip, (void *)p->p_psstrp, ps_strings_sz)) != 0) {
1.294.2.4  rmind     975:                DPRINTF(("%s: ps_strings copyout %p->%p size %zu failed\n",
                    976:                    __func__, aip, (void *)p->p_psstrp, ps_strings_sz));
1.55      cgd       977:                goto exec_abort;
1.111     matt      978:        }
1.109     simonb    979:
1.294.2.3  rmind     980:        cwdexec(p);
1.270     ad        981:        fd_closeexec();         /* handle close on exec */
1.55      cgd       982:        execsigs(p);            /* reset catched signals */
1.183     junyoung  983:
1.164     thorpej   984:        l->l_ctxlink = NULL;    /* reset ucontext link */
1.55      cgd       985:
1.255     christos  986:
1.55      cgd       987:        p->p_acflag &= ~AFORK;
1.272     ad        988:        mutex_enter(p->p_lock);
1.238     pavel     989:        p->p_flag |= PK_EXEC;
1.272     ad        990:        mutex_exit(p->p_lock);
1.237     ad        991:
                    992:        /*
                    993:         * Stop profiling.
                    994:         */
                    995:        if ((p->p_stflag & PST_PROFIL) != 0) {
                    996:                mutex_spin_enter(&p->p_stmutex);
                    997:                stopprofclock(p);
                    998:                mutex_spin_exit(&p->p_stmutex);
                    999:        }
                   1000:
                   1001:        /*
1.275     ad       1002:         * It's OK to test PL_PPWAIT unlocked here, as other LWPs have
1.237     ad       1003:         * exited and exec()/exit() are the only places it will be cleared.
                   1004:         */
1.275     ad       1005:        if ((p->p_lflag & PL_PPWAIT) != 0) {
1.271     ad       1006:                mutex_enter(proc_lock);
1.294.2.3  rmind    1007:                l->l_lwpctl = NULL; /* was on loan from blocked parent */
1.275     ad       1008:                p->p_lflag &= ~PL_PPWAIT;
1.237     ad       1009:                cv_broadcast(&p->p_pptr->p_waitcv);
1.271     ad       1010:                mutex_exit(proc_lock);
1.55      cgd      1011:        }
                   1012:
                   1013:        /*
1.237     ad       1014:         * Deal with set[ug]id.  MNT_NOSUID has already been used to disable
                   1015:         * s[ug]id.  It's OK to check for PSL_TRACED here as we have blocked
                   1016:         * out additional references on the process for the moment.
1.55      cgd      1017:         */
1.237     ad       1018:        if ((p->p_slflag & PSL_TRACED) == 0 &&
1.141     thorpej  1019:
                   1020:            (((attr.va_mode & S_ISUID) != 0 &&
1.221     ad       1021:              kauth_cred_geteuid(l->l_cred) != attr.va_uid) ||
1.141     thorpej  1022:
                   1023:             ((attr.va_mode & S_ISGID) != 0 &&
1.221     ad       1024:              kauth_cred_getegid(l->l_cred) != attr.va_gid))) {
1.141     thorpej  1025:                /*
                   1026:                 * Mark the process as SUGID before we do
                   1027:                 * anything that might block.
                   1028:                 */
1.237     ad       1029:                proc_crmod_enter();
1.240     thorpej  1030:                proc_crmod_leave(NULL, NULL, true);
1.152     christos 1031:
                   1032:                /* Make sure file descriptors 0..2 are in use. */
1.270     ad       1033:                if ((error = fd_checkstd()) != 0) {
1.294.2.4  rmind    1034:                        DPRINTF(("%s: fdcheckstd failed %d\n",
                   1035:                            __func__, error));
1.152     christos 1036:                        goto exec_abort;
1.209     christos 1037:                }
1.141     thorpej  1038:
1.220     ad       1039:                /*
                   1040:                 * Copy the credential so other references don't see our
                   1041:                 * changes.
                   1042:                 */
1.221     ad       1043:                l->l_cred = kauth_cred_copy(l->l_cred);
1.55      cgd      1044: #ifdef KTRACE
                   1045:                /*
1.268     elad     1046:                 * If the persistent trace flag isn't set, turn off.
1.55      cgd      1047:                 */
1.237     ad       1048:                if (p->p_tracep) {
1.247     ad       1049:                        mutex_enter(&ktrace_lock);
1.268     elad     1050:                        if (!(p->p_traceflag & KTRFAC_PERSISTENT))
1.237     ad       1051:                                ktrderef(p);
1.247     ad       1052:                        mutex_exit(&ktrace_lock);
1.237     ad       1053:                }
1.55      cgd      1054: #endif
1.83      mycroft  1055:                if (attr.va_mode & S_ISUID)
1.221     ad       1056:                        kauth_cred_seteuid(l->l_cred, attr.va_uid);
1.83      mycroft  1057:                if (attr.va_mode & S_ISGID)
1.221     ad       1058:                        kauth_cred_setegid(l->l_cred, attr.va_gid);
1.210     christos 1059:        } else {
1.221     ad       1060:                if (kauth_cred_geteuid(l->l_cred) ==
                   1061:                    kauth_cred_getuid(l->l_cred) &&
                   1062:                    kauth_cred_getegid(l->l_cred) ==
                   1063:                    kauth_cred_getgid(l->l_cred))
1.238     pavel    1064:                        p->p_flag &= ~PK_SUGID;
1.210     christos 1065:        }
1.220     ad       1066:
                   1067:        /*
                   1068:         * Copy the credential so other references don't see our changes.
                   1069:         * Test to see if this is necessary first, since in the common case
                   1070:         * we won't need a private reference.
                   1071:         */
1.221     ad       1072:        if (kauth_cred_geteuid(l->l_cred) != kauth_cred_getsvuid(l->l_cred) ||
                   1073:            kauth_cred_getegid(l->l_cred) != kauth_cred_getsvgid(l->l_cred)) {
                   1074:                l->l_cred = kauth_cred_copy(l->l_cred);
                   1075:                kauth_cred_setsvuid(l->l_cred, kauth_cred_geteuid(l->l_cred));
                   1076:                kauth_cred_setsvgid(l->l_cred, kauth_cred_getegid(l->l_cred));
1.220     ad       1077:        }
1.155     gmcgarry 1078:
1.221     ad       1079:        /* Update the master credentials. */
1.227     ad       1080:        if (l->l_cred != p->p_cred) {
                   1081:                kauth_cred_t ocred;
                   1082:
                   1083:                kauth_cred_hold(l->l_cred);
1.272     ad       1084:                mutex_enter(p->p_lock);
1.227     ad       1085:                ocred = p->p_cred;
                   1086:                p->p_cred = l->l_cred;
1.272     ad       1087:                mutex_exit(p->p_lock);
1.227     ad       1088:                kauth_cred_free(ocred);
                   1089:        }
1.221     ad       1090:
1.155     gmcgarry 1091: #if defined(__HAVE_RAS)
                   1092:        /*
                   1093:         * Remove all RASs from the address space.
                   1094:         */
1.251     ad       1095:        ras_purgeall();
1.155     gmcgarry 1096: #endif
1.107     fvdl     1097:
                   1098:        doexechooks(p);
1.55      cgd      1099:
                   1100:        /* setup new registers and do misc. setup. */
1.292     matt     1101:        (*pack.ep_esch->es_emul->e_setregs)(l, &pack, (vaddr_t)stack);
1.244     dsl      1102:        if (pack.ep_esch->es_setregs)
1.292     matt     1103:                (*pack.ep_esch->es_setregs)(l, &pack, (vaddr_t)stack);
1.55      cgd      1104:
1.294.2.3  rmind    1105:        /* Provide a consistent LWP private setting */
                   1106:        (void)lwp_setprivate(l, NULL);
                   1107:
1.171     chs      1108:        /* map the process's signal trampoline code */
1.294.2.3  rmind    1109:        if ((error = exec_sigcode_map(p, pack.ep_esch->es_emul)) != 0) {
1.294.2.4  rmind    1110:                DPRINTF(("%s: map sigcode failed %d\n", __func__, error));
1.171     chs      1111:                goto exec_abort;
1.209     christos 1112:        }
1.171     chs      1113:
1.277     ad       1114:        pool_put(&exec_pool, argp);
1.276     ad       1115:
                   1116:        /* notify others that we exec'd */
                   1117:        KNOTE(&p->p_klist, NOTE_EXEC);
                   1118:
1.265     yamt     1119:        kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1.122     jdolecek 1120:
1.294     darran   1121:        SDT_PROBE(proc,,,exec_success, path, 0, 0, 0, 0);
                   1122:
1.244     dsl      1123:        /* The emulation root will usually have been found when we looked
                   1124:         * for the elf interpreter (or similar), if not look now. */
                   1125:        if (pack.ep_esch->es_emul->e_path != NULL && pack.ep_emul_root == NULL)
                   1126:                emul_find_root(l, &pack);
                   1127:
                   1128:        /* Any old emulation root got removed by fdcloseexec */
1.259     ad       1129:        rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER);
1.244     dsl      1130:        p->p_cwdi->cwdi_edir = pack.ep_emul_root;
1.259     ad       1131:        rw_exit(&p->p_cwdi->cwdi_lock);
1.244     dsl      1132:        pack.ep_emul_root = NULL;
                   1133:        if (pack.ep_interp != NULL)
                   1134:                vrele(pack.ep_interp);
                   1135:
1.122     jdolecek 1136:        /*
1.194     peter    1137:         * Call emulation specific exec hook. This can setup per-process
1.122     jdolecek 1138:         * p->p_emuldata or do any other per-process stuff an emulation needs.
                   1139:         *
                   1140:         * If we are executing process of different emulation than the
                   1141:         * original forked process, call e_proc_exit() of the old emulation
                   1142:         * first, then e_proc_exec() of new emulation. If the emulation is
                   1143:         * same, the exec hook code should deallocate any old emulation
                   1144:         * resources held previously by this process.
                   1145:         */
1.124     jdolecek 1146:        if (p->p_emul && p->p_emul->e_proc_exit
1.244     dsl      1147:            && p->p_emul != pack.ep_esch->es_emul)
1.122     jdolecek 1148:                (*p->p_emul->e_proc_exit)(p);
                   1149:
1.123     jdolecek 1150:        /*
1.294.2.3  rmind    1151:         * This is now LWP 1.
                   1152:         */
                   1153:        mutex_enter(p->p_lock);
                   1154:        p->p_nlwpid = 1;
                   1155:        l->l_lid = 1;
                   1156:        mutex_exit(p->p_lock);
                   1157:
                   1158:        /*
1.123     jdolecek 1159:         * Call exec hook. Emulation code may NOT store reference to anything
                   1160:         * from &pack.
                   1161:         */
1.244     dsl      1162:         if (pack.ep_esch->es_emul->e_proc_exec)
                   1163:                 (*pack.ep_esch->es_emul->e_proc_exec)(p, &pack);
1.122     jdolecek 1164:
                   1165:        /* update p_emul, the old value is no longer needed */
1.244     dsl      1166:        p->p_emul = pack.ep_esch->es_emul;
1.148     thorpej  1167:
                   1168:        /* ...and the same for p_execsw */
1.244     dsl      1169:        p->p_execsw = pack.ep_esch;
1.148     thorpej  1170:
1.133     mycroft  1171: #ifdef __HAVE_SYSCALL_INTERN
                   1172:        (*p->p_emul->e_syscall_intern)(p);
                   1173: #endif
1.247     ad       1174:        ktremul();
1.85      mycroft  1175:
1.252     ad       1176:        /* Allow new references from the debugger/procfs. */
                   1177:        rw_exit(&p->p_reflock);
1.237     ad       1178:        rw_exit(&exec_lock);
1.162     manu     1179:
1.271     ad       1180:        mutex_enter(proc_lock);
1.237     ad       1181:
                   1182:        if ((p->p_slflag & (PSL_TRACED|PSL_SYSCALL)) == PSL_TRACED) {
                   1183:                KSI_INIT_EMPTY(&ksi);
                   1184:                ksi.ksi_signo = SIGTRAP;
                   1185:                ksi.ksi_lid = l->l_lid;
                   1186:                kpsignal(p, &ksi, NULL);
                   1187:        }
1.162     manu     1188:
1.237     ad       1189:        if (p->p_sflag & PS_STOPEXEC) {
                   1190:                KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
1.175     dsl      1191:                p->p_pptr->p_nstopchild++;
1.237     ad       1192:                p->p_pptr->p_waited = 0;
1.272     ad       1193:                mutex_enter(p->p_lock);
1.237     ad       1194:                ksiginfo_queue_init(&kq);
                   1195:                sigclearall(p, &contsigmask, &kq);
                   1196:                lwp_lock(l);
                   1197:                l->l_stat = LSSTOP;
1.162     manu     1198:                p->p_stat = SSTOP;
1.164     thorpej  1199:                p->p_nrlwps--;
1.294.2.3  rmind    1200:                lwp_unlock(l);
1.272     ad       1201:                mutex_exit(p->p_lock);
1.271     ad       1202:                mutex_exit(proc_lock);
1.294.2.3  rmind    1203:                lwp_lock(l);
1.245     yamt     1204:                mi_switch(l);
1.237     ad       1205:                ksiginfo_queue_drain(&kq);
                   1206:                KERNEL_LOCK(l->l_biglocks, l);
                   1207:        } else {
1.271     ad       1208:                mutex_exit(proc_lock);
1.162     manu     1209:        }
                   1210:
1.294.2.3  rmind    1211:        pathbuf_stringcopy_put(pb, pathstring);
                   1212:        pathbuf_destroy(pb);
1.294.2.1  rmind    1213:        PNBUF_PUT(resolvedpathbuf);
1.85      mycroft  1214:        return (EJUSTRETURN);
1.55      cgd      1215:
1.138     lukem    1216:  bad:
1.55      cgd      1217:        /* free the vmspace-creation commands, and release their references */
                   1218:        kill_vmcmds(&pack.ep_vmcmds);
                   1219:        /* kill any opened file descriptor, if necessary */
                   1220:        if (pack.ep_flags & EXEC_HASFD) {
                   1221:                pack.ep_flags &= ~EXEC_HASFD;
1.270     ad       1222:                fd_close(pack.ep_fd);
1.55      cgd      1223:        }
                   1224:        /* close and put the exec'd file */
1.99      wrstuden 1225:        vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
1.254     pooka    1226:        VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred);
1.99      wrstuden 1227:        vput(pack.ep_vp);
1.277     ad       1228:        pool_put(&exec_pool, argp);
1.55      cgd      1229:
1.138     lukem    1230:  freehdr:
1.265     yamt     1231:        kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1.244     dsl      1232:        if (pack.ep_emul_root != NULL)
                   1233:                vrele(pack.ep_emul_root);
                   1234:        if (pack.ep_interp != NULL)
                   1235:                vrele(pack.ep_interp);
1.200     elad     1236:
1.274     ad       1237:        rw_exit(&exec_lock);
                   1238:
1.294.2.3  rmind    1239:        pathbuf_stringcopy_put(pb, pathstring);
                   1240:        pathbuf_destroy(pb);
1.294.2.1  rmind    1241:        PNBUF_PUT(resolvedpathbuf);
                   1242:
1.200     elad     1243:  clrflg:
1.279     wrstuden 1244:        lwp_lock(l);
                   1245:        l->l_flag |= oldlwpflags;
                   1246:        lwp_unlock(l);
1.252     ad       1247:        rw_exit(&p->p_reflock);
1.130     jdolecek 1248:
1.282     ad       1249:        if (modgen != module_gen && error == ENOEXEC) {
                   1250:                modgen = module_gen;
                   1251:                exec_autoload();
                   1252:                goto retry;
                   1253:        }
                   1254:
1.294     darran   1255:        SDT_PROBE(proc,,,exec_failure, error, 0, 0, 0, 0);
1.55      cgd      1256:        return error;
                   1257:
1.138     lukem    1258:  exec_abort:
1.294     darran   1259:        SDT_PROBE(proc,,,exec_failure, error, 0, 0, 0, 0);
1.252     ad       1260:        rw_exit(&p->p_reflock);
1.237     ad       1261:        rw_exit(&exec_lock);
1.130     jdolecek 1262:
1.294.2.3  rmind    1263:        pathbuf_stringcopy_put(pb, pathstring);
                   1264:        pathbuf_destroy(pb);
1.294.2.1  rmind    1265:        PNBUF_PUT(resolvedpathbuf);
                   1266:
1.55      cgd      1267:        /*
                   1268:         * the old process doesn't exist anymore.  exit gracefully.
                   1269:         * get rid of the (new) address space we have created, if any, get rid
                   1270:         * of our namei data and vnode, and exit noting failure
                   1271:         */
1.88      mrg      1272:        uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
                   1273:                VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
1.73      mycroft  1274:        if (pack.ep_emul_arg)
1.284     cegger   1275:                free(pack.ep_emul_arg, M_TEMP);
1.277     ad       1276:        pool_put(&exec_pool, argp);
1.265     yamt     1277:        kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1.244     dsl      1278:        if (pack.ep_emul_root != NULL)
                   1279:                vrele(pack.ep_emul_root);
                   1280:        if (pack.ep_interp != NULL)
                   1281:                vrele(pack.ep_interp);
1.237     ad       1282:
1.252     ad       1283:        /* Acquire the sched-state mutex (exit1() will release it). */
1.272     ad       1284:        mutex_enter(p->p_lock);
1.164     thorpej  1285:        exit1(l, W_EXITCODE(error, SIGABRT));
1.55      cgd      1286:
                   1287:        /* NOTREACHED */
                   1288:        return 0;
1.67      christos 1289: }
                   1290:
                         /*
                          * copyargs: lay out argc, argv[], envp[] and their strings at *stackp.
                          *
                          * Starting at the address in *stackp, the following is written with
                          * copyout()/copyoutstr():
                          *
                          *   argc                          (arginfo->ps_nargvstr)
                          *   argv[0..argc-1], NULL
                          *   envp[0..envc-1], NULL          (envc is arginfo->ps_nenvstr)
                          *   pack->ep_esch->es_arglen further pointer-sized slots, skipped here
                          *   the argument and environment strings, back to back
                          *
                          * The strings are read sequentially from `argp': argument strings
                          * first, then environment strings.
                          *
                          * As side effects, arginfo->ps_argvstr and arginfo->ps_envstr are set
                          * to the addresses of the argv[] and envp[] arrays, and on success
                          * *stackp is advanced to just past the NULL that terminates envp[].
                          *
                          * Returns 0 on success, or the error from the first failing
                          * copyout()/copyoutstr(); in that case *stackp is left unchanged.
                          *
                          * NOTE(review): `l' is not referenced directly in this body; COPYPRINTF
                          * is defined outside this view and may use it -- confirm.
                          */
1.144     christos 1291: int
1.231     yamt     1292: copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo,
                   1293:     char **stackp, void *argp)
1.67      christos 1294: {
1.138     lukem    1295:        char    **cpp, *dp, *sp;
                   1296:        size_t  len;
                   1297:        void    *nullp;
                   1298:        long    argc, envc;
1.144     christos 1299:        int     error;
1.138     lukem    1300:
1.144     christos 1301:        cpp = (char **)*stackp;
1.138     lukem    1302:        nullp = NULL;
                   1303:        argc = arginfo->ps_nargvstr;
                   1304:        envc = arginfo->ps_nenvstr;
                                /* argc occupies the first slot; cpp then points at argv[0]. */
1.294.2.3  rmind    1305:        if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0) {
1.294.2.4  rmind    1306:                COPYPRINTF("", cpp - 1, sizeof(argc));
1.144     christos 1307:                return error;
1.294.2.3  rmind    1308:        }
1.67      christos 1309:
                                /*
                                 * Strings start after both pointer arrays, their two NULL
                                 * terminators and es_arglen extra slots (counted in units of
                                 * char *, since cpp is a char **).
                                 */
1.244     dsl      1310:        dp = (char *) (cpp + argc + envc + 2 + pack->ep_esch->es_arglen);
1.67      christos 1311:        sp = argp;
                   1312:
                   1313:        /* XXX don't copy them out, remap them! */
1.69      mycroft  1314:        arginfo->ps_argvstr = cpp; /* remember location of argv for later */
1.67      christos 1315:
                                /*
                                 * For each argument: store the string's destination address in
                                 * the argv slot, then copy the string itself.  Source and
                                 * destination both advance by the length copyoutstr() reports
                                 * (per copyoutstr(9) this presumably includes the terminating
                                 * NUL -- confirm).
                                 */
1.294.2.3  rmind    1316:        for (; --argc >= 0; sp += len, dp += len) {
                   1317:                if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
1.294.2.4  rmind    1318:                        COPYPRINTF("", cpp - 1, sizeof(dp));
1.294.2.3  rmind    1319:                        return error;
                   1320:                }
                   1321:                if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
1.294.2.4  rmind    1322:                        COPYPRINTF("str", dp, (size_t)ARG_MAX);
1.144     christos 1323:                        return error;
1.294.2.3  rmind    1324:                }
                   1325:        }
1.67      christos 1326:
                                /* NULL-terminate argv[]. */
1.294.2.3  rmind    1327:        if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
1.294.2.4  rmind    1328:                COPYPRINTF("", cpp - 1, sizeof(nullp));
1.144     christos 1329:                return error;
1.294.2.3  rmind    1330:        }
1.67      christos 1331:
1.69      mycroft  1332:        arginfo->ps_envstr = cpp; /* remember location of envp for later */
1.67      christos 1333:
                                /* Same pointer-then-string scheme for the environment. */
1.294.2.3  rmind    1334:        for (; --envc >= 0; sp += len, dp += len) {
                   1335:                if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
1.294.2.4  rmind    1336:                        COPYPRINTF("", cpp - 1, sizeof(dp));
1.294.2.3  rmind    1337:                        return error;
                   1338:                }
                   1339:                if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
1.294.2.4  rmind    1340:                        COPYPRINTF("str", dp, (size_t)ARG_MAX);
1.144     christos 1341:                        return error;
1.294.2.3  rmind    1342:                }
                   1343:        }
1.67      christos 1344:
                                /* NULL-terminate envp[]. */
1.294.2.3  rmind    1345:        if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
1.294.2.4  rmind    1346:                COPYPRINTF("", cpp - 1, sizeof(nullp));
1.144     christos 1347:                return error;
1.294.2.3  rmind    1348:        }
1.67      christos 1349:
                                /* Report the slot following the envp[] terminator to the caller. */
1.144     christos 1350:        *stackp = (char *)cpp;
                   1351:        return 0;
1.55      cgd      1352: }
1.130     jdolecek 1353:
                   1354:
                   1355: /*
1.282     ad       1356:  * Add execsw[] entries.
                          *
                          * Registers the `count' elements of the array `esp'.  With exec_lock
                          * held as writer, each element is first compared against every entry
                          * already on ex_head; a match on (es_makecmds, u.elf_probe_func,
                          * es_emul) aborts the whole call with EEXIST and nothing is added.
                          * Otherwise one exec_entry per element is allocated and linked onto
                          * ex_head, and exec_init(0) rebuilds execsw[].
                          *
                          * The entries store pointers into `esp' (&esp[i]); the array is not
                          * copied.
                          *
                          * Returns 0 on success (including count == 0) or EEXIST.
                          *
                          * NOTE(review): the duplicate check only looks at entries already on
                          * ex_head; elements of `esp' are not compared with each other --
                          * confirm callers never pass duplicates within one array.
1.130     jdolecek 1357:  */
                   1358: int
1.282     ad       1359: exec_add(struct execsw *esp, int count)
1.130     jdolecek 1360: {
1.282     ad       1361:        struct exec_entry       *it;
                   1362:        int                     i;
1.130     jdolecek 1363:
                                /* Nothing to add; return before taking exec_lock. */
1.283     ad       1364:        if (count == 0) {
                   1365:                return 0;
                   1366:        }
1.130     jdolecek 1367:
1.282     ad       1368:        /* Check for duplicates. */
1.237     ad       1369:        rw_enter(&exec_lock, RW_WRITER);
1.282     ad       1370:        for (i = 0; i < count; i++) {
                   1371:                LIST_FOREACH(it, &ex_head, ex_list) {
                   1372:                        /* assume unique (makecmds, probe_func, emulation) */
                   1373:                        if (it->ex_sw->es_makecmds == esp[i].es_makecmds &&
                   1374:                            it->ex_sw->u.elf_probe_func ==
                   1375:                            esp[i].u.elf_probe_func &&
                   1376:                            it->ex_sw->es_emul == esp[i].es_emul) {
                   1377:                                rw_exit(&exec_lock);
                   1378:                                return EEXIST;
1.130     jdolecek 1379:                        }
                   1380:                }
                   1381:        }
                   1382:
1.282     ad       1383:        /* Allocate new entries. */
                   1384:        for (i = 0; i < count; i++) {
                   1385:                it = kmem_alloc(sizeof(*it), KM_SLEEP);
                   1386:                it->ex_sw = &esp[i];
                   1387:                LIST_INSERT_HEAD(&ex_head, it, ex_list);
1.130     jdolecek 1388:        }
                   1389:
                   1390:        /* update execsw[] */
                   1391:        exec_init(0);
1.237     ad       1392:        rw_exit(&exec_lock);
1.282     ad       1393:        return 0;
1.130     jdolecek 1394: }
                   1395:
                   1396: /*
                   1397:  * Remove execsw[] entry.
                          *
                          * Unregisters the `count' elements of the array `esp'.  With exec_lock
                          * held as writer, every process on every list in proclists[] is
                          * examined under proc_lock; if any process has p_execsw pointing at one
                          * of the elements, the call fails with EBUSY and nothing is removed.
                          * Otherwise, for each element the first ex_head entry whose ex_sw
                          * points at it is unlinked and freed (elements with no matching entry
                          * are skipped silently), and exec_init(0) rebuilds execsw[].
                          *
                          * Returns 0 on success (including count == 0) or EBUSY.
                   1398:  */
                   1399: int
1.282     ad       1400: exec_remove(struct execsw *esp, int count)
1.130     jdolecek 1401: {
1.282     ad       1402:        struct exec_entry       *it, *next;
                   1403:        int                     i;
                   1404:        const struct proclist_desc *pd;
                   1405:        proc_t                  *p;
                   1406:
                                /* Nothing to remove; return before taking exec_lock. */
1.283     ad       1407:        if (count == 0) {
                   1408:                return 0;
                   1409:        }
1.130     jdolecek 1410:
1.282     ad       1411:        /* Abort if any are busy. */
1.237     ad       1412:        rw_enter(&exec_lock, RW_WRITER);
1.282     ad       1413:        for (i = 0; i < count; i++) {
                   1414:                mutex_enter(proc_lock);
                   1415:                for (pd = proclists; pd->pd_list != NULL; pd++) {
                   1416:                        PROCLIST_FOREACH(p, pd->pd_list) {
                   1417:                                if (p->p_execsw == &esp[i]) {
                                                        /* Drop both locks before bailing out. */
                   1418:                                        mutex_exit(proc_lock);
                   1419:                                        rw_exit(&exec_lock);
                   1420:                                        return EBUSY;
                   1421:                                }
                   1422:                        }
                   1423:                }
                   1424:                mutex_exit(proc_lock);
                   1425:        }
1.130     jdolecek 1426:
1.282     ad       1427:        /* None are busy, so remove them all. */
                   1428:        for (i = 0; i < count; i++) {
                                        /* `next' is fetched up front because `it' may be freed below. */
                   1429:                for (it = LIST_FIRST(&ex_head); it != NULL; it = next) {
                   1430:                        next = LIST_NEXT(it, ex_list);
                   1431:                        if (it->ex_sw == &esp[i]) {
                   1432:                                LIST_REMOVE(it, ex_list);
                   1433:                                kmem_free(it, sizeof(*it));
                   1434:                                break;
                   1435:                        }
                   1436:                }
1.130     jdolecek 1437:        }
                   1438:
                   1439:        /* update execsw[] */
                   1440:        exec_init(0);
1.237     ad       1441:        rw_exit(&exec_lock);
1.282     ad       1442:        return 0;
1.130     jdolecek 1443: }
                   1444:
                   1445: /*
                   1446:  * Initialize exec structures. If init_boot is true, also does necessary
                   1447:  * one-time initialization (it's called from main() that way).
1.147     jdolecek 1448:  * Once system is multiuser, this should be called with exec_lock held,
1.130     jdolecek 1449:  * i.e. via exec_{add|remove}().
                          *
                          * The one-time part initializes exec_lock, sigobject_lock and the
                          * exec_pool argument pool (NCARGS-byte items, hard limit maxexec).
                          * When init_boot is false the function instead asserts that exec_lock
                          * is write-held.
                          *
                          * In both cases execsw[] is then rebuilt from the ex_head list: entries
                          * with EXECSW_PRIO_FIRST come first, then EXECSW_PRIO_ANY, then
                          * EXECSW_PRIO_LAST; any other es_prio value panics.  nexecs is set to
                          * the number of entries and exec_maxhdrsz to the largest es_hdrsz among
                          * them, but never less than sizeof(int).
                          *
                          * Always returns 0.
1.130     jdolecek 1450:  */
                   1451: int
1.138     lukem    1452: exec_init(int init_boot)
1.130     jdolecek 1453: {
1.282     ad       1454:        const struct execsw     **sw;
                   1455:        struct exec_entry       *ex;
                   1456:        SLIST_HEAD(,exec_entry) first;
                   1457:        SLIST_HEAD(,exec_entry) any;
                   1458:        SLIST_HEAD(,exec_entry) last;
                   1459:        int                     i, sz;
1.130     jdolecek 1460:
                   1461:        if (init_boot) {
                   1462:                /* do one-time initializations */
1.237     ad       1463:                rw_init(&exec_lock);
1.259     ad       1464:                mutex_init(&sigobject_lock, MUTEX_DEFAULT, IPL_NONE);
1.277     ad       1465:                pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
                   1466:                    "execargs", &exec_palloc, IPL_NONE);
                   1467:                pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);
1.282     ad       1468:        } else {
                   1469:                KASSERT(rw_write_held(&exec_lock));
                   1470:        }
1.130     jdolecek 1471:
1.282     ad       1472:        /* Sort each entry onto the appropriate queue. */
                                /*
                                 * Each entry is pushed at the head of its queue, so within one
                                 * priority class the order is the reverse of the ex_head
                                 * traversal order.  sz counts the entries seen.
                                 */
                   1473:        SLIST_INIT(&first);
                   1474:        SLIST_INIT(&any);
                   1475:        SLIST_INIT(&last);
                   1476:        sz = 0;
                   1477:        LIST_FOREACH(ex, &ex_head, ex_list) {
                   1478:                switch(ex->ex_sw->es_prio) {
                   1479:                case EXECSW_PRIO_FIRST:
                   1480:                        SLIST_INSERT_HEAD(&first, ex, ex_slist);
                   1481:                        break;
                   1482:                case EXECSW_PRIO_ANY:
                   1483:                        SLIST_INSERT_HEAD(&any, ex, ex_slist);
                   1484:                        break;
                   1485:                case EXECSW_PRIO_LAST:
                   1486:                        SLIST_INSERT_HEAD(&last, ex, ex_slist);
                   1487:                        break;
                   1488:                default:
1.294.2.4  rmind    1489:                        panic("%s", __func__);
1.282     ad       1490:                        break;
1.130     jdolecek 1491:                }
1.282     ad       1492:                sz++;
1.130     jdolecek 1493:        }
                   1494:
                   1495:        /*
1.282     ad       1496:         * Create new execsw[].  Ensure we do not try a zero-sized
                   1497:         * allocation.
                                 * (The extra byte in the size keeps it non-zero when sz == 0.)
1.130     jdolecek 1498:         */
1.282     ad       1499:        sw = kmem_alloc(sz * sizeof(struct execsw *) + 1, KM_SLEEP);
                   1500:        i = 0;
                   1501:        SLIST_FOREACH(ex, &first, ex_slist) {
                   1502:                sw[i++] = ex->ex_sw;
                   1503:        }
                   1504:        SLIST_FOREACH(ex, &any, ex_slist) {
                   1505:                sw[i++] = ex->ex_sw;
                   1506:        }
                   1507:        SLIST_FOREACH(ex, &last, ex_slist) {
                   1508:                sw[i++] = ex->ex_sw;
1.130     jdolecek 1509:        }
1.183     junyoung 1510:
1.282     ad       1511:        /* Replace old execsw[] and free used memory. */
                                /*
                                 * The size passed to kmem_free() is recomputed from the old
                                 * nexecs with the same formula used for the allocation above.
                                 */
                   1512:        if (execsw != NULL) {
                   1513:                kmem_free(__UNCONST(execsw),
                   1514:                    nexecs * sizeof(struct execsw *) + 1);
1.130     jdolecek 1515:        }
1.282     ad       1516:        execsw = sw;
                   1517:        nexecs = sz;
1.130     jdolecek 1518:
1.282     ad       1519:        /* Figure out the maximum size of an exec header. */
                   1520:        exec_maxhdrsz = sizeof(int);
1.130     jdolecek 1521:        for (i = 0; i < nexecs; i++) {
                   1522:                if (execsw[i]->es_hdrsz > exec_maxhdrsz)
                   1523:                        exec_maxhdrsz = execsw[i]->es_hdrsz;
                   1524:        }
                   1525:
                   1526:        return 0;
                   1527: }
1.171     chs      1528:
                         /*
                          * exec_sigcode_map: map the signal trampoline of emulation `e' into
                          * process `p'.
                          *
                          * The trampoline is the byte range [e->e_sigcode, e->e_esigcode).  If
                          * the emulation has no e_sigobject slot or the range is empty, nothing
                          * is mapped and 0 is returned.
                          *
                          * Otherwise the object in *e->e_sigobject (created and filled on first
                          * use, see below) is mapped into p->p_vmspace with
                          * UVM_PROT_RX, and the chosen address is stored in
                          * p->p_sigctx.ps_sigcode.
                          *
                          * Returns 0 on success or the error from the failing uvm_map() call.
                          */
                   1529: static int
                   1530: exec_sigcode_map(struct proc *p, const struct emul *e)
                   1531: {
                   1532:        vaddr_t va;
                   1533:        vsize_t sz;
                   1534:        int error;
                   1535:        struct uvm_object *uobj;
                   1536:
1.184     drochner 1537:        sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;
                   1538:
                   1539:        if (e->e_sigobject == NULL || sz == 0) {
1.171     chs      1540:                return 0;
                   1541:        }
                   1542:
                   1543:        /*
                   1544:         * If we don't have a sigobject for this emulation, create one.
                   1545:         *
                   1546:         * sigobject is an anonymous memory object (just like SYSV shared
                   1547:         * memory) that we keep a permanent reference to and that we map
                   1548:         * in all processes that need this sigcode. The creation is simple,
                   1549:         * we create an object, add a permanent reference to it, map it in
                   1550:         * kernel space, copy out the sigcode to it and unmap it.
1.189     jdolecek 1551:         * We map it with PROT_READ|PROT_EXEC into the process just
                   1552:         * the way sys_mmap() would map it.
1.171     chs      1553:         */
                   1554:
                                /*
                                 * *e->e_sigobject is read once without the lock and, if still
                                 * NULL, read again under sigobject_lock before creating the
                                 * object; the pointer is published only after the copy is done.
                                 */
                   1555:        uobj = *e->e_sigobject;
                   1556:        if (uobj == NULL) {
1.259     ad       1557:                mutex_enter(&sigobject_lock);
                   1558:                if ((uobj = *e->e_sigobject) == NULL) {
                   1559:                        uobj = uao_create(sz, 0);
                   1560:                        (*uobj->pgops->pgo_reference)(uobj);
                   1561:                        va = vm_map_min(kernel_map);
                   1562:                        if ((error = uvm_map(kernel_map, &va, round_page(sz),
                   1563:                            uobj, 0, 0,
                   1564:                            UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
                   1565:                            UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
                   1566:                                printf("kernel mapping failed %d\n", error);
                                                /*
                                                 * NOTE(review): only one pgo_detach here
                                                 * although the object was created and then
                                                 * referenced once more above -- confirm this
                                                 * does not leave a reference behind.
                                                 */
                   1567:                                (*uobj->pgops->pgo_detach)(uobj);
                   1568:                                mutex_exit(&sigobject_lock);
                   1569:                                return (error);
                   1570:                        }
                   1571:                        memcpy((void *)va, e->e_sigcode, sz);
1.171     chs      1572: #ifdef PMAP_NEED_PROCWR
1.259     ad       1573:                        pmap_procwr(&proc0, va, sz);
1.171     chs      1574: #endif
1.259     ad       1575:                        uvm_unmap(kernel_map, va, va + round_page(sz));
                   1576:                        *e->e_sigobject = uobj;
                   1577:                }
                   1578:                mutex_exit(&sigobject_lock);
1.171     chs      1579:        }
                   1580:
1.172     enami    1581:        /* Just a hint to uvm_map where to put it. */
1.195     fvdl     1582:        va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
                   1583:            round_page(sz));
1.187     chs      1584:
                   1585: #ifdef __alpha__
                   1586:        /*
                   1587:         * Tru64 puts /sbin/loader at the end of user virtual memory,
                   1588:         * which causes the above calculation to put the sigcode at
                   1589:         * an invalid address.  Put it just below the text instead.
                   1590:         */
1.193     jmc      1591:        if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
1.187     chs      1592:                va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
                   1593:        }
                   1594: #endif
                   1595:
                                /*
                                 * Take a reference for the process mapping; it is dropped again
                                 * below if uvm_map() fails.
                                 */
1.171     chs      1596:        (*uobj->pgops->pgo_reference)(uobj);
                   1597:        error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
                   1598:                        uobj, 0, 0,
                   1599:                        UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
                   1600:                                    UVM_ADV_RANDOM, 0));
                   1601:        if (error) {
1.294.2.4  rmind    1602:                DPRINTF(("%s, %d: map %p "
1.294.2.3  rmind    1603:                    "uvm_map %#"PRIxVSIZE"@%#"PRIxVADDR" failed %d\n",
1.294.2.4  rmind    1604:                    __func__, __LINE__, &p->p_vmspace->vm_map, round_page(sz),
                   1605:                    va, error));
1.171     chs      1606:                (*uobj->pgops->pgo_detach)(uobj);
                   1607:                return (error);
                   1608:        }
                                /* Record where the trampoline ended up in this process. */
                   1609:        p->p_sigctx.ps_sigcode = (void *)va;
                   1610:        return (0);
                   1611: }

CVSweb <webmaster@jp.NetBSD.org>