[BACK]Return to kern_exec.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/kern_exec.c, Revision 1.344

1.344   ! christos    1: /*     $NetBSD: kern_exec.c,v 1.343 2012/02/21 03:44:54 christos Exp $ */
1.277     ad          2:
                      3: /*-
                      4:  * Copyright (c) 2008 The NetBSD Foundation, Inc.
                      5:  * All rights reserved.
                      6:  *
                      7:  * Redistribution and use in source and binary forms, with or without
                      8:  * modification, are permitted provided that the following conditions
                      9:  * are met:
                     10:  * 1. Redistributions of source code must retain the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer.
                     12:  * 2. Redistributions in binary form must reproduce the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer in the
                     14:  *    documentation and/or other materials provided with the distribution.
                     15:  *
                     16:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     17:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     18:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     19:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     20:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     21:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     22:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     23:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     24:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     25:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     26:  * POSSIBILITY OF SUCH DAMAGE.
                     27:  */
1.55      cgd        28:
                     29: /*-
1.77      cgd        30:  * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
1.55      cgd        31:  * Copyright (C) 1992 Wolfgang Solfrank.
                     32:  * Copyright (C) 1992 TooLs GmbH.
                     33:  * All rights reserved.
                     34:  *
                     35:  * Redistribution and use in source and binary forms, with or without
                     36:  * modification, are permitted provided that the following conditions
                     37:  * are met:
                     38:  * 1. Redistributions of source code must retain the above copyright
                     39:  *    notice, this list of conditions and the following disclaimer.
                     40:  * 2. Redistributions in binary form must reproduce the above copyright
                     41:  *    notice, this list of conditions and the following disclaimer in the
                     42:  *    documentation and/or other materials provided with the distribution.
                     43:  * 3. All advertising materials mentioning features or use of this software
                     44:  *    must display the following acknowledgement:
                     45:  *     This product includes software developed by TooLs GmbH.
                     46:  * 4. The name of TooLs GmbH may not be used to endorse or promote products
                     47:  *    derived from this software without specific prior written permission.
                     48:  *
                     49:  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
                     50:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
                     51:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
                     52:  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
                     53:  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
                     54:  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
                     55:  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
                     56:  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
                     57:  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
                     58:  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                     59:  */
1.146     lukem      60:
                     61: #include <sys/cdefs.h>
1.344   ! christos   62: __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.343 2012/02/21 03:44:54 christos Exp $");
1.89      mrg        63:
1.325     jmcneill   64: #include "opt_exec.h"
1.92      thorpej    65: #include "opt_ktrace.h"
1.285     apb        66: #include "opt_modular.h"
1.124     jdolecek   67: #include "opt_syscall_debug.h"
1.226     dogcow     68: #include "veriexec.h"
1.232     elad       69: #include "opt_pax.h"
1.55      cgd        70:
                     71: #include <sys/param.h>
                     72: #include <sys/systm.h>
                     73: #include <sys/filedesc.h>
                     74: #include <sys/kernel.h>
                     75: #include <sys/proc.h>
                     76: #include <sys/mount.h>
                     77: #include <sys/malloc.h>
1.265     yamt       78: #include <sys/kmem.h>
1.55      cgd        79: #include <sys/namei.h>
                     80: #include <sys/vnode.h>
                     81: #include <sys/file.h>
                     82: #include <sys/acct.h>
1.337     martin     83: #include <sys/atomic.h>
1.55      cgd        84: #include <sys/exec.h>
                     85: #include <sys/ktrace.h>
1.278     pooka      86: #include <sys/uidinfo.h>
1.55      cgd        87: #include <sys/wait.h>
                     88: #include <sys/mman.h>
1.155     gmcgarry   89: #include <sys/ras.h>
1.55      cgd        90: #include <sys/signalvar.h>
                     91: #include <sys/stat.h>
1.124     jdolecek   92: #include <sys/syscall.h>
1.218     elad       93: #include <sys/kauth.h>
1.253     ad         94: #include <sys/lwpctl.h>
1.260     christos   95: #include <sys/pax.h>
1.263     ad         96: #include <sys/cpu.h>
1.282     ad         97: #include <sys/module.h>
1.289     pooka      98: #include <sys/syscallvar.h>
1.56      cgd        99: #include <sys/syscallargs.h>
1.222     elad      100: #if NVERIEXEC > 0
1.197     blymn     101: #include <sys/verified_exec.h>
1.222     elad      102: #endif /* NVERIEXEC > 0 */
1.294     darran    103: #include <sys/sdt.h>
1.337     martin    104: #include <sys/spawn.h>
                    105: #include <sys/prot.h>
1.330     tls       106: #include <sys/cprng.h>
1.55      cgd       107:
1.88      mrg       108: #include <uvm/uvm_extern.h>
                    109:
1.55      cgd       110: #include <machine/reg.h>
                    111:
1.244     dsl       112: #include <compat/common/compat_util.h>
                    113:
1.171     chs       114: static int exec_sigcode_map(struct proc *, const struct emul *);
                    115:
1.143     christos  116: #ifdef DEBUG_EXEC
                         /*
                          * Debug-only tracing helpers.  DPRINTF pastes its argument directly
                          * after "printf", so callers must pass a parenthesized argument list.
                          * COPYPRINTF prints the calling function, the line number, a tag
                          * string, an address and a size.  Both macros expand to nothing when
                          * DEBUG_EXEC is not defined.
                          */
1.305     matt      117: #define DPRINTF(a) printf a
1.312     christos  118: #define COPYPRINTF(s, a, b) printf("%s, %d: copyout%s @%p %zu\n", __func__, \
                    119:     __LINE__, (s), (a), (b))
1.143     christos  120: #else
                    121: #define DPRINTF(a)
1.312     christos  122: #define COPYPRINTF(s, a, b)
1.143     christos  123: #endif /* DEBUG_EXEC */
1.165     thorpej   124:
1.130     jdolecek  125: /*
1.294     darran    126:  * DTrace SDT provider definitions
                    127:  */
                    128: SDT_PROBE_DEFINE(proc,,,exec,
                    129:            "char *", NULL,     /* presumably arg 0 type; execve_loadvm() fires this probe with the path */
                    130:            NULL, NULL, NULL, NULL,
                    131:            NULL, NULL, NULL, NULL);
                    132: SDT_PROBE_DEFINE(proc,,,exec_success,
                    133:            "char *", NULL,     /* presumably arg 0 type; firing site not visible here */
                    134:            NULL, NULL, NULL, NULL,
                    135:            NULL, NULL, NULL, NULL);
                    136: SDT_PROBE_DEFINE(proc,,,exec_failure,
                    137:            "int", NULL,        /* presumably arg 0 is an error number; firing site not visible here */
                    138:            NULL, NULL, NULL, NULL,
                    139:            NULL, NULL, NULL, NULL);
                    140:
                    141: /*
1.130     jdolecek  142:  * Exec function switch:
                    143:  *
                    144:  * Note that each makecmds function is responsible for loading the
                    145:  * exec package with the necessary functions for any exec-type-specific
                    146:  * handling.
                    147:  *
                    148:  * Functions for specific exec types should be defined in their own
                    149:  * header file.
                    150:  */
1.138     lukem     151: static const struct execsw     **execsw = NULL;
                    152: static int                     nexecs;
                         /*
                          * check_exec() walks execsw[0 .. nexecs-1] and calls each entry's
                          * es_makecmds hook in turn.  exec_autoload() treats nexecs == 0 as
                          * "no exec formats registered yet".
                          */
                    153:
1.282     ad        154: u_int  exec_maxhdrsz;   /* must not be static - used by netbsd32 */
1.130     jdolecek  155:
                    156: /* list of dynamically loaded execsw entries */
1.282     ad        157: static LIST_HEAD(execlist_head, exec_entry) ex_head =
                    158:     LIST_HEAD_INITIALIZER(ex_head);
1.130     jdolecek  159: struct exec_entry {
1.138     lukem     160:        LIST_ENTRY(exec_entry)  ex_list;        /* LIST linkage (presumably on ex_head) */
1.282     ad        161:        SLIST_ENTRY(exec_entry) ex_slist;       /* SLIST linkage; its user is not visible here */
                    162:        const struct execsw     *ex_sw;         /* exec switch entry this node refers to */
1.130     jdolecek  163: };
                    164:
1.203     christos  165: #ifndef __HAVE_SYSCALL_INTERN
                         /* Prototype for the handler stored in emul_netbsd.e_syscall in this configuration. */
                    166: void   syscall(void);
                    167: #endif
                    168:
1.173     christos  169: /* NetBSD emul struct */
1.282     ad        170: struct emul emul_netbsd = {
                                /*
                                 * Descriptor for the native "netbsd" emulation.  Hooks that are
                                 * explicitly NULL below are simply left unset for this emulation.
                                 */
1.291     rmind     171:        .e_name =               "netbsd",
                    172:        .e_path =               NULL,
1.133     mycroft   173: #ifndef __HAVE_MINIMAL_EMUL
                                /* These four members are only initialized without __HAVE_MINIMAL_EMUL. */
1.291     rmind     174:        .e_flags =              EMUL_HAS_SYS___syscall,
                    175:        .e_errno =              NULL,
                    176:        .e_nosys =              SYS_syscall,
                    177:        .e_nsysent =            SYS_NSYSENT,
1.133     mycroft   178: #endif
1.291     rmind     179:        .e_sysent =             sysent,
1.124     jdolecek  180: #ifdef SYSCALL_DEBUG
                                /* The syscall name table is only referenced in SYSCALL_DEBUG kernels. */
1.291     rmind     181:        .e_syscallnames =       syscallnames,
1.124     jdolecek  182: #else
1.291     rmind     183:        .e_syscallnames =       NULL,
1.124     jdolecek  184: #endif
1.291     rmind     185:        .e_sendsig =            sendsig,
                    186:        .e_trapsignal =         trapsignal,
                    187:        .e_tracesig =           NULL,
                    188:        .e_sigcode =            NULL,
                    189:        .e_esigcode =           NULL,
                    190:        .e_sigobject =          NULL,
                    191:        .e_setregs =            setregs,
                    192:        .e_proc_exec =          NULL,
                    193:        .e_proc_fork =          NULL,
                    194:        .e_proc_exit =          NULL,
                    195:        .e_lwp_fork =           NULL,
                    196:        .e_lwp_exit =           NULL,
1.133     mycroft   197: #ifdef __HAVE_SYSCALL_INTERN
                                /* Exactly one of e_syscall_intern / e_syscall is initialized. */
1.291     rmind     198:        .e_syscall_intern =     syscall_intern,
1.133     mycroft   199: #else
1.291     rmind     200:        .e_syscall =            syscall,
1.133     mycroft   201: #endif
1.291     rmind     202:        .e_sysctlovly =         NULL,
                    203:        .e_fault =              NULL,
                    204:        .e_vm_default_addr =    uvm_default_mapaddr,
                    205:        .e_usertrap =           NULL,
                    206:        .e_ucsize =             sizeof(ucontext_t),
                    207:        .e_startlwp =           startlwp
1.124     jdolecek  208: };
                    209:
1.55      cgd       210: /*
1.130     jdolecek  211:  * Exec lock. Used to control access to execsw[] structures.
                    212:  * This must not be static so that netbsd32 can access it, too.
                    213:  */
1.237     ad        214: krwlock_t exec_lock;
1.183     junyoung  215:
                         /*
                          * NOTE(review): no user of sigobject_lock is visible in this part of the
                          * file; presumably it serializes the sigcode object setup done by
                          * exec_sigcode_map() -- confirm.
                          */
1.259     ad        216: static kmutex_t sigobject_lock;
                    217:
1.337     martin    218: /*
                    219:  * Data used between a loadvm and execve part of an "exec" operation
                    220:  */
                    221: struct execve_data {
                                /*
                                 * Passed to execve_loadvm() as its "data" argument and embedded as
                                 * sed_exec in struct spawn_exec_data.
                                 * NOTE(review): the code that reads and writes these members lies
                                 * outside the visible part of the file; confirm per-field roles there.
                                 */
                    222:        struct exec_package     ed_pack;
                    223:        struct pathbuf          *ed_pathbuf;
                    224:        struct vattr            ed_attr;
                    225:        struct ps_strings       ed_arginfo;
                    226:        char                    *ed_argp;
                    227:        const char              *ed_pathstring;
                    228:        char                    *ed_resolvedpathbuf;
                    229:        size_t                  ed_ps_strings_sz;
                    230:        int                     ed_szsigcode;
                    231:        long                    ed_argc;
                    232:        long                    ed_envc;
                    233: };
                    234:
                    235: /*
                    236:  * data passed from parent lwp to child during a posix_spawn()
                    237:  */
                    238: struct spawn_exec_data {
                                /*
                                 * sed_exec embeds the execve state defined by struct execve_data.
                                 * NOTE(review): the condvar/mutex pair and sed_error are presumably
                                 * how the child reports readiness and its result to the parent --
                                 * confirm against the posix_spawn code, which is not visible here.
                                 */
                    239:        struct execve_data      sed_exec;
                    240:        size_t                  sed_actions_len;
                    241:        struct posix_spawn_file_actions_entry
                    242:                                *sed_actions;
                    243:        struct posix_spawnattr  *sed_attrs;
                    244:        struct proc             *sed_parent;
                    245:        kcondvar_t              sed_cv_child_ready;
                    246:        kmutex_t                sed_mtx_child;
                    247:        int                     sed_error;
                    248: };
                    249:
                         /*
                          * Allocation hook installed in exec_palloc: returns NCARGS bytes obtained
                          * from kernel_map via uvm_km_alloc(), requesting UVM_KMF_PAGEABLE |
                          * UVM_KMF_WAITVA.  Neither pp nor flags is consulted.
                          */
1.277     ad        250: static void *
                    251: exec_pool_alloc(struct pool *pp, int flags)
                    252: {
                    253:
                    254:        return (void *)uvm_km_alloc(kernel_map, NCARGS, 0,
                    255:            UVM_KMF_PAGEABLE | UVM_KMF_WAITVA);
                    256: }
                    257:
                         /*
                          * Release hook installed in exec_palloc: hands the NCARGS-byte region at
                          * addr back to kernel_map with uvm_km_free().  pp is not consulted.
                          */
                    258: static void
                    259: exec_pool_free(struct pool *pp, void *addr)
                    260: {
                    261:
                    262:        uvm_km_free(kernel_map, (vaddr_t)addr, NCARGS, UVM_KMF_PAGEABLE);
                    263: }
                    264:
                    265: static struct pool exec_pool;
                    266:
                         /*
                          * Pool allocator built on exec_pool_alloc()/exec_pool_free(); its page
                          * size is NCARGS, matching the size those hooks allocate and free.
                          */
                    267: static struct pool_allocator exec_palloc = {
                    268:        .pa_alloc = exec_pool_alloc,
                    269:        .pa_free = exec_pool_free,
                    270:        .pa_pagesz = NCARGS
                    271: };
                    272:
1.130     jdolecek  273: /*
1.55      cgd       274:  * check exec:
                    275:  * given an "executable" described in the exec package's namei info,
                    276:  * see what we can do with it.
                    277:  *
                    278:  * ON ENTRY:
                    279:  *     exec package with appropriate namei info
1.212     christos  280:  *     lwp pointer of exec'ing lwp
1.55      cgd       281:  *     NO SELF-LOCKED VNODES
                    282:  *
                    283:  * ON EXIT:
                    284:  *     error:  nothing held, etc.  exec header still allocated.
1.77      cgd       285:  *     ok:     filled exec package, executable's vnode (unlocked).
1.55      cgd       286:  *
                    287:  * EXEC SWITCH ENTRY:
                    288:  *     Locked vnode to check, exec package, proc.
                    289:  *
                    290:  * EXEC SWITCH EXIT:
1.77      cgd       291:  *     ok:     return 0, filled exec package, executable's vnode (unlocked).
1.55      cgd       292:  *     error:  destructive:
                    293:  *                     everything deallocated except exec header.
1.76      cgd       294:  *             non-destructive:
1.77      cgd       295:  *                     error code, executable's vnode (unlocked),
1.76      cgd       296:  *                     exec header unmodified.
1.55      cgd       297:  */
                    298: int
1.205     christos  299: /*ARGSUSED*/
1.301     dholland  300: check_exec(struct lwp *l, struct exec_package *epp, struct pathbuf *pb)
1.55      cgd       301: {
1.138     lukem     302:        int             error, i;
                    303:        struct vnode    *vp;
1.295     dholland  304:        struct nameidata nd;
1.138     lukem     305:        size_t          resid;
1.55      cgd       306:
                                /*
                                 * Look up pb, following symlinks.  LOCKLEAF is requested, and the
                                 * bad1 path below relies on the vnode still being locked.
                                 */
1.303     dholland  307:        NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1.295     dholland  308:
1.55      cgd       309:        /* first get the vnode */
1.295     dholland  310:        if ((error = namei(&nd)) != 0)
1.55      cgd       311:                return error;
1.295     dholland  312:        epp->ep_vp = vp = nd.ni_vp;
                    313:        /* this cannot overflow as both are size PATH_MAX */
1.302     dholland  314:        strcpy(epp->ep_resolvedname, nd.ni_pnbuf);
1.295     dholland  315:
1.296     dholland  316: #ifdef DIAGNOSTIC
                    317:        /* paranoia (take this out once namei stuff stabilizes) */
1.302     dholland  318:        memset(nd.ni_pnbuf, '~', PATH_MAX);
1.295     dholland  319: #endif
1.55      cgd       320:
1.84      mycroft   321:        /* check access and type */
1.55      cgd       322:        if (vp->v_type != VREG) {
1.81      kleink    323:                error = EACCES;
1.55      cgd       324:                goto bad1;
                    325:        }
1.254     pooka     326:        if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0)
1.84      mycroft   327:                goto bad1;
1.55      cgd       328:
                    329:        /* get attributes */
1.254     pooka     330:        if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred)) != 0)
1.55      cgd       331:                goto bad1;
                    332:
                    333:        /* Check mount point */
                    334:        if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
                    335:                error = EACCES;
                    336:                goto bad1;
                    337:        }
                                /* On a MNT_NOSUID mount, hide the set-id bits from later users of ep_vap. */
1.141     thorpej   338:        if (vp->v_mount->mnt_flag & MNT_NOSUID)
1.83      mycroft   339:                epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);
1.55      cgd       340:
                    341:        /* try to open it */
1.254     pooka     342:        if ((error = VOP_OPEN(vp, FREAD, l->l_cred)) != 0)
1.55      cgd       343:                goto bad1;
                    344:
1.99      wrstuden  345:        /* unlock vp, since we need it unlocked from here on out. */
1.298     hannken   346:        VOP_UNLOCK(vp);
1.77      cgd       347:
                                /*
                                 * From here on the vnode is open and unlocked, so every failure
                                 * goes through bad2 (relock, close, release) instead of bad1.
                                 */
1.222     elad      348: #if NVERIEXEC > 0
1.295     dholland  349:        error = veriexec_verify(l, vp, epp->ep_resolvedname,
1.233     elad      350:            epp->ep_flags & EXEC_INDIR ? VERIEXEC_INDIRECT : VERIEXEC_DIRECT,
1.236     elad      351:            NULL);
                    352:        if (error)
1.234     elad      353:                goto bad2;
1.222     elad      354: #endif /* NVERIEXEC > 0 */
1.160     blymn     355:
1.232     elad      356: #ifdef PAX_SEGVGUARD
1.295     dholland  357:        error = pax_segvguard(l, vp, epp->ep_resolvedname, false);
1.234     elad      358:        if (error)
                    359:                goto bad2;
1.232     elad      360: #endif /* PAX_SEGVGUARD */
                    361:
1.55      cgd       362:        /* now we have the file, get the exec header */
1.74      christos  363:        error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
1.223     ad        364:                        UIO_SYSSPACE, 0, l->l_cred, &resid, NULL);
1.74      christos  365:        if (error)
1.55      cgd       366:                goto bad2;
                                /* A short read is not an error; record how many header bytes were read. */
                    367:        epp->ep_hdrvalid = epp->ep_hdrlen - resid;
                    368:
                    369:        /*
1.136     eeh       370:         * Set up default address space limits.  Can be overridden
                    371:         * by individual exec packages.
1.183     junyoung  372:         *
1.235     rillig    373:         * XXX probably should be all done in the exec packages.
1.136     eeh       374:         */
                    375:        epp->ep_vm_minaddr = VM_MIN_ADDRESS;
                    376:        epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;
                    377:        /*
1.55      cgd       378:         * set up the vmcmds for creation of the process
                    379:         * address space
                    380:         */
                                /* ENOEXEC is the result if no handler reports anything more specific. */
                    381:        error = ENOEXEC;
1.244     dsl       382:        for (i = 0; i < nexecs; i++) {
1.68      cgd       383:                int newerror;
                    384:
1.130     jdolecek  385:                epp->ep_esch = execsw[i];
1.212     christos  386:                newerror = (*execsw[i]->es_makecmds)(l, epp);
1.244     dsl       387:
                    388:                if (!newerror) {
                                                /*
                                                 * A handler accepted the image.  The sanity checks
                                                 * below "break" rather than return on failure so that
                                                 * the vmcmds and the vnode are cleaned up after the loop.
                                                 */
1.318     reinoud   389:                        /* Seems ok: check that entry point is not too high */
1.323     reinoud   390:                        if (epp->ep_entry > epp->ep_vm_maxaddr) {
1.322     reinoud   391: #ifdef DIAGNOSTIC
1.329     reinoud   392:                                printf("%s: rejecting %p due to "
1.331     christos  393:                                    "too high entry address (> %p)\n",
                    394:                                         __func__, (void *)epp->ep_entry,
                    395:                                         (void *)epp->ep_vm_maxaddr);
1.322     reinoud   396: #endif
1.318     reinoud   397:                                error = ENOEXEC;
                    398:                                break;
                    399:                        }
                    400:                        /* Seems ok: check that entry point is not too low */
1.323     reinoud   401:                        if (epp->ep_entry < epp->ep_vm_minaddr) {
1.322     reinoud   402: #ifdef DIAGNOSTIC
1.329     reinoud   403:                                printf("%s: rejecting %p due to "
1.331     christos  404:                                    "too low entry address (< %p)\n",
                    405:                                     __func__, (void *)epp->ep_entry,
                    406:                                     (void *)epp->ep_vm_minaddr);
1.322     reinoud   407: #endif
1.244     dsl       408:                                error = ENOEXEC;
                    409:                                break;
                    410:                        }
                    411:
                    412:                        /* check limits */
                    413:                        if ((epp->ep_tsize > MAXTSIZ) ||
                    414:                            (epp->ep_dsize > (u_quad_t)l->l_proc->p_rlimit
                    415:                                                    [RLIMIT_DATA].rlim_cur)) {
1.322     reinoud   416: #ifdef DIAGNOSTIC
1.323     reinoud   417:                                printf("%s: rejecting due to "
1.331     christos  418:                                    "limits (t=%llu > %llu || d=%llu > %llu)\n",
                    419:                                    __func__,
                    420:                                    (unsigned long long)epp->ep_tsize,
                    421:                                    (unsigned long long)MAXTSIZ,
                    422:                                    (unsigned long long)epp->ep_dsize,
1.332     christos  423:                                    (unsigned long long)
                    424:                                    l->l_proc->p_rlimit[RLIMIT_DATA].rlim_cur);
1.322     reinoud   425: #endif
1.244     dsl       426:                                error = ENOMEM;
                    427:                                break;
                    428:                        }
                                                /* Success: the vnode is left open and unlocked for the caller. */
                    429:                        return 0;
                    430:                }
                    431:
                                        /* Drop any vnode references the failed handler left in the package. */
                    432:                if (epp->ep_emul_root != NULL) {
                    433:                        vrele(epp->ep_emul_root);
                    434:                        epp->ep_emul_root = NULL;
                    435:                }
                    436:                if (epp->ep_interp != NULL) {
                    437:                        vrele(epp->ep_interp);
                    438:                        epp->ep_interp = NULL;
                    439:                }
                    440:
1.68      cgd       441:                /* make sure the first "interesting" error code is saved. */
1.244     dsl       442:                if (error == ENOEXEC)
1.68      cgd       443:                        error = newerror;
1.124     jdolecek  444:
1.244     dsl       445:                if (epp->ep_flags & EXEC_DESTR)
                    446:                        /* Error from "#!" code, tidied up by recursive call */
1.55      cgd       447:                        return error;
                    448:        }
                    449:
1.249     pooka     450:        /* not found, error */
                    451:
1.55      cgd       452:        /*
                    453:         * free any vmspace-creation commands,
                    454:         * and release their references
                    455:         */
                    456:        kill_vmcmds(&epp->ep_vmcmds);
                    457:
                    458: bad2:
                    459:        /*
1.99      wrstuden  460:         * close and release the vnode, restore the old one, free the
1.55      cgd       461:         * pathname buf, and punt.
                    462:         */
                                /*
                                 * NOTE(review): the code below only relocks, closes and releases
                                 * the vnode; nothing is restored and no pathname buffer is freed
                                 * in this function.
                                 */
1.99      wrstuden  463:        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.254     pooka     464:        VOP_CLOSE(vp, FREAD, l->l_cred);
1.99      wrstuden  465:        vput(vp);
1.55      cgd       466:        return error;
                    467:
                    468: bad1:
                    469:        /*
                    470:         * free the namei pathname buffer, and put the vnode
                    471:         * (which we don't yet have open).
                    472:         */
                                /* NOTE(review): only the vput() happens here; no buffer is freed. */
1.77      cgd       473:        vput(vp);                               /* was still locked */
1.55      cgd       474:        return error;
                    475: }
                    476:
1.188     chs       477: #ifdef __MACHINE_STACK_GROWS_UP
                         /*
                          * STACK_PTHREADSPACE is NBPG when the machine stack grows up and 0
                          * otherwise.  Its user is not visible in this part of the file.
                          */
                    478: #define STACK_PTHREADSPACE NBPG
                    479: #else
                    480: #define STACK_PTHREADSPACE 0
                    481: #endif
                    482:
                         /*
                          * Fetch element "index" of the pointer array "array" into *value using
                          * copyin(), and return the result of copyin().  sys_execve() passes this
                          * function to execve1() as the element fetcher.
                          */
1.204     cube      483: static int
                    484: execve_fetch_element(char * const *array, size_t index, char **value)
                    485: {
                    486:        return copyin(array + index, value, sizeof(*value));
                    487: }
                    488:
1.55      cgd       489: /*
                    490:  * exec system call
                    491:  */
1.75      christos  492: int
1.258     dsl       493: sys_execve(struct lwp *l, const struct sys_execve_args *uap, register_t *retval)
1.71      thorpej   494: {
1.258     dsl       495:        /* {
1.138     lukem     496:                syscallarg(const char *)        path;
                    497:                syscallarg(char * const *)      argp;
                    498:                syscallarg(char * const *)      envp;
1.258     dsl       499:        } */
1.204     cube      500:
                                /*
                                 * Thin wrapper: unpack the syscall arguments and hand them to
                                 * execve1() together with the native element fetcher.  retval is
                                 * not used here.
                                 */
                    501:        return execve1(l, SCARG(uap, path), SCARG(uap, argp),
                    502:            SCARG(uap, envp), execve_fetch_element);
                    503: }
                    504:
                         /*
                          * fexecve system call entry point.  Not implemented in this revision:
                          * the arguments are ignored and ENOSYS is always returned.
                          */
1.317     manu      505: int
                    506: sys_fexecve(struct lwp *l, const struct sys_fexecve_args *uap,
                    507:     register_t *retval)
                    508: {
                    509:        /* {
                    510:                syscallarg(int)                 fd;
                    511:                syscallarg(char * const *)      argp;
                    512:                syscallarg(char * const *)      envp;
                    513:        } */
                    514:
                    515:        return ENOSYS;
                    516: }
                    517:
1.282     ad        518: /*
                    519:  * Load modules to try and execute an image that we do not understand.
                    520:  * If no execsw entries are present, we load those likely to be needed
                    521:  * in order to run native images only.  Otherwise, we autoload all
                    522:  * possible modules that could let us run the binary.  XXX lame
                    523:  */
                    524: static void
                    525: exec_autoload(void)
                    526: {
                                /* Without MODULAR the whole body is compiled out and this is a no-op. */
                    527: #ifdef MODULAR
                                /* Modules tried when no exec format is registered at all. */
                    528:        static const char * const native[] = {
                    529:                "exec_elf32",
                    530:                "exec_elf64",
                    531:                "exec_script",
                    532:                NULL
                    533:        };
                                /* Modules tried when at least one exec format is already registered. */
                    534:        static const char * const compat[] = {
                    535:                "exec_elf32",
                    536:                "exec_elf64",
                    537:                "exec_script",
                    538:                "exec_aout",
                    539:                "exec_coff",
                    540:                "exec_ecoff",
                    541:                "compat_aoutm68k",
                    542:                "compat_freebsd",
                    543:                "compat_ibcs2",
                    544:                "compat_linux",
                    545:                "compat_linux32",
                    546:                "compat_netbsd32",
                    547:                "compat_sunos",
                    548:                "compat_sunos32",
                    549:                "compat_svr4",
                    550:                "compat_svr4_32",
                    551:                "compat_ultrix",
                    552:                NULL
                    553:        };
                    554:        char const * const *list;
                    555:        int i;
                    556:
                    557:        list = (nexecs == 0 ? native : compat);
                                /*
                                 * Autoload failures are ignored (best effort); yield() is only
                                 * called after a module_autoload() call that returned 0.
                                 */
                    558:        for (i = 0; list[i] != NULL; i++) {
                    559:                if (module_autoload(list[i], MODULE_CLASS_MISC) != 0) {
                    560:                        continue;
                    561:                }
                    562:                yield();
                    563:        }
                    564: #endif
                    565: }
                    566:
1.337     martin    567: static int
                    568: execve_loadvm(struct lwp *l, const char *path, char * const *args,
                    569:        char * const *envs, execve_fetch_element_t fetch_element,
                    570:        struct execve_data * restrict data)
1.204     cube      571: {
1.153     thorpej   572:        int                     error;
1.164     thorpej   573:        struct proc             *p;
1.138     lukem     574:        char                    *dp, *sp;
1.248     christos  575:        size_t                  i, len;
1.265     yamt      576:        struct exec_fakearg     *tmpfap;
1.282     ad        577:        u_int                   modgen;
1.337     martin    578:
                    579:        KASSERT(data != NULL);
1.55      cgd       580:
1.237     ad        581:        p = l->l_proc;
1.282     ad        582:        modgen = 0;
1.164     thorpej   583:
1.294     darran    584:        SDT_PROBE(proc,,,exec, path, 0, 0, 0, 0);
                    585:
1.149     christos  586:        /*
1.269     christos  587:         * Check if we have exceeded our number of processes limit.
                    588:         * This is so that we handle the case where a root daemon
                    589:         * forked, ran setuid to become the desired user and is trying
                    590:         * to exec. The obvious place to do the reference counting check
                    591:         * is setuid(), but we don't do the reference counting check there
                    592:         * like other OS's do because then all the programs that use setuid()
                    593:         * must be modified to check the return code of setuid() and exit().
                    594:         * It is dangerous to make setuid() fail, because it fails open and
                    595:         * the program will continue to run as root. If we make it succeed
                    596:         * and return an error code, again we are not enforcing the limit.
                    597:         * The best place to enforce the limit is here, when the process tries
                    598:         * to execute a new image, because eventually the process will need
                    599:         * to call exec in order to do something useful.
                    600:         */
1.282     ad        601:  retry:
1.287     christos  602:        if ((p->p_flag & PK_SUGID) && kauth_authorize_generic(l->l_cred,
                    603:            KAUTH_GENERIC_ISSUSER, NULL) != 0 && chgproccnt(kauth_cred_getuid(
                    604:            l->l_cred), 0) > p->p_rlimit[RLIMIT_NPROC].rlim_cur)
1.269     christos  605:                return EAGAIN;
                    606:
                    607:        /*
1.237     ad        608:         * Drain existing references and forbid new ones.  The process
                    609:         * should be left alone until we're done here.  This is necessary
                    610:         * to avoid race conditions - e.g. in ptrace() - that might allow
                    611:         * a local user to illicitly obtain elevated privileges.
                    612:         */
1.252     ad        613:        rw_enter(&p->p_reflock, RW_WRITER);
1.149     christos  614:
1.55      cgd       615:        /*
1.129     jdolecek  616:         * Init the namei data to point the file user's program name.
                    617:         * This is done here rather than in check_exec(), so that it's
                    618:         * possible to override this settings if any of makecmd/probe
                    619:         * functions call check_exec() recursively - for example,
                    620:         * see exec_script_makecmds().
                    621:         */
1.337     martin    622:        error = pathbuf_copyin(path, &data->ed_pathbuf);
1.248     christos  623:        if (error) {
1.312     christos  624:                DPRINTF(("%s: pathbuf_copyin path @%p %d\n", __func__,
                    625:                    path, error));
1.200     elad      626:                goto clrflg;
1.248     christos  627:        }
1.337     martin    628:        data->ed_pathstring = pathbuf_stringcopy_get(data->ed_pathbuf);
                    629:
                    630:        data->ed_resolvedpathbuf = PNBUF_GET();
1.295     dholland  631: #ifdef DIAGNOSTIC
1.337     martin    632:        strcpy(data->ed_resolvedpathbuf, "/wrong");
1.295     dholland  633: #endif
1.55      cgd       634:
                    635:        /*
                    636:         * initialize the fields of the exec package.
                    637:         */
1.337     martin    638:        data->ed_pack.ep_name = path;
                    639:        data->ed_pack.ep_kname = data->ed_pathstring;
                    640:        data->ed_pack.ep_resolvedname = data->ed_resolvedpathbuf;
                    641:        data->ed_pack.ep_hdr = kmem_alloc(exec_maxhdrsz, KM_SLEEP);
                    642:        data->ed_pack.ep_hdrlen = exec_maxhdrsz;
                    643:        data->ed_pack.ep_hdrvalid = 0;
                    644:        data->ed_pack.ep_emul_arg = NULL;
                    645:        data->ed_pack.ep_emul_arg_free = NULL;
                    646:        data->ed_pack.ep_vmcmds.evs_cnt = 0;
                    647:        data->ed_pack.ep_vmcmds.evs_used = 0;
                    648:        data->ed_pack.ep_vap = &data->ed_attr;
                    649:        data->ed_pack.ep_flags = 0;
                    650:        data->ed_pack.ep_emul_root = NULL;
                    651:        data->ed_pack.ep_interp = NULL;
                    652:        data->ed_pack.ep_esch = NULL;
                    653:        data->ed_pack.ep_pax_flags = 0;
1.55      cgd       654:
1.237     ad        655:        rw_enter(&exec_lock, RW_READER);
1.130     jdolecek  656:
1.55      cgd       657:        /* see if we can run it. */
1.337     martin    658:        if ((error = check_exec(l, &data->ed_pack, data->ed_pathbuf)) != 0) {
1.261     xtraeme   659:                if (error != ENOENT) {
1.312     christos  660:                        DPRINTF(("%s: check exec failed %d\n",
                    661:                            __func__, error));
1.261     xtraeme   662:                }
1.55      cgd       663:                goto freehdr;
1.248     christos  664:        }
1.55      cgd       665:
                    666:        /* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */
                    667:
                    668:        /* allocate an argument buffer */
1.337     martin    669:        data->ed_argp = pool_get(&exec_pool, PR_WAITOK);
                    670:        KASSERT(data->ed_argp != NULL);
                    671:        dp = data->ed_argp;
                    672:        data->ed_argc = 0;
1.55      cgd       673:
                    674:        /* copy the fake args list, if there's one, freeing it as we go */
1.337     martin    675:        if (data->ed_pack.ep_flags & EXEC_HASARGL) {
                    676:                tmpfap = data->ed_pack.ep_fa;
1.265     yamt      677:                while (tmpfap->fa_arg != NULL) {
                    678:                        const char *cp;
1.55      cgd       679:
1.265     yamt      680:                        cp = tmpfap->fa_arg;
1.55      cgd       681:                        while (*cp)
                    682:                                *dp++ = *cp++;
1.276     ad        683:                        *dp++ = '\0';
1.290     dsl       684:                        ktrexecarg(tmpfap->fa_arg, cp - tmpfap->fa_arg);
1.55      cgd       685:
1.265     yamt      686:                        kmem_free(tmpfap->fa_arg, tmpfap->fa_len);
1.337     martin    687:                        tmpfap++; data->ed_argc++;
1.55      cgd       688:                }
1.337     martin    689:                kmem_free(data->ed_pack.ep_fa, data->ed_pack.ep_fa_len);
                    690:                data->ed_pack.ep_flags &= ~EXEC_HASARGL;
1.55      cgd       691:        }
                    692:
                    693:        /* Now get argv & environment */
1.204     cube      694:        if (args == NULL) {
1.312     christos  695:                DPRINTF(("%s: null args\n", __func__));
1.55      cgd       696:                error = EINVAL;
                    697:                goto bad;
                    698:        }
1.204     cube      699:        /* 'i' will index the argp/envp element to be retrieved */
                    700:        i = 0;
1.337     martin    701:        if (data->ed_pack.ep_flags & EXEC_SKIPARG)
1.204     cube      702:                i++;
1.55      cgd       703:
                    704:        while (1) {
1.337     martin    705:                len = data->ed_argp + ARG_MAX - dp;
1.248     christos  706:                if ((error = (*fetch_element)(args, i, &sp)) != 0) {
1.312     christos  707:                        DPRINTF(("%s: fetch_element args %d\n",
1.313     jakllsch  708:                            __func__, error));
1.55      cgd       709:                        goto bad;
1.248     christos  710:                }
1.55      cgd       711:                if (!sp)
                    712:                        break;
1.74      christos  713:                if ((error = copyinstr(sp, dp, len, &len)) != 0) {
1.312     christos  714:                        DPRINTF(("%s: copyinstr args %d\n", __func__, error));
1.55      cgd       715:                        if (error == ENAMETOOLONG)
                    716:                                error = E2BIG;
                    717:                        goto bad;
                    718:                }
1.247     ad        719:                ktrexecarg(dp, len - 1);
1.55      cgd       720:                dp += len;
1.204     cube      721:                i++;
1.337     martin    722:                data->ed_argc++;
1.55      cgd       723:        }
                    724:
1.337     martin    725:        data->ed_envc = 0;
1.74      christos  726:        /* environment need not be there */
1.204     cube      727:        if (envs != NULL) {
                    728:                i = 0;
1.55      cgd       729:                while (1) {
1.337     martin    730:                        len = data->ed_argp + ARG_MAX - dp;
1.248     christos  731:                        if ((error = (*fetch_element)(envs, i, &sp)) != 0) {
1.312     christos  732:                                DPRINTF(("%s: fetch_element env %d\n",
                    733:                                    __func__, error));
1.55      cgd       734:                                goto bad;
1.248     christos  735:                        }
1.55      cgd       736:                        if (!sp)
                    737:                                break;
1.74      christos  738:                        if ((error = copyinstr(sp, dp, len, &len)) != 0) {
1.312     christos  739:                                DPRINTF(("%s: copyinstr env %d\n",
                    740:                                    __func__, error));
1.55      cgd       741:                                if (error == ENAMETOOLONG)
                    742:                                        error = E2BIG;
                    743:                                goto bad;
                    744:                        }
1.337     martin    745:
1.247     ad        746:                        ktrexecenv(dp, len - 1);
1.55      cgd       747:                        dp += len;
1.204     cube      748:                        i++;
1.337     martin    749:                        data->ed_envc++;
1.55      cgd       750:                }
                    751:        }
1.61      mycroft   752:
                    753:        dp = (char *) ALIGN(dp);
1.55      cgd       754:
1.337     martin    755:        data->ed_szsigcode = data->ed_pack.ep_esch->es_emul->e_esigcode -
                    756:            data->ed_pack.ep_esch->es_emul->e_sigcode;
1.65      fvdl      757:
1.267     dsl       758: #ifdef __MACHINE_STACK_GROWS_UP
                    759: /* See big comment lower down */
                    760: #define        RTLD_GAP        32
                    761: #else
                    762: #define        RTLD_GAP        0
                    763: #endif
                    764:
1.55      cgd       765:        /* Now check if args & environ fit into new stack */
1.337     martin    766:        if (data->ed_pack.ep_flags & EXEC_32) {
                    767:                data->ed_ps_strings_sz = sizeof(struct ps_strings32);
                    768:                len = ((data->ed_argc + data->ed_envc + 2 +
                    769:                    data->ed_pack.ep_esch->es_arglen) *
1.267     dsl       770:                    sizeof(int) + sizeof(int) + dp + RTLD_GAP +
1.337     martin    771:                    data->ed_szsigcode + data->ed_ps_strings_sz + STACK_PTHREADSPACE)
                    772:                    - data->ed_argp;
1.311     joerg     773:        } else {
1.337     martin    774:                data->ed_ps_strings_sz = sizeof(struct ps_strings);
                    775:                len = ((data->ed_argc + data->ed_envc + 2 +
                    776:                    data->ed_pack.ep_esch->es_arglen) *
1.267     dsl       777:                    sizeof(char *) + sizeof(int) + dp + RTLD_GAP +
1.337     martin    778:                    data->ed_szsigcode + data->ed_ps_strings_sz + STACK_PTHREADSPACE)
                    779:                    - data->ed_argp;
1.311     joerg     780:        }
1.67      christos  781:
1.262     elad      782: #ifdef PAX_ASLR
                    783:        if (pax_aslr_active(l))
1.330     tls       784:                len += (cprng_fast32() % PAGE_SIZE);
1.262     elad      785: #endif /* PAX_ASLR */
                    786:
1.334     christos  787:        /* make the stack "safely" aligned */
1.335     christos  788:        len = STACK_LEN_ALIGN(len, STACK_ALIGNBYTES);
1.55      cgd       789:
1.337     martin    790:        if (len > data->ed_pack.ep_ssize) {
                    791:                /* in effect, compare to initial limit */
1.312     christos  792:                DPRINTF(("%s: stack limit exceeded %zu\n", __func__, len));
1.55      cgd       793:                goto bad;
                    794:        }
1.337     martin    795:        /* adjust "active stack depth" for process VSZ */
                    796:        data->ed_pack.ep_ssize = len;
                    797:
                    798:        return 0;
                    799:
                    800:  bad:
                    801:        /* free the vmspace-creation commands, and release their references */
                    802:        kill_vmcmds(&data->ed_pack.ep_vmcmds);
                    803:        /* kill any opened file descriptor, if necessary */
                    804:        if (data->ed_pack.ep_flags & EXEC_HASFD) {
                    805:                data->ed_pack.ep_flags &= ~EXEC_HASFD;
                    806:                fd_close(data->ed_pack.ep_fd);
                    807:        }
                    808:        /* close and put the exec'd file */
                    809:        vn_lock(data->ed_pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
                    810:        VOP_CLOSE(data->ed_pack.ep_vp, FREAD, l->l_cred);
                    811:        vput(data->ed_pack.ep_vp);
                    812:        pool_put(&exec_pool, data->ed_argp);
                    813:
                    814:  freehdr:
                    815:        kmem_free(data->ed_pack.ep_hdr, data->ed_pack.ep_hdrlen);
                    816:        if (data->ed_pack.ep_emul_root != NULL)
                    817:                vrele(data->ed_pack.ep_emul_root);
                    818:        if (data->ed_pack.ep_interp != NULL)
                    819:                vrele(data->ed_pack.ep_interp);
                    820:
                    821:        rw_exit(&exec_lock);
                    822:
                    823:        pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
                    824:        pathbuf_destroy(data->ed_pathbuf);
                    825:        PNBUF_PUT(data->ed_resolvedpathbuf);
                    826:
                    827:  clrflg:
                    828:        rw_exit(&p->p_reflock);
                    829:
                    830:        if (modgen != module_gen && error == ENOEXEC) {
                    831:                modgen = module_gen;
                    832:                exec_autoload();
                    833:                goto retry;
                    834:        }
                    835:
                    836:        SDT_PROBE(proc,,,exec_failure, error, 0, 0, 0, 0);
                    837:        return error;
                    838: }
                    839:
                    840: static int
                    841: execve_runproc(struct lwp *l, struct execve_data * restrict data)
                    842: {
                    843:        int error = 0;
                    844:        struct proc             *p;
                    845:        size_t                  i;
                    846:        char                    *stack, *dp;
                    847:        const char              *commandname;
                    848:        struct ps_strings32     arginfo32;
                    849:        struct exec_vmcmd       *base_vcp;
                    850:        void                    *aip;
                    851:        struct vmspace          *vm;
                    852:        ksiginfo_t              ksi;
                    853:        ksiginfoq_t             kq;
                    854:        bool                    proc_is_new;
                    855:
                    856:        KASSERT(rw_lock_held(&exec_lock));
                    857:        KASSERT(data != NULL);
                    858:        if (data == NULL)
                    859:                return (EINVAL);
                    860:
                    861:        p = l->l_proc;
                    862:        proc_is_new = p->p_vmspace == NULL;
                    863:
                    864:        base_vcp = NULL;
                    865:
                    866:        if (data->ed_pack.ep_flags & EXEC_32)
                    867:                aip = &arginfo32;
                    868:        else
                    869:                aip = &data->ed_arginfo;
1.55      cgd       870:
1.237     ad        871:        /* Get rid of other LWPs. */
1.340     rmind     872:        if (p->p_nlwps > 1) {
1.272     ad        873:                mutex_enter(p->p_lock);
1.237     ad        874:                exit_lwps(l);
1.272     ad        875:                mutex_exit(p->p_lock);
1.237     ad        876:        }
1.164     thorpej   877:        KDASSERT(p->p_nlwps == 1);
                    878:
1.253     ad        879:        /* Destroy any lwpctl info. */
                    880:        if (p->p_lwpctl != NULL)
                    881:                lwp_ctl_exit();
                    882:
1.164     thorpej   883:        /* Remove POSIX timers */
                    884:        timers_free(p, TIMERS_POSIX);
                    885:
1.86      thorpej   886:        /*
                    887:         * Do whatever is necessary to prepare the address space
                    888:         * for remapping.  Note that this might replace the current
                    889:         * vmspace with another!
                    890:         */
1.337     martin    891:        uvmspace_exec(l, data->ed_pack.ep_vm_minaddr, data->ed_pack.ep_vm_maxaddr);
1.55      cgd       892:
1.186     chs       893:        /* record proc's vnode, for use by procfs and others */
                    894:         if (p->p_textvp)
                    895:                 vrele(p->p_textvp);
1.337     martin    896:        vref(data->ed_pack.ep_vp);
                    897:        p->p_textvp = data->ed_pack.ep_vp;
1.186     chs       898:
1.55      cgd       899:        /* Now map address space */
1.86      thorpej   900:        vm = p->p_vmspace;
1.337     martin    901:        vm->vm_taddr = (void *)data->ed_pack.ep_taddr;
                    902:        vm->vm_tsize = btoc(data->ed_pack.ep_tsize);
                    903:        vm->vm_daddr = (void*)data->ed_pack.ep_daddr;
                    904:        vm->vm_dsize = btoc(data->ed_pack.ep_dsize);
                    905:        vm->vm_ssize = btoc(data->ed_pack.ep_ssize);
1.288     mrg       906:        vm->vm_issize = 0;
1.337     martin    907:        vm->vm_maxsaddr = (void *)data->ed_pack.ep_maxsaddr;
                    908:        vm->vm_minsaddr = (void *)data->ed_pack.ep_minsaddr;
1.55      cgd       909:
1.260     christos  910: #ifdef PAX_ASLR
                    911:        pax_aslr_init(l, vm);
                    912: #endif /* PAX_ASLR */
                    913:
1.55      cgd       914:        /* create the new process's VM space by running the vmcmds */
                    915: #ifdef DIAGNOSTIC
1.337     martin    916:        if (data->ed_pack.ep_vmcmds.evs_used == 0)
1.312     christos  917:                panic("%s: no vmcmds", __func__);
1.55      cgd       918: #endif
1.326     reinoud   919:
                    920: #ifdef DEBUG_EXEC
                    921:        {
                    922:                size_t j;
1.337     martin    923:                struct exec_vmcmd *vp = &data->ed_pack.ep_vmcmds.evs_cmds[0];
                    924:                DPRINTF(("vmcmds %u\n", data->ed_pack.ep_vmcmds.evs_used));
                    925:                for (j = 0; j < data->ed_pack.ep_vmcmds.evs_used; j++) {
1.328     reinoud   926:                        DPRINTF(("vmcmd[%zu] = vmcmd_map_%s %#"
1.326     reinoud   927:                            PRIxVADDR"/%#"PRIxVSIZE" fd@%#"
                    928:                            PRIxVSIZE" prot=0%o flags=%d\n", j,
                    929:                            vp[j].ev_proc == vmcmd_map_pagedvn ?
                    930:                            "pagedvn" :
                    931:                            vp[j].ev_proc == vmcmd_map_readvn ?
                    932:                            "readvn" :
                    933:                            vp[j].ev_proc == vmcmd_map_zero ?
                    934:                            "zero" : "*unknown*",
                    935:                            vp[j].ev_addr, vp[j].ev_len,
                    936:                            vp[j].ev_offset, vp[j].ev_prot,
1.327     reinoud   937:                            vp[j].ev_flags));
1.326     reinoud   938:                }
                    939:        }
                    940: #endif /* DEBUG_EXEC */
                    941:
1.337     martin    942:        for (i = 0; i < data->ed_pack.ep_vmcmds.evs_used && !error; i++) {
1.55      cgd       943:                struct exec_vmcmd *vcp;
                    944:
1.337     martin    945:                vcp = &data->ed_pack.ep_vmcmds.evs_cmds[i];
1.114     matt      946:                if (vcp->ev_flags & VMCMD_RELATIVE) {
                    947: #ifdef DIAGNOSTIC
                    948:                        if (base_vcp == NULL)
1.312     christos  949:                                panic("%s: relative vmcmd with no base",
                    950:                                    __func__);
1.114     matt      951:                        if (vcp->ev_flags & VMCMD_BASE)
1.312     christos  952:                                panic("%s: illegal base & relative vmcmd",
                    953:                                    __func__);
1.114     matt      954: #endif
                    955:                        vcp->ev_addr += base_vcp->ev_addr;
                    956:                }
1.212     christos  957:                error = (*vcp->ev_proc)(l, vcp);
1.143     christos  958: #ifdef DEBUG_EXEC
1.111     matt      959:                if (error) {
1.248     christos  960:                        size_t j;
1.337     martin    961:                        struct exec_vmcmd *vp =
                    962:                            &data->ed_pack.ep_vmcmds.evs_cmds[0];
1.327     reinoud   963:                        DPRINTF(("vmcmds %zu/%u, error %d\n", i,
1.337     martin    964:                            data->ed_pack.ep_vmcmds.evs_used, error));
                    965:                        for (j = 0; j < data->ed_pack.ep_vmcmds.evs_used; j++) {
1.327     reinoud   966:                                DPRINTF(("vmcmd[%zu] = vmcmd_map_%s %#"
1.310     christos  967:                                    PRIxVADDR"/%#"PRIxVSIZE" fd@%#"
                    968:                                    PRIxVSIZE" prot=0%o flags=%d\n", j,
                    969:                                    vp[j].ev_proc == vmcmd_map_pagedvn ?
                    970:                                    "pagedvn" :
                    971:                                    vp[j].ev_proc == vmcmd_map_readvn ?
                    972:                                    "readvn" :
                    973:                                    vp[j].ev_proc == vmcmd_map_zero ?
                    974:                                    "zero" : "*unknown*",
                    975:                                    vp[j].ev_addr, vp[j].ev_len,
1.143     christos  976:                                    vp[j].ev_offset, vp[j].ev_prot,
1.327     reinoud   977:                                    vp[j].ev_flags));
1.326     reinoud   978:                                if (j == i)
1.327     reinoud   979:                                        DPRINTF(("     ^--- failed\n"));
1.326     reinoud   980:                        }
1.111     matt      981:                }
1.143     christos  982: #endif /* DEBUG_EXEC */
1.114     matt      983:                if (vcp->ev_flags & VMCMD_BASE)
                    984:                        base_vcp = vcp;
1.55      cgd       985:        }
                    986:
                    987:        /* free the vmspace-creation commands, and release their references */
1.337     martin    988:        kill_vmcmds(&data->ed_pack.ep_vmcmds);
1.55      cgd       989:
1.337     martin    990:        vn_lock(data->ed_pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
                    991:        VOP_CLOSE(data->ed_pack.ep_vp, FREAD, l->l_cred);
                    992:        vput(data->ed_pack.ep_vp);
1.186     chs       993:
1.55      cgd       994:        /* if an error happened, deallocate and punt */
1.111     matt      995:        if (error) {
1.312     christos  996:                DPRINTF(("%s: vmcmd %zu failed: %d\n", __func__, i - 1, error));
1.55      cgd       997:                goto exec_abort;
1.111     matt      998:        }
1.55      cgd       999:
                   1000:        /* remember information about the process */
1.337     martin   1001:        data->ed_arginfo.ps_nargvstr = data->ed_argc;
                   1002:        data->ed_arginfo.ps_nenvstr = data->ed_envc;
1.55      cgd      1003:
1.255     christos 1004:        /* set command name & other accounting info */
1.337     martin   1005:        commandname = strrchr(data->ed_pack.ep_resolvedname, '/');
1.295     dholland 1006:        if (commandname != NULL) {
                   1007:                commandname++;
                   1008:        } else {
1.337     martin   1009:                commandname = data->ed_pack.ep_resolvedname;
1.295     dholland 1010:        }
                   1011:        i = min(strlen(commandname), MAXCOMLEN);
                   1012:        (void)memcpy(p->p_comm, commandname, i);
1.255     christos 1013:        p->p_comm[i] = '\0';
                   1014:
                   1015:        dp = PNBUF_GET();
                   1016:        /*
                   1017:         * If the path starts with /, we don't need to do any work.
                   1018:         * This handles the majority of the cases.
                   1019:         * In the future perhaps we could canonicalize it?
                   1020:         */
1.337     martin   1021:        if (data->ed_pathstring[0] == '/')
                   1022:                (void)strlcpy(data->ed_pack.ep_path = dp, data->ed_pathstring,
                   1023:                    MAXPATHLEN);
1.333     dholland 1024: #ifdef notyet
1.255     christos 1025:        /*
                   1026:         * Although this works most of the time [since the entry was just
                   1027:         * entered in the cache] we don't use it because it theoretically
                   1028:         * can fail and it is not the cleanest interface, because there
                   1029:         * could be races. When the namei cache is re-written, this can
                   1030:         * be changed to use the appropriate function.
                   1031:         */
                   1032:        else if (!(error = vnode_to_path(dp, MAXPATHLEN, p->p_textvp, l, p)))
1.337     martin   1033:                data->ed_pack.ep_path = dp;
1.255     christos 1034: #endif
                   1035:        else {
1.333     dholland 1036: #ifdef notyet
1.255     christos 1037:                printf("Cannot get path for pid %d [%s] (error %d)",
                   1038:                    (int)p->p_pid, p->p_comm, error);
                   1039: #endif
1.337     martin   1040:                data->ed_pack.ep_path = NULL;
1.255     christos 1041:                PNBUF_PUT(dp);
                   1042:        }
                   1043:
1.163     chs      1044:        stack = (char *)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
1.337     martin   1045:                STACK_PTHREADSPACE + data->ed_ps_strings_sz + data->ed_szsigcode),
                   1046:                data->ed_pack.ep_ssize - (data->ed_ps_strings_sz + data->ed_szsigcode));
1.267     dsl      1047:
1.163     chs      1048: #ifdef __MACHINE_STACK_GROWS_UP
                   1049:        /*
                   1050:         * The copyargs call always copies into lower addresses
                   1051:         * first, moving towards higher addresses, starting with
1.183     junyoung 1052:         * the stack pointer that we give.  When the stack grows
                   1053:         * down, this puts argc/argv/envp very shallow on the
1.267     dsl      1054:         * stack, right at the first user stack pointer.
                   1055:         * When the stack grows up, the situation is reversed.
1.163     chs      1056:         *
                   1057:         * Normally, this is no big deal.  But the ld_elf.so _rtld()
1.183     junyoung 1058:         * function expects to be called with a single pointer to
                   1059:         * a region that has a few words it can stash values into,
1.163     chs      1060:         * followed by argc/argv/envp.  When the stack grows down,
                   1061:         * it's easy to decrement the stack pointer a little bit to
                   1062:         * allocate the space for these few words and pass the new
                   1063:         * stack pointer to _rtld.  When the stack grows up, however,
1.171     chs      1064:         * a few words before argc is part of the signal trampoline, XXX
1.163     chs      1065:         * so we have a problem.
                   1066:         *
1.183     junyoung 1067:         * Instead of changing how _rtld works, we take the easy way
1.267     dsl      1068:         * out and steal 32 bytes before we call copyargs.
1.337     martin   1069:         * This extra space was allowed for when 'pack.ep_ssize' was calculated.
1.163     chs      1070:         */
1.267     dsl      1071:        stack += RTLD_GAP;
1.163     chs      1072: #endif /* __MACHINE_STACK_GROWS_UP */
1.337     martin   1073:
                   1074:        /* Now copy argc, args & environ to new stack */
                   1075:        error = (*data->ed_pack.ep_esch->es_copyargs)(l, &data->ed_pack,
                   1076:            &data->ed_arginfo, &stack, data->ed_argp);
1.163     chs      1077:
1.337     martin   1078:        if (data->ed_pack.ep_path) {
                   1079:                PNBUF_PUT(data->ed_pack.ep_path);
                   1080:                data->ed_pack.ep_path = NULL;
1.255     christos 1081:        }
1.144     christos 1082:        if (error) {
1.312     christos 1083:                DPRINTF(("%s: copyargs failed %d\n", __func__, error));
1.55      cgd      1084:                goto exec_abort;
1.111     matt     1085:        }
1.144     christos 1086:        /* Move the stack back to original point */
1.337     martin   1087:        stack = (char *)STACK_GROW(vm->vm_minsaddr, data->ed_pack.ep_ssize);
1.55      cgd      1088:
1.121     eeh      1089:        /* fill process ps_strings info */
1.311     joerg    1090:        p->p_psstrp = (vaddr_t)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
1.337     martin   1091:            STACK_PTHREADSPACE), data->ed_ps_strings_sz);
1.311     joerg    1092:
1.337     martin   1093:        if (data->ed_pack.ep_flags & EXEC_32) {
                   1094:                arginfo32.ps_argvstr = (vaddr_t)data->ed_arginfo.ps_argvstr;
                   1095:                arginfo32.ps_nargvstr = data->ed_arginfo.ps_nargvstr;
                   1096:                arginfo32.ps_envstr = (vaddr_t)data->ed_arginfo.ps_envstr;
                   1097:                arginfo32.ps_nenvstr = data->ed_arginfo.ps_nenvstr;
1.311     joerg    1098:        }
1.121     eeh      1099:
1.55      cgd      1100:        /* copy out the process's ps_strings structure */
1.337     martin   1101:        if ((error = copyout(aip, (void *)p->p_psstrp, data->ed_ps_strings_sz))
                   1102:            != 0) {
1.312     christos 1103:                DPRINTF(("%s: ps_strings copyout %p->%p size %zu failed\n",
1.337     martin   1104:                    __func__, aip, (void *)p->p_psstrp, data->ed_ps_strings_sz));
1.55      cgd      1105:                goto exec_abort;
1.111     matt     1106:        }
1.109     simonb   1107:
1.307     pooka    1108:        cwdexec(p);
1.270     ad       1109:        fd_closeexec();         /* handle close on exec */
1.315     alnsn    1110:
                   1111:        if (__predict_false(ktrace_on))
                   1112:                fd_ktrexecfd();
                   1113:
1.55      cgd      1114:        execsigs(p);            /* reset catched signals */
1.183     junyoung 1115:
1.164     thorpej  1116:        l->l_ctxlink = NULL;    /* reset ucontext link */
1.55      cgd      1117:
1.255     christos 1118:
1.55      cgd      1119:        p->p_acflag &= ~AFORK;
1.272     ad       1120:        mutex_enter(p->p_lock);
1.238     pavel    1121:        p->p_flag |= PK_EXEC;
1.272     ad       1122:        mutex_exit(p->p_lock);
1.237     ad       1123:
                   1124:        /*
                   1125:         * Stop profiling.
                   1126:         */
                   1127:        if ((p->p_stflag & PST_PROFIL) != 0) {
                   1128:                mutex_spin_enter(&p->p_stmutex);
                   1129:                stopprofclock(p);
                   1130:                mutex_spin_exit(&p->p_stmutex);
                   1131:        }
                   1132:
                   1133:        /*
1.275     ad       1134:         * It's OK to test PL_PPWAIT unlocked here, as other LWPs have
1.237     ad       1135:         * exited and exec()/exit() are the only places it will be cleared.
                   1136:         */
1.275     ad       1137:        if ((p->p_lflag & PL_PPWAIT) != 0) {
1.271     ad       1138:                mutex_enter(proc_lock);
1.308     pooka    1139:                l->l_lwpctl = NULL; /* was on loan from blocked parent */
1.275     ad       1140:                p->p_lflag &= ~PL_PPWAIT;
1.237     ad       1141:                cv_broadcast(&p->p_pptr->p_waitcv);
1.271     ad       1142:                mutex_exit(proc_lock);
1.55      cgd      1143:        }
                   1144:
                   1145:        /*
1.237     ad       1146:         * Deal with set[ug]id.  MNT_NOSUID has already been used to disable
                   1147:         * s[ug]id.  It's OK to check for PSL_TRACED here as we have blocked
                   1148:         * out additional references on the process for the moment.
1.55      cgd      1149:         */
1.237     ad       1150:        if ((p->p_slflag & PSL_TRACED) == 0 &&
1.141     thorpej  1151:
1.337     martin   1152:            (((data->ed_attr.va_mode & S_ISUID) != 0 &&
                   1153:              kauth_cred_geteuid(l->l_cred) != data->ed_attr.va_uid) ||
1.141     thorpej  1154:
1.337     martin   1155:             ((data->ed_attr.va_mode & S_ISGID) != 0 &&
                   1156:              kauth_cred_getegid(l->l_cred) != data->ed_attr.va_gid))) {
1.141     thorpej  1157:                /*
                   1158:                 * Mark the process as SUGID before we do
                   1159:                 * anything that might block.
                   1160:                 */
1.237     ad       1161:                proc_crmod_enter();
1.240     thorpej  1162:                proc_crmod_leave(NULL, NULL, true);
1.152     christos 1163:
                   1164:                /* Make sure file descriptors 0..2 are in use. */
1.270     ad       1165:                if ((error = fd_checkstd()) != 0) {
1.312     christos 1166:                        DPRINTF(("%s: fdcheckstd failed %d\n",
                   1167:                            __func__, error));
1.152     christos 1168:                        goto exec_abort;
1.209     christos 1169:                }
1.141     thorpej  1170:
1.220     ad       1171:                /*
                   1172:                 * Copy the credential so other references don't see our
                   1173:                 * changes.
                   1174:                 */
1.221     ad       1175:                l->l_cred = kauth_cred_copy(l->l_cred);
1.55      cgd      1176: #ifdef KTRACE
                   1177:                /*
1.268     elad     1178:                 * If the persistent trace flag isn't set, turn off.
1.55      cgd      1179:                 */
1.237     ad       1180:                if (p->p_tracep) {
1.247     ad       1181:                        mutex_enter(&ktrace_lock);
1.268     elad     1182:                        if (!(p->p_traceflag & KTRFAC_PERSISTENT))
1.237     ad       1183:                                ktrderef(p);
1.247     ad       1184:                        mutex_exit(&ktrace_lock);
1.237     ad       1185:                }
1.55      cgd      1186: #endif
1.337     martin   1187:                if (data->ed_attr.va_mode & S_ISUID)
                   1188:                        kauth_cred_seteuid(l->l_cred, data->ed_attr.va_uid);
                   1189:                if (data->ed_attr.va_mode & S_ISGID)
                   1190:                        kauth_cred_setegid(l->l_cred, data->ed_attr.va_gid);
1.210     christos 1191:        } else {
1.221     ad       1192:                if (kauth_cred_geteuid(l->l_cred) ==
                   1193:                    kauth_cred_getuid(l->l_cred) &&
                   1194:                    kauth_cred_getegid(l->l_cred) ==
                   1195:                    kauth_cred_getgid(l->l_cred))
1.238     pavel    1196:                        p->p_flag &= ~PK_SUGID;
1.210     christos 1197:        }
1.220     ad       1198:
                   1199:        /*
                   1200:         * Copy the credential so other references don't see our changes.
                   1201:         * Test to see if this is necessary first, since in the common case
                   1202:         * we won't need a private reference.
                   1203:         */
1.221     ad       1204:        if (kauth_cred_geteuid(l->l_cred) != kauth_cred_getsvuid(l->l_cred) ||
                   1205:            kauth_cred_getegid(l->l_cred) != kauth_cred_getsvgid(l->l_cred)) {
                   1206:                l->l_cred = kauth_cred_copy(l->l_cred);
                   1207:                kauth_cred_setsvuid(l->l_cred, kauth_cred_geteuid(l->l_cred));
                   1208:                kauth_cred_setsvgid(l->l_cred, kauth_cred_getegid(l->l_cred));
1.220     ad       1209:        }
1.155     gmcgarry 1210:
1.221     ad       1211:        /* Update the master credentials. */
1.227     ad       1212:        if (l->l_cred != p->p_cred) {
                   1213:                kauth_cred_t ocred;
                   1214:
                   1215:                kauth_cred_hold(l->l_cred);
1.272     ad       1216:                mutex_enter(p->p_lock);
1.227     ad       1217:                ocred = p->p_cred;
                   1218:                p->p_cred = l->l_cred;
1.272     ad       1219:                mutex_exit(p->p_lock);
1.227     ad       1220:                kauth_cred_free(ocred);
                   1221:        }
1.221     ad       1222:
1.155     gmcgarry 1223: #if defined(__HAVE_RAS)
                   1224:        /*
                   1225:         * Remove all RASs from the address space.
                   1226:         */
1.251     ad       1227:        ras_purgeall();
1.155     gmcgarry 1228: #endif
1.107     fvdl     1229:
                   1230:        doexechooks(p);
1.55      cgd      1231:
                   1232:        /* setup new registers and do misc. setup. */
1.337     martin   1233:        (*data->ed_pack.ep_esch->es_emul->e_setregs)(l, &data->ed_pack,
                   1234:             (vaddr_t)stack);
                   1235:        if (data->ed_pack.ep_esch->es_setregs)
                   1236:                (*data->ed_pack.ep_esch->es_setregs)(l, &data->ed_pack,
                   1237:                    (vaddr_t)stack);
1.55      cgd      1238:
1.309     joerg    1239:        /* Provide a consistent LWP private setting */
                   1240:        (void)lwp_setprivate(l, NULL);
                   1241:
1.316     matt     1242:        /* Discard all PCU state; need to start fresh */
                   1243:        pcu_discard_all(l);
                   1244:
1.171     chs      1245:        /* map the process's signal trampoline code */
1.337     martin   1246:        if ((error = exec_sigcode_map(p, data->ed_pack.ep_esch->es_emul)) != 0) {
1.312     christos 1247:                DPRINTF(("%s: map sigcode failed %d\n", __func__, error));
1.171     chs      1248:                goto exec_abort;
1.209     christos 1249:        }
1.171     chs      1250:
1.337     martin   1251:        pool_put(&exec_pool, data->ed_argp);
1.276     ad       1252:
                   1253:        /* notify others that we exec'd */
                   1254:        KNOTE(&p->p_klist, NOTE_EXEC);
                   1255:
1.337     martin   1256:        kmem_free(data->ed_pack.ep_hdr, data->ed_pack.ep_hdrlen);
1.122     jdolecek 1257:
1.339     martin   1258:        SDT_PROBE(proc,,,exec_success, data->ed_pack.ep_name, 0, 0, 0, 0);
1.294     darran   1259:
1.244     dsl      1260:        /* The emulation root will usually have been found when we looked
                   1261:         * for the elf interpreter (or similar), if not look now. */
1.337     martin   1262:        if (data->ed_pack.ep_esch->es_emul->e_path != NULL &&
                   1263:            data->ed_pack.ep_emul_root == NULL)
                   1264:                emul_find_root(l, &data->ed_pack);
1.244     dsl      1265:
                   1266:        /* Any old emulation root got removed by fdcloseexec */
1.259     ad       1267:        rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER);
1.337     martin   1268:        p->p_cwdi->cwdi_edir = data->ed_pack.ep_emul_root;
1.259     ad       1269:        rw_exit(&p->p_cwdi->cwdi_lock);
1.337     martin   1270:        data->ed_pack.ep_emul_root = NULL;
                   1271:        if (data->ed_pack.ep_interp != NULL)
                   1272:                vrele(data->ed_pack.ep_interp);
1.244     dsl      1273:
1.122     jdolecek 1274:        /*
1.194     peter    1275:         * Call emulation specific exec hook. This can setup per-process
1.122     jdolecek 1276:         * p->p_emuldata or do any other per-process stuff an emulation needs.
                   1277:         *
                   1278:         * If we are executing process of different emulation than the
                   1279:         * original forked process, call e_proc_exit() of the old emulation
                   1280:         * first, then e_proc_exec() of new emulation. If the emulation is
                   1281:         * same, the exec hook code should deallocate any old emulation
                   1282:         * resources held previously by this process.
                   1283:         */
1.124     jdolecek 1284:        if (p->p_emul && p->p_emul->e_proc_exit
1.337     martin   1285:            && p->p_emul != data->ed_pack.ep_esch->es_emul)
1.122     jdolecek 1286:                (*p->p_emul->e_proc_exit)(p);
                   1287:
1.123     jdolecek 1288:        /*
1.299     chs      1289:         * This is now LWP 1.
                   1290:         */
                   1291:        mutex_enter(p->p_lock);
                   1292:        p->p_nlwpid = 1;
                   1293:        l->l_lid = 1;
                   1294:        mutex_exit(p->p_lock);
                   1295:
                   1296:        /*
1.123     jdolecek 1297:         * Call exec hook. Emulation code may NOT store reference to anything
                   1298:         * from &pack.
                   1299:         */
1.337     martin   1300:        if (data->ed_pack.ep_esch->es_emul->e_proc_exec)
                   1301:                (*data->ed_pack.ep_esch->es_emul->e_proc_exec)(p, &data->ed_pack);
1.122     jdolecek 1302:
                   1303:        /* update p_emul, the old value is no longer needed */
1.337     martin   1304:        p->p_emul = data->ed_pack.ep_esch->es_emul;
1.148     thorpej  1305:
                   1306:        /* ...and the same for p_execsw */
1.337     martin   1307:        p->p_execsw = data->ed_pack.ep_esch;
1.148     thorpej  1308:
1.133     mycroft  1309: #ifdef __HAVE_SYSCALL_INTERN
                   1310:        (*p->p_emul->e_syscall_intern)(p);
                   1311: #endif
1.247     ad       1312:        ktremul();
1.85      mycroft  1313:
1.252     ad       1314:        /* Allow new references from the debugger/procfs. */
1.341     martin   1315:        rw_exit(&p->p_reflock);
1.237     ad       1316:        rw_exit(&exec_lock);
1.162     manu     1317:
1.271     ad       1318:        mutex_enter(proc_lock);
1.237     ad       1319:
                   1320:        if ((p->p_slflag & (PSL_TRACED|PSL_SYSCALL)) == PSL_TRACED) {
                   1321:                KSI_INIT_EMPTY(&ksi);
                   1322:                ksi.ksi_signo = SIGTRAP;
                   1323:                ksi.ksi_lid = l->l_lid;
                   1324:                kpsignal(p, &ksi, NULL);
                   1325:        }
1.162     manu     1326:
1.237     ad       1327:        if (p->p_sflag & PS_STOPEXEC) {
                   1328:                KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
1.175     dsl      1329:                p->p_pptr->p_nstopchild++;
1.237     ad       1330:                p->p_pptr->p_waited = 0;
1.272     ad       1331:                mutex_enter(p->p_lock);
1.237     ad       1332:                ksiginfo_queue_init(&kq);
                   1333:                sigclearall(p, &contsigmask, &kq);
                   1334:                lwp_lock(l);
                   1335:                l->l_stat = LSSTOP;
1.162     manu     1336:                p->p_stat = SSTOP;
1.164     thorpej  1337:                p->p_nrlwps--;
1.304     rmind    1338:                lwp_unlock(l);
1.272     ad       1339:                mutex_exit(p->p_lock);
1.271     ad       1340:                mutex_exit(proc_lock);
1.304     rmind    1341:                lwp_lock(l);
1.245     yamt     1342:                mi_switch(l);
1.237     ad       1343:                ksiginfo_queue_drain(&kq);
                   1344:                KERNEL_LOCK(l->l_biglocks, l);
                   1345:        } else {
1.271     ad       1346:                mutex_exit(proc_lock);
1.162     manu     1347:        }
                   1348:
1.337     martin   1349:        pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
                   1350:        pathbuf_destroy(data->ed_pathbuf);
                   1351:        PNBUF_PUT(data->ed_resolvedpathbuf);
1.327     reinoud  1352:        DPRINTF(("%s finished\n", __func__));
1.85      mycroft  1353:        return (EJUSTRETURN);
1.55      cgd      1354:
1.138     lukem    1355:  exec_abort:
1.294     darran   1356:        SDT_PROBE(proc,,,exec_failure, error, 0, 0, 0, 0);
1.297     rmind    1357:        rw_exit(&p->p_reflock);
                   1358:        rw_exit(&exec_lock);
                   1359:
1.337     martin   1360:        pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
                   1361:        pathbuf_destroy(data->ed_pathbuf);
                   1362:        PNBUF_PUT(data->ed_resolvedpathbuf);
1.130     jdolecek 1363:
1.55      cgd      1364:        /*
                   1365:         * the old process doesn't exist anymore.  exit gracefully.
                   1366:         * get rid of the (new) address space we have created, if any, get rid
                   1367:         * of our namei data and vnode, and exit noting failure
                   1368:         */
1.88      mrg      1369:        uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
                   1370:                VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
1.337     martin   1371:        exec_free_emul_arg(&data->ed_pack);
                   1372:        pool_put(&exec_pool, data->ed_argp);
                   1373:        kmem_free(data->ed_pack.ep_hdr, data->ed_pack.ep_hdrlen);
                   1374:        if (data->ed_pack.ep_emul_root != NULL)
                   1375:                vrele(data->ed_pack.ep_emul_root);
                   1376:        if (data->ed_pack.ep_interp != NULL)
                   1377:                vrele(data->ed_pack.ep_interp);
1.237     ad       1378:
1.252     ad       1379:        /* Acquire the sched-state mutex (exit1() will release it). */
1.337     martin   1380:        if (!proc_is_new) {
                   1381:                mutex_enter(p->p_lock);
                   1382:                exit1(l, W_EXITCODE(error, SIGABRT));
                   1383:        }
1.55      cgd      1384:
                   1385:        /* NOTREACHED */
                   1386:        return 0;
1.67      christos 1387: }
                   1388:
1.144     christos 1389: int
1.337     martin   1390: execve1(struct lwp *l, const char *path, char * const *args,
                   1391:     char * const *envs, execve_fetch_element_t fetch_element)
                   1392: {
                   1393:        struct execve_data data;
                   1394:        int error;
                   1395:
                   1396:        error = execve_loadvm(l, path, args, envs, fetch_element, &data);
                   1397:        if (error)
                   1398:                return error;
                   1399:        error = execve_runproc(l, &data);
                   1400:        return error;
                   1401: }
                   1402:
                   1403: int
1.231     yamt     1404: copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo,
                   1405:     char **stackp, void *argp)
1.67      christos 1406: {
1.138     lukem    1407:        char    **cpp, *dp, *sp;
                   1408:        size_t  len;
                   1409:        void    *nullp;
                   1410:        long    argc, envc;
1.144     christos 1411:        int     error;
1.138     lukem    1412:
1.144     christos 1413:        cpp = (char **)*stackp;
1.138     lukem    1414:        nullp = NULL;
                   1415:        argc = arginfo->ps_nargvstr;
                   1416:        envc = arginfo->ps_nenvstr;
1.305     matt     1417:        if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0) {
1.312     christos 1418:                COPYPRINTF("", cpp - 1, sizeof(argc));
1.144     christos 1419:                return error;
1.305     matt     1420:        }
1.67      christos 1421:
1.244     dsl      1422:        dp = (char *) (cpp + argc + envc + 2 + pack->ep_esch->es_arglen);
1.67      christos 1423:        sp = argp;
                   1424:
                   1425:        /* XXX don't copy them out, remap them! */
1.69      mycroft  1426:        arginfo->ps_argvstr = cpp; /* remember location of argv for later */
1.67      christos 1427:
1.305     matt     1428:        for (; --argc >= 0; sp += len, dp += len) {
                   1429:                if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
1.312     christos 1430:                        COPYPRINTF("", cpp - 1, sizeof(dp));
1.305     matt     1431:                        return error;
                   1432:                }
                   1433:                if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
1.313     jakllsch 1434:                        COPYPRINTF("str", dp, (size_t)ARG_MAX);
1.144     christos 1435:                        return error;
1.305     matt     1436:                }
                   1437:        }
1.67      christos 1438:
1.305     matt     1439:        if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
1.312     christos 1440:                COPYPRINTF("", cpp - 1, sizeof(nullp));
1.144     christos 1441:                return error;
1.305     matt     1442:        }
1.67      christos 1443:
1.69      mycroft  1444:        arginfo->ps_envstr = cpp; /* remember location of envp for later */
1.67      christos 1445:
1.305     matt     1446:        for (; --envc >= 0; sp += len, dp += len) {
                   1447:                if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
1.312     christos 1448:                        COPYPRINTF("", cpp - 1, sizeof(dp));
1.144     christos 1449:                        return error;
1.305     matt     1450:                }
                   1451:                if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
1.313     jakllsch 1452:                        COPYPRINTF("str", dp, (size_t)ARG_MAX);
1.305     matt     1453:                        return error;
                   1454:                }
1.337     martin   1455:
1.305     matt     1456:        }
1.67      christos 1457:
1.305     matt     1458:        if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
1.312     christos 1459:                COPYPRINTF("", cpp - 1, sizeof(nullp));
1.144     christos 1460:                return error;
1.305     matt     1461:        }
1.67      christos 1462:
1.144     christos 1463:        *stackp = (char *)cpp;
                   1464:        return 0;
1.55      cgd      1465: }
1.130     jdolecek 1466:
                   1467:
                   1468: /*
1.282     ad       1469:  * Add execsw[] entries.
1.130     jdolecek 1470:  */
                   1471: int
1.282     ad       1472: exec_add(struct execsw *esp, int count)
1.130     jdolecek 1473: {
1.282     ad       1474:        struct exec_entry       *it;
                   1475:        int                     i;
1.130     jdolecek 1476:
1.283     ad       1477:        if (count == 0) {
                   1478:                return 0;
                   1479:        }
1.130     jdolecek 1480:
1.282     ad       1481:        /* Check for duplicates. */
1.237     ad       1482:        rw_enter(&exec_lock, RW_WRITER);
1.282     ad       1483:        for (i = 0; i < count; i++) {
                   1484:                LIST_FOREACH(it, &ex_head, ex_list) {
                   1485:                        /* assume unique (makecmds, probe_func, emulation) */
                   1486:                        if (it->ex_sw->es_makecmds == esp[i].es_makecmds &&
                   1487:                            it->ex_sw->u.elf_probe_func ==
                   1488:                            esp[i].u.elf_probe_func &&
                   1489:                            it->ex_sw->es_emul == esp[i].es_emul) {
                   1490:                                rw_exit(&exec_lock);
                   1491:                                return EEXIST;
1.130     jdolecek 1492:                        }
                   1493:                }
                   1494:        }
                   1495:
1.282     ad       1496:        /* Allocate new entries. */
                   1497:        for (i = 0; i < count; i++) {
                   1498:                it = kmem_alloc(sizeof(*it), KM_SLEEP);
                   1499:                it->ex_sw = &esp[i];
                   1500:                LIST_INSERT_HEAD(&ex_head, it, ex_list);
1.130     jdolecek 1501:        }
                   1502:
                   1503:        /* update execsw[] */
                   1504:        exec_init(0);
1.237     ad       1505:        rw_exit(&exec_lock);
1.282     ad       1506:        return 0;
1.130     jdolecek 1507: }
                   1508:
                   1509: /*
                   1510:  * Remove execsw[] entry.
                   1511:  */
                   1512: int
1.282     ad       1513: exec_remove(struct execsw *esp, int count)
1.130     jdolecek 1514: {
1.282     ad       1515:        struct exec_entry       *it, *next;
                   1516:        int                     i;
                   1517:        const struct proclist_desc *pd;
                   1518:        proc_t                  *p;
                   1519:
1.283     ad       1520:        if (count == 0) {
                   1521:                return 0;
                   1522:        }
1.130     jdolecek 1523:
1.282     ad       1524:        /* Abort if any are busy. */
1.237     ad       1525:        rw_enter(&exec_lock, RW_WRITER);
1.282     ad       1526:        for (i = 0; i < count; i++) {
                   1527:                mutex_enter(proc_lock);
                   1528:                for (pd = proclists; pd->pd_list != NULL; pd++) {
                   1529:                        PROCLIST_FOREACH(p, pd->pd_list) {
                   1530:                                if (p->p_execsw == &esp[i]) {
                   1531:                                        mutex_exit(proc_lock);
                   1532:                                        rw_exit(&exec_lock);
                   1533:                                        return EBUSY;
                   1534:                                }
                   1535:                        }
                   1536:                }
                   1537:                mutex_exit(proc_lock);
                   1538:        }
1.130     jdolecek 1539:
1.282     ad       1540:        /* None are busy, so remove them all. */
                   1541:        for (i = 0; i < count; i++) {
                   1542:                for (it = LIST_FIRST(&ex_head); it != NULL; it = next) {
                   1543:                        next = LIST_NEXT(it, ex_list);
                   1544:                        if (it->ex_sw == &esp[i]) {
                   1545:                                LIST_REMOVE(it, ex_list);
                   1546:                                kmem_free(it, sizeof(*it));
                   1547:                                break;
                   1548:                        }
                   1549:                }
1.130     jdolecek 1550:        }
                   1551:
                   1552:        /* update execsw[] */
                   1553:        exec_init(0);
1.237     ad       1554:        rw_exit(&exec_lock);
1.282     ad       1555:        return 0;
1.130     jdolecek 1556: }
                   1557:
                   1558: /*
                   1559:  * Initialize exec structures. If init_boot is true, also does necessary
                   1560:  * one-time initialization (it's called from main() that way).
1.147     jdolecek 1561:  * Once system is multiuser, this should be called with exec_lock held,
1.130     jdolecek 1562:  * i.e. via exec_{add|remove}().
                   1563:  */
                   1564: int
1.138     lukem    1565: exec_init(int init_boot)
1.130     jdolecek 1566: {
1.282     ad       1567:        const struct execsw     **sw;
                   1568:        struct exec_entry       *ex;
                   1569:        SLIST_HEAD(,exec_entry) first;
                   1570:        SLIST_HEAD(,exec_entry) any;
                   1571:        SLIST_HEAD(,exec_entry) last;
                   1572:        int                     i, sz;
1.130     jdolecek 1573:
                   1574:        if (init_boot) {
                   1575:                /* do one-time initializations */
1.237     ad       1576:                rw_init(&exec_lock);
1.259     ad       1577:                mutex_init(&sigobject_lock, MUTEX_DEFAULT, IPL_NONE);
1.277     ad       1578:                pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
                   1579:                    "execargs", &exec_palloc, IPL_NONE);
                   1580:                pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);
1.282     ad       1581:        } else {
                   1582:                KASSERT(rw_write_held(&exec_lock));
                   1583:        }
1.130     jdolecek 1584:
1.282     ad       1585:        /* Sort each entry onto the appropriate queue. */
                   1586:        SLIST_INIT(&first);
                   1587:        SLIST_INIT(&any);
                   1588:        SLIST_INIT(&last);
                   1589:        sz = 0;
                   1590:        LIST_FOREACH(ex, &ex_head, ex_list) {
                   1591:                switch(ex->ex_sw->es_prio) {
                   1592:                case EXECSW_PRIO_FIRST:
                   1593:                        SLIST_INSERT_HEAD(&first, ex, ex_slist);
                   1594:                        break;
                   1595:                case EXECSW_PRIO_ANY:
                   1596:                        SLIST_INSERT_HEAD(&any, ex, ex_slist);
                   1597:                        break;
                   1598:                case EXECSW_PRIO_LAST:
                   1599:                        SLIST_INSERT_HEAD(&last, ex, ex_slist);
                   1600:                        break;
                   1601:                default:
1.312     christos 1602:                        panic("%s", __func__);
1.282     ad       1603:                        break;
1.130     jdolecek 1604:                }
1.282     ad       1605:                sz++;
1.130     jdolecek 1606:        }
                   1607:
                   1608:        /*
1.282     ad       1609:         * Create new execsw[].  Ensure we do not try a zero-sized
                   1610:         * allocation.
1.130     jdolecek 1611:         */
1.282     ad       1612:        sw = kmem_alloc(sz * sizeof(struct execsw *) + 1, KM_SLEEP);
                   1613:        i = 0;
                   1614:        SLIST_FOREACH(ex, &first, ex_slist) {
                   1615:                sw[i++] = ex->ex_sw;
                   1616:        }
                   1617:        SLIST_FOREACH(ex, &any, ex_slist) {
                   1618:                sw[i++] = ex->ex_sw;
                   1619:        }
                   1620:        SLIST_FOREACH(ex, &last, ex_slist) {
                   1621:                sw[i++] = ex->ex_sw;
1.130     jdolecek 1622:        }
1.183     junyoung 1623:
1.282     ad       1624:        /* Replace old execsw[] and free used memory. */
                   1625:        if (execsw != NULL) {
                   1626:                kmem_free(__UNCONST(execsw),
                   1627:                    nexecs * sizeof(struct execsw *) + 1);
1.130     jdolecek 1628:        }
1.282     ad       1629:        execsw = sw;
                   1630:        nexecs = sz;
1.130     jdolecek 1631:
1.282     ad       1632:        /* Figure out the maximum size of an exec header. */
                   1633:        exec_maxhdrsz = sizeof(int);
1.130     jdolecek 1634:        for (i = 0; i < nexecs; i++) {
                   1635:                if (execsw[i]->es_hdrsz > exec_maxhdrsz)
                   1636:                        exec_maxhdrsz = execsw[i]->es_hdrsz;
                   1637:        }
                   1638:
                   1639:        return 0;
                   1640: }
1.171     chs      1641:
                   1642: static int
                   1643: exec_sigcode_map(struct proc *p, const struct emul *e)
                   1644: {
                   1645:        vaddr_t va;
                   1646:        vsize_t sz;
                   1647:        int error;
                   1648:        struct uvm_object *uobj;
                   1649:
1.184     drochner 1650:        sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;
                   1651:
                   1652:        if (e->e_sigobject == NULL || sz == 0) {
1.171     chs      1653:                return 0;
                   1654:        }
                   1655:
                   1656:        /*
                   1657:         * If we don't have a sigobject for this emulation, create one.
                   1658:         *
                   1659:         * sigobject is an anonymous memory object (just like SYSV shared
                   1660:         * memory) that we keep a permanent reference to and that we map
                   1661:         * in all processes that need this sigcode. The creation is simple,
                   1662:         * we create an object, add a permanent reference to it, map it in
                   1663:         * kernel space, copy out the sigcode to it and unmap it.
1.189     jdolecek 1664:         * We map it with PROT_READ|PROT_EXEC into the process just
                   1665:         * the way sys_mmap() would map it.
1.171     chs      1666:         */
                   1667:
                   1668:        uobj = *e->e_sigobject;
                   1669:        if (uobj == NULL) {
1.259     ad       1670:                mutex_enter(&sigobject_lock);
                   1671:                if ((uobj = *e->e_sigobject) == NULL) {
                   1672:                        uobj = uao_create(sz, 0);
                   1673:                        (*uobj->pgops->pgo_reference)(uobj);
                   1674:                        va = vm_map_min(kernel_map);
                   1675:                        if ((error = uvm_map(kernel_map, &va, round_page(sz),
                   1676:                            uobj, 0, 0,
                   1677:                            UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
                   1678:                            UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
                   1679:                                printf("kernel mapping failed %d\n", error);
                   1680:                                (*uobj->pgops->pgo_detach)(uobj);
                   1681:                                mutex_exit(&sigobject_lock);
                   1682:                                return (error);
                   1683:                        }
                   1684:                        memcpy((void *)va, e->e_sigcode, sz);
1.171     chs      1685: #ifdef PMAP_NEED_PROCWR
1.259     ad       1686:                        pmap_procwr(&proc0, va, sz);
1.171     chs      1687: #endif
1.259     ad       1688:                        uvm_unmap(kernel_map, va, va + round_page(sz));
                   1689:                        *e->e_sigobject = uobj;
                   1690:                }
                   1691:                mutex_exit(&sigobject_lock);
1.171     chs      1692:        }
                   1693:
1.172     enami    1694:        /* Just a hint to uvm_map where to put it. */
1.195     fvdl     1695:        va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
                   1696:            round_page(sz));
1.187     chs      1697:
                   1698: #ifdef __alpha__
                   1699:        /*
                   1700:         * Tru64 puts /sbin/loader at the end of user virtual memory,
                   1701:         * which causes the above calculation to put the sigcode at
                   1702:         * an invalid address.  Put it just below the text instead.
                   1703:         */
1.193     jmc      1704:        if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
1.187     chs      1705:                va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
                   1706:        }
                   1707: #endif
                   1708:
1.171     chs      1709:        (*uobj->pgops->pgo_reference)(uobj);
                   1710:        error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
                   1711:                        uobj, 0, 0,
                   1712:                        UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
                   1713:                                    UVM_ADV_RANDOM, 0));
                   1714:        if (error) {
1.312     christos 1715:                DPRINTF(("%s, %d: map %p "
1.305     matt     1716:                    "uvm_map %#"PRIxVSIZE"@%#"PRIxVADDR" failed %d\n",
1.312     christos 1717:                    __func__, __LINE__, &p->p_vmspace->vm_map, round_page(sz),
                   1718:                    va, error));
1.171     chs      1719:                (*uobj->pgops->pgo_detach)(uobj);
                   1720:                return (error);
                   1721:        }
                   1722:        p->p_sigctx.ps_sigcode = (void *)va;
                   1723:        return (0);
                   1724: }
1.336     matt     1725:
1.337     martin   1726: /*
                   1727:  * A child lwp of a posix_spawn operation starts here and ends up in
                   1728:  * cpu_spawn_return, dealing with all filedescriptor and scheduler
                   1729:  * manipulations in between.
                   1730:  */
                   1731: static void
                   1732: spawn_return(void *arg)
                   1733: {
                   1734:        struct spawn_exec_data *spawn_data = arg;
                   1735:        struct lwp *l = curlwp;
                   1736:        int error, newfd;
                   1737:        size_t i;
                   1738:        const struct posix_spawn_file_actions_entry *fae;
                   1739:        register_t retval;
1.341     martin   1740:        bool have_reflock;
                   1741:
                   1742:        /*
                   1743:         * The following actions may block, so we need a temporary
                   1744:         * vmspace - borrow the kernel one
                   1745:         */
                   1746:        KPREEMPT_DISABLE(l);
                   1747:        l->l_proc->p_vmspace = proc0.p_vmspace;
                   1748:        pmap_activate(l);
                   1749:        KPREEMPT_ENABLE(l);
                   1750:
                   1751:        /* don't allow debugger access yet */
                   1752:        rw_enter(&l->l_proc->p_reflock, RW_WRITER);
                   1753:        have_reflock = true;
1.337     martin   1754:
1.338     martin   1755:        error = 0;
1.337     martin   1756:        /* handle posix_spawn_file_actions */
                   1757:        if (spawn_data->sed_actions != NULL) {
                   1758:                for (i = 0; i < spawn_data->sed_actions_len; i++) {
                   1759:                        fae = &spawn_data->sed_actions[i];
                   1760:                        switch (fae->fae_action) {
                   1761:                        case FAE_OPEN:
1.338     martin   1762:                                if (fd_getfile(fae->fae_fildes) != NULL) {
                   1763:                                        error = fd_close(fae->fae_fildes);
                   1764:                                        if (error)
                   1765:                                                break;
                   1766:                                }
1.337     martin   1767:                                error = fd_open(fae->fae_path, fae->fae_oflag,
                   1768:                                    fae->fae_mode, &newfd);
1.338     martin   1769:                                if (error)
                   1770:                                        break;
1.337     martin   1771:                                if (newfd != fae->fae_fildes) {
                   1772:                                        error = dodup(l, newfd,
                   1773:                                            fae->fae_fildes, 0, &retval);
                   1774:                                        if (fd_getfile(newfd) != NULL)
                   1775:                                                fd_close(newfd);
                   1776:                                }
                   1777:                                break;
                   1778:                        case FAE_DUP2:
                   1779:                                error = dodup(l, fae->fae_fildes,
                   1780:                                    fae->fae_newfildes, 0, &retval);
                   1781:                                break;
                   1782:                        case FAE_CLOSE:
                   1783:                                if (fd_getfile(fae->fae_fildes) == NULL) {
                   1784:                                        error = EBADF;
                   1785:                                        break;
                   1786:                                }
                   1787:                                error = fd_close(fae->fae_fildes);
                   1788:                                break;
                   1789:                        }
                   1790:                        if (error)
                   1791:                                goto report_error;
                   1792:                }
                   1793:        }
                   1794:
                   1795:        /* handle posix_spawnattr */
                   1796:        if (spawn_data->sed_attrs != NULL) {
                   1797:                struct sigaction sigact;
                   1798:                sigact._sa_u._sa_handler = SIG_DFL;
                   1799:                sigact.sa_flags = 0;
                   1800:
                   1801:                /*
                   1802:                 * set state to SSTOP so that this proc can be found by pid.
                   1803:                 * see proc_enterprp, do_sched_setparam below
                   1804:                 */
                   1805:                l->l_proc->p_stat = SSTOP;
                   1806:
                   1807:                /* Set process group */
                   1808:                if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETPGROUP) {
                   1809:                        pid_t mypid = l->l_proc->p_pid,
                   1810:                             pgrp = spawn_data->sed_attrs->sa_pgroup;
                   1811:
                   1812:                        if (pgrp == 0)
                   1813:                                pgrp = mypid;
                   1814:
                   1815:                        error = proc_enterpgrp(spawn_data->sed_parent,
                   1816:                            mypid, pgrp, false);
                   1817:                        if (error)
                   1818:                                goto report_error;
                   1819:                }
                   1820:
                   1821:                /* Set scheduler policy */
                   1822:                if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSCHEDULER)
                   1823:                        error = do_sched_setparam(l->l_proc->p_pid, 0,
                   1824:                            spawn_data->sed_attrs->sa_schedpolicy,
                   1825:                            &spawn_data->sed_attrs->sa_schedparam);
                   1826:                else if (spawn_data->sed_attrs->sa_flags
                   1827:                    & POSIX_SPAWN_SETSCHEDPARAM) {
                   1828:                        error = do_sched_setparam(spawn_data->sed_parent->p_pid, 0,
                   1829:                            SCHED_NONE, &spawn_data->sed_attrs->sa_schedparam);
                   1830:                }
                   1831:                if (error)
                   1832:                        goto report_error;
                   1833:
                   1834:                /* Reset user ID's */
                   1835:                if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_RESETIDS) {
                   1836:                        error = do_setresuid(l, -1,
                   1837:                             kauth_cred_getgid(l->l_cred), -1,
                   1838:                             ID_E_EQ_R | ID_E_EQ_S);
                   1839:                        if (error)
                   1840:                                goto report_error;
                   1841:                        error = do_setresuid(l, -1,
                   1842:                            kauth_cred_getuid(l->l_cred), -1,
                   1843:                            ID_E_EQ_R | ID_E_EQ_S);
                   1844:                        if (error)
                   1845:                                goto report_error;
                   1846:                }
                   1847:
                   1848:                /* Set signal masks/defaults */
                   1849:                if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSIGMASK) {
                   1850:                        mutex_enter(l->l_proc->p_lock);
                   1851:                        error = sigprocmask1(l, SIG_SETMASK,
                   1852:                            &spawn_data->sed_attrs->sa_sigmask, NULL);
                   1853:                        mutex_exit(l->l_proc->p_lock);
                   1854:                        if (error)
                   1855:                                goto report_error;
                   1856:                }
                   1857:
                   1858:                if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSIGDEF) {
                   1859:                        for (i = 1; i <= NSIG; i++) {
                   1860:                                if (sigismember(
                   1861:                                    &spawn_data->sed_attrs->sa_sigdefault, i))
                   1862:                                        sigaction1(l, i, &sigact, NULL, NULL,
                   1863:                                            0);
                   1864:                        }
                   1865:                }
                   1866:        }
                   1867:
1.341     martin   1868:        /* stop using kernel vmspace */
                   1869:        KPREEMPT_DISABLE(l);
                   1870:        pmap_deactivate(l);
                   1871:        l->l_proc->p_vmspace = NULL;
                   1872:        KPREEMPT_ENABLE(l);
                   1873:
1.337     martin   1874:
                   1875:        /* now do the real exec */
                   1876:        rw_enter(&exec_lock, RW_READER);
                   1877:        error = execve_runproc(l, &spawn_data->sed_exec);
1.341     martin   1878:        have_reflock = false;
1.337     martin   1879:        if (error == EJUSTRETURN)
                   1880:                error = 0;
                   1881:        else if (error)
                   1882:                goto report_error;
                   1883:
                   1884:        /* done, signal parent */
                   1885:        mutex_enter(&spawn_data->sed_mtx_child);
                   1886:        cv_signal(&spawn_data->sed_cv_child_ready);
                   1887:        mutex_exit(&spawn_data->sed_mtx_child);
                   1888:
                   1889:        /* and finaly: leave to userland for the first time */
                   1890:        cpu_spawn_return(l);
                   1891:
                   1892:        /* NOTREACHED */
                   1893:        return;
                   1894:
                   1895:  report_error:
1.341     martin   1896:        if (have_reflock)
                   1897:                rw_exit(&l->l_proc->p_reflock);
                   1898:
                   1899:        /* stop using kernel vmspace (if we haven't already) */
                   1900:        if (l->l_proc->p_vmspace) {
                   1901:                KPREEMPT_DISABLE(l);
                   1902:                pmap_deactivate(l);
                   1903:                l->l_proc->p_vmspace = NULL;
                   1904:                KPREEMPT_ENABLE(l);
1.337     martin   1905:        }
                   1906:
                   1907:        /*
                   1908:         * Set error value for parent to pick up (and take over ownership
                   1909:         * of spawn_data again), signal parent and exit this process.
                   1910:         */
                   1911:        mutex_enter(&spawn_data->sed_mtx_child);
                   1912:        spawn_data->sed_error = error;
                   1913:        cv_signal(&spawn_data->sed_cv_child_ready);
                   1914:        mutex_exit(&spawn_data->sed_mtx_child);
                   1915:        mutex_enter(l->l_proc->p_lock);
                   1916:        exit1(l, W_EXITCODE(error, SIGABRT));
                   1917: }
                   1918:
1.342     christos 1919: static void
1.344   ! christos 1920: posix_spawn_fa_free(struct posix_spawn_file_actions *fa, size_t len)
1.342     christos 1921: {
                   1922:
1.344   ! christos 1923:        for (size_t i = 0; i < len; i++) {
1.342     christos 1924:                struct posix_spawn_file_actions_entry *fae = &fa->fae[i];
                   1925:                if (fae->fae_action != FAE_OPEN)
                   1926:                        continue;
                   1927:                kmem_free(fae->fae_path, strlen(fae->fae_path) + 1);
                   1928:        }
1.343     christos 1929:        if (fa->len)
                   1930:                kmem_free(fa->fae, sizeof(*fa->fae) * fa->len);
1.342     christos 1931:        kmem_free(fa, sizeof(*fa));
                   1932: }
                   1933:
                   1934: static int
                   1935: posix_spawn_fa_alloc(struct posix_spawn_file_actions **fap,
                   1936:     const struct posix_spawn_file_actions *ufa)
                   1937: {
                   1938:        struct posix_spawn_file_actions *fa;
                   1939:        struct posix_spawn_file_actions_entry *fae;
                   1940:        char *pbuf = NULL;
                   1941:        int error;
1.344   ! christos 1942:        size_t i = 0;
1.342     christos 1943:
                   1944:        fa = kmem_alloc(sizeof(*fa), KM_SLEEP);
                   1945:        error = copyin(ufa, fa, sizeof(*fa));
                   1946:        if (error) {
                   1947:                fa->fae = NULL;
                   1948:                fa->len = 0;
                   1949:                goto out;
                   1950:        }
                   1951:
                   1952:        if (fa->len == 0)
                   1953:                return 0;
                   1954:
                   1955:        size_t fal = fa->len * sizeof(*fae);
                   1956:        fae = fa->fae;
                   1957:        fa->fae = kmem_alloc(fal, KM_SLEEP);
                   1958:        error = copyin(fae, fa->fae, fal);
1.344   ! christos 1959:        if (error)
1.342     christos 1960:                goto out;
                   1961:
                   1962:        pbuf = PNBUF_GET();
1.344   ! christos 1963:        for (; i < fa->len; i++) {
1.342     christos 1964:                fae = &fa->fae[i];
                   1965:                if (fae->fae_action != FAE_OPEN)
                   1966:                        continue;
                   1967:                error = copyinstr(fae->fae_path, pbuf, MAXPATHLEN, &fal);
1.344   ! christos 1968:                if (error)
1.342     christos 1969:                        goto out;
                   1970:                fae->fae_path = kmem_alloc(fal, KM_SLEEP);
                   1971:                memcpy(fae->fae_path, pbuf, fal);
                   1972:        }
                   1973:        PNBUF_PUT(pbuf);
                   1974:        *fap = fa;
                   1975:        return 0;
                   1976: out:
                   1977:        if (pbuf)
                   1978:                PNBUF_PUT(pbuf);
1.344   ! christos 1979:        posix_spawn_fa_free(fa, i);
1.342     christos 1980:        return error;
                   1981: }
                   1982:
1.337     martin   1983: int
                   1984: sys_posix_spawn(struct lwp *l1, const struct sys_posix_spawn_args *uap,
                   1985:     register_t *retval)
                   1986: {
                   1987:        /* {
                   1988:                syscallarg(pid_t *) pid;
                   1989:                syscallarg(const char *) path;
                   1990:                syscallarg(const struct posix_spawn_file_actions *) file_actions;
                   1991:                syscallarg(const struct posix_spawnattr *) attrp;
                   1992:                syscallarg(char *const *) argv;
                   1993:                syscallarg(char *const *) envp;
                   1994:        } */
                   1995:
                   1996:        struct proc *p1, *p2;
                   1997:        struct plimit *p1_lim;
                   1998:        struct lwp *l2;
1.342     christos 1999:        int error = 0, tnprocs, count;
1.337     martin   2000:        struct posix_spawn_file_actions *fa = NULL;
                   2001:        struct posix_spawnattr *sa = NULL;
                   2002:        struct spawn_exec_data *spawn_data;
                   2003:        uid_t uid;
                   2004:        vaddr_t uaddr;
                   2005:        pid_t pid;
                   2006:        bool have_exec_lock = false;
                   2007:
                   2008:        p1 = l1->l_proc;
                   2009:        uid = kauth_cred_getuid(l1->l_cred);
                   2010:        tnprocs = atomic_inc_uint_nv(&nprocs);
                   2011:
                   2012:        /*
                   2013:         * Although process entries are dynamically created, we still keep
                   2014:         * a global limit on the maximum number we will create.
                   2015:         */
                   2016:        if (__predict_false(tnprocs >= maxproc))
                   2017:                error = -1;
                   2018:        else
                   2019:                error = kauth_authorize_process(l1->l_cred,
                   2020:                    KAUTH_PROCESS_FORK, p1, KAUTH_ARG(tnprocs), NULL, NULL);
                   2021:
                   2022:        if (error) {
                   2023:                atomic_dec_uint(&nprocs);
                   2024:                *retval = EAGAIN;
                   2025:                return 0;
                   2026:        }
                   2027:
                   2028:        /*
                   2029:         * Enforce limits.
                   2030:         */
                   2031:        count = chgproccnt(uid, 1);
                   2032:        if (kauth_authorize_generic(l1->l_cred, KAUTH_GENERIC_ISSUSER, NULL) !=
                   2033:            0 && __predict_false(count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur)) {
                   2034:                error = EAGAIN;
                   2035:                goto error_exit;
                   2036:        }
                   2037:
                   2038:        /* copy in file_actions struct */
                   2039:        if (SCARG(uap, file_actions) != NULL) {
1.342     christos 2040:                error = posix_spawn_fa_alloc(&fa, SCARG(uap, file_actions));
                   2041:                if (error)
1.337     martin   2042:                        goto error_exit;
                   2043:        }
1.342     christos 2044:
1.337     martin   2045:        /* copyin posix_spawnattr struct */
                   2046:        if (SCARG(uap, attrp) != NULL) {
1.342     christos 2047:                sa = kmem_alloc(sizeof(*sa), KM_SLEEP);
                   2048:                error = copyin(SCARG(uap, attrp), sa, sizeof(*sa));
1.337     martin   2049:                if (error)
                   2050:                        goto error_exit;
                   2051:        }
                   2052:
                   2053:        /*
                   2054:         * Do the first part of the exec now, collect state
                   2055:         * in spawn_data.
                   2056:         */
                   2057:        spawn_data = kmem_zalloc(sizeof(*spawn_data), KM_SLEEP);
                   2058:        error = execve_loadvm(l1, SCARG(uap, path), SCARG(uap, argv),
                   2059:            SCARG(uap, envp), execve_fetch_element, &spawn_data->sed_exec);
                   2060:        if (error == EJUSTRETURN)
                   2061:                error = 0;
                   2062:        else if (error)
                   2063:                goto error_exit;
                   2064:
                   2065:        have_exec_lock = true;
                   2066:
                   2067:        /*
                   2068:         * Allocate virtual address space for the U-area now, while it
                   2069:         * is still easy to abort the fork operation if we're out of
                   2070:         * kernel virtual address space.
                   2071:         */
                   2072:        uaddr = uvm_uarea_alloc();
                   2073:        if (__predict_false(uaddr == 0)) {
                   2074:                error = ENOMEM;
                   2075:                goto error_exit;
                   2076:        }
                   2077:
                   2078:        /*
                   2079:         * Allocate new proc. Leave its p_vmspace NULL for now.
                   2080:         * This is a point of no return, we will have to go through
                   2081:         * the child proc to properly clean it up past this point.
                   2082:         */
                   2083:        p2 = proc_alloc();
                   2084:        pid = p2->p_pid;
                   2085:
                   2086:        /*
                   2087:         * Make a proc table entry for the new process.
                   2088:         * Start by zeroing the section of proc that is zero-initialized,
                   2089:         * then copy the section that is copied directly from the parent.
                   2090:         */
                   2091:        memset(&p2->p_startzero, 0,
                   2092:            (unsigned) ((char *)&p2->p_endzero - (char *)&p2->p_startzero));
                   2093:        memcpy(&p2->p_startcopy, &p1->p_startcopy,
                   2094:            (unsigned) ((char *)&p2->p_endcopy - (char *)&p2->p_startcopy));
                   2095:        p2->p_vmspace = NULL;
                   2096:
                   2097:        CIRCLEQ_INIT(&p2->p_sigpend.sp_info);
                   2098:
                   2099:        LIST_INIT(&p2->p_lwps);
                   2100:        LIST_INIT(&p2->p_sigwaiters);
                   2101:
                   2102:        /*
                   2103:         * Duplicate sub-structures as needed.
                   2104:         * Increase reference counts on shared objects.
                   2105:         * Inherit flags we want to keep.  The flags related to SIGCHLD
                   2106:         * handling are important in order to keep a consistent behaviour
                   2107:         * for the child after the fork.  If we are a 32-bit process, the
                   2108:         * child will be too.
                   2109:         */
                   2110:        p2->p_flag =
                   2111:            p1->p_flag & (PK_SUGID | PK_NOCLDWAIT | PK_CLDSIGIGN | PK_32);
                   2112:        p2->p_emul = p1->p_emul;
                   2113:        p2->p_execsw = p1->p_execsw;
                   2114:
                   2115:        mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
                   2116:        mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
                   2117:        rw_init(&p2->p_reflock);
                   2118:        cv_init(&p2->p_waitcv, "wait");
                   2119:        cv_init(&p2->p_lwpcv, "lwpwait");
                   2120:
                   2121:        p2->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
                   2122:
                   2123:        kauth_proc_fork(p1, p2);
                   2124:
                   2125:        p2->p_raslist = NULL;
                   2126:        p2->p_fd = fd_copy();
                   2127:
                   2128:        /* XXX racy */
                   2129:        p2->p_mqueue_cnt = p1->p_mqueue_cnt;
                   2130:
                   2131:        p2->p_cwdi = cwdinit();
                   2132:
                   2133:        /*
                   2134:         * Note: p_limit (rlimit stuff) is copy-on-write, so normally
                   2135:         * we just need increase pl_refcnt.
                   2136:         */
                   2137:        p1_lim = p1->p_limit;
                   2138:        if (!p1_lim->pl_writeable) {
                   2139:                lim_addref(p1_lim);
                   2140:                p2->p_limit = p1_lim;
                   2141:        } else {
                   2142:                p2->p_limit = lim_copy(p1->p_limit);
                   2143:        }
                   2144:
                   2145:        p2->p_lflag = 0;
                   2146:        p2->p_sflag = 0;
                   2147:        p2->p_slflag = 0;
                   2148:        p2->p_pptr = p1;
                   2149:        p2->p_ppid = p1->p_pid;
                   2150:        LIST_INIT(&p2->p_children);
                   2151:
                   2152:        p2->p_aio = NULL;
                   2153:
                   2154: #ifdef KTRACE
                   2155:        /*
                   2156:         * Copy traceflag and tracefile if enabled.
                   2157:         * If not inherited, these were zeroed above.
                   2158:         */
                   2159:        if (p1->p_traceflag & KTRFAC_INHERIT) {
                   2160:                mutex_enter(&ktrace_lock);
                   2161:                p2->p_traceflag = p1->p_traceflag;
                   2162:                if ((p2->p_tracep = p1->p_tracep) != NULL)
                   2163:                        ktradref(p2);
                   2164:                mutex_exit(&ktrace_lock);
                   2165:        }
                   2166: #endif
                   2167:
                   2168:        /*
                   2169:         * Create signal actions for the child process.
                   2170:         */
                   2171:        p2->p_sigacts = sigactsinit(p1, 0);
                   2172:        mutex_enter(p1->p_lock);
                   2173:        p2->p_sflag |=
                   2174:            (p1->p_sflag & (PS_STOPFORK | PS_STOPEXEC | PS_NOCLDSTOP));
                   2175:        sched_proc_fork(p1, p2);
                   2176:        mutex_exit(p1->p_lock);
                   2177:
                   2178:        p2->p_stflag = p1->p_stflag;
                   2179:
                   2180:        /*
                   2181:         * p_stats.
                   2182:         * Copy parts of p_stats, and zero out the rest.
                   2183:         */
                   2184:        p2->p_stats = pstatscopy(p1->p_stats);
                   2185:
                   2186:        /* copy over machdep flags to the new proc */
                   2187:        cpu_proc_fork(p1, p2);
                   2188:
                   2189:        /*
                   2190:         * Prepare remaining parts of spawn data
                   2191:         */
1.342     christos 2192:        if (fa && fa->len) {
                   2193:                spawn_data->sed_actions_len = fa->len;
                   2194:                spawn_data->sed_actions = fa->fae;
1.337     martin   2195:        }
1.342     christos 2196:        if (sa)
1.337     martin   2197:                spawn_data->sed_attrs = sa;
                   2198:
                   2199:        spawn_data->sed_parent = p1;
                   2200:        cv_init(&spawn_data->sed_cv_child_ready, "pspawn");
                   2201:        mutex_init(&spawn_data->sed_mtx_child, MUTEX_DEFAULT, IPL_NONE);
                   2202:        mutex_enter(&spawn_data->sed_mtx_child);
                   2203:
                   2204:        /* create LWP */
                   2205:        lwp_create(l1, p2, uaddr, 0, NULL, 0, spawn_return, spawn_data,
                   2206:            &l2, l1->l_class);
                   2207:        l2->l_ctxlink = NULL;   /* reset ucontext link */
                   2208:
                   2209:        /*
                   2210:         * Copy the credential so other references don't see our changes.
                   2211:         * Test to see if this is necessary first, since in the common case
                   2212:         * we won't need a private reference.
                   2213:         */
                   2214:        if (kauth_cred_geteuid(l2->l_cred) != kauth_cred_getsvuid(l2->l_cred) ||
                   2215:            kauth_cred_getegid(l2->l_cred) != kauth_cred_getsvgid(l2->l_cred)) {
                   2216:                l2->l_cred = kauth_cred_copy(l2->l_cred);
                   2217:                kauth_cred_setsvuid(l2->l_cred, kauth_cred_geteuid(l2->l_cred));
                   2218:                kauth_cred_setsvgid(l2->l_cred, kauth_cred_getegid(l2->l_cred));
                   2219:        }
                   2220:
                   2221:        /* Update the master credentials. */
                   2222:        if (l2->l_cred != p2->p_cred) {
                   2223:                kauth_cred_t ocred;
                   2224:
                   2225:                kauth_cred_hold(l2->l_cred);
                   2226:                mutex_enter(p2->p_lock);
                   2227:                ocred = p2->p_cred;
                   2228:                p2->p_cred = l2->l_cred;
                   2229:                mutex_exit(p2->p_lock);
                   2230:                kauth_cred_free(ocred);
                   2231:        }
                   2232:
                   2233:        /*
                   2234:         * It's now safe for the scheduler and other processes to see the
                   2235:         * child process.
                   2236:         */
                   2237:        mutex_enter(proc_lock);
                   2238:
                   2239:        if (p1->p_session->s_ttyvp != NULL && p1->p_lflag & PL_CONTROLT)
                   2240:                p2->p_lflag |= PL_CONTROLT;
                   2241:
                   2242:        LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
                   2243:        p2->p_exitsig = SIGCHLD;        /* signal for parent on exit */
                   2244:
                   2245:        LIST_INSERT_AFTER(p1, p2, p_pglist);
                   2246:        LIST_INSERT_HEAD(&allproc, p2, p_list);
                   2247:
                   2248:        p2->p_trace_enabled = trace_is_enabled(p2);
                   2249: #ifdef __HAVE_SYSCALL_INTERN
                   2250:        (*p2->p_emul->e_syscall_intern)(p2);
                   2251: #endif
                   2252:
                   2253:        /*
                   2254:         * Make child runnable, set start time, and add to run queue except
                   2255:         * if the parent requested the child to start in SSTOP state.
                   2256:         */
                   2257:        mutex_enter(p2->p_lock);
                   2258:
                   2259:        getmicrotime(&p2->p_stats->p_start);
                   2260:
                   2261:        lwp_lock(l2);
                   2262:        KASSERT(p2->p_nrlwps == 1);
                   2263:        p2->p_nrlwps = 1;
                   2264:        p2->p_stat = SACTIVE;
                   2265:        l2->l_stat = LSRUN;
                   2266:        sched_enqueue(l2, false);
                   2267:        lwp_unlock(l2);
                   2268:
                   2269:        mutex_exit(p2->p_lock);
                   2270:        mutex_exit(proc_lock);
                   2271:
                   2272:        cv_wait(&spawn_data->sed_cv_child_ready, &spawn_data->sed_mtx_child);
                   2273:        mutex_exit(&spawn_data->sed_mtx_child);
                   2274:        error = spawn_data->sed_error;
                   2275:
1.341     martin   2276:        rw_exit(&p1->p_reflock);
1.337     martin   2277:        rw_exit(&exec_lock);
                   2278:        have_exec_lock = false;
                   2279:
1.342     christos 2280:        if (fa)
1.344   ! christos 2281:                posix_spawn_fa_free(fa, fa->len);
1.337     martin   2282:
1.342     christos 2283:        if (sa)
                   2284:                kmem_free(sa, sizeof(*sa));
1.337     martin   2285:
                   2286:        cv_destroy(&spawn_data->sed_cv_child_ready);
                   2287:        mutex_destroy(&spawn_data->sed_mtx_child);
                   2288:
                   2289:        kmem_free(spawn_data, sizeof(*spawn_data));
                   2290:
                   2291:        if (error == 0 && SCARG(uap, pid) != NULL)
                   2292:                error = copyout(&pid, SCARG(uap, pid), sizeof(pid));
                   2293:
                   2294:        *retval = error;
                   2295:        return 0;
                   2296:
                   2297:  error_exit:
                   2298:        if (have_exec_lock)
                   2299:                rw_exit(&exec_lock);
                   2300:
1.342     christos 2301:        if (fa)
1.344   ! christos 2302:                posix_spawn_fa_free(fa, fa->len);
1.337     martin   2303:
1.342     christos 2304:        if (sa)
1.337     martin   2305:                kmem_free(sa, sizeof(*sa));
                   2306:
                   2307:        (void)chgproccnt(uid, -1);
                   2308:        atomic_dec_uint(&nprocs);
                   2309:
                   2310:        *retval = error;
                   2311:        return 0;
                   2312: }
                   2313:
1.336     matt     2314: void
                   2315: exec_free_emul_arg(struct exec_package *epp)
                   2316: {
                   2317:        if (epp->ep_emul_arg_free != NULL) {
                   2318:                KASSERT(epp->ep_emul_arg != NULL);
                   2319:                (*epp->ep_emul_arg_free)(epp->ep_emul_arg);
                   2320:                epp->ep_emul_arg_free = NULL;
                   2321:                epp->ep_emul_arg = NULL;
                   2322:        } else {
                   2323:                KASSERT(epp->ep_emul_arg == NULL);
                   2324:        }
                   2325: }

CVSweb <webmaster@jp.NetBSD.org>