[BACK]Return to kern_exec.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/kern_exec.c, Revision 1.420

1.420   ! pgoyette    1: /*     $NetBSD: kern_exec.c,v 1.419 2015/10/13 00:24:35 pgoyette Exp $ */
1.277     ad          2:
                      3: /*-
                      4:  * Copyright (c) 2008 The NetBSD Foundation, Inc.
                      5:  * All rights reserved.
                      6:  *
                      7:  * Redistribution and use in source and binary forms, with or without
                      8:  * modification, are permitted provided that the following conditions
                      9:  * are met:
                     10:  * 1. Redistributions of source code must retain the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer.
                     12:  * 2. Redistributions in binary form must reproduce the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer in the
                     14:  *    documentation and/or other materials provided with the distribution.
                     15:  *
                     16:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     17:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     18:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     19:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     20:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     21:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     22:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     23:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     24:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     25:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     26:  * POSSIBILITY OF SUCH DAMAGE.
                     27:  */
1.55      cgd        28:
                     29: /*-
1.77      cgd        30:  * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
1.55      cgd        31:  * Copyright (C) 1992 Wolfgang Solfrank.
                     32:  * Copyright (C) 1992 TooLs GmbH.
                     33:  * All rights reserved.
                     34:  *
                     35:  * Redistribution and use in source and binary forms, with or without
                     36:  * modification, are permitted provided that the following conditions
                     37:  * are met:
                     38:  * 1. Redistributions of source code must retain the above copyright
                     39:  *    notice, this list of conditions and the following disclaimer.
                     40:  * 2. Redistributions in binary form must reproduce the above copyright
                     41:  *    notice, this list of conditions and the following disclaimer in the
                     42:  *    documentation and/or other materials provided with the distribution.
                     43:  * 3. All advertising materials mentioning features or use of this software
                     44:  *    must display the following acknowledgement:
                     45:  *     This product includes software developed by TooLs GmbH.
                     46:  * 4. The name of TooLs GmbH may not be used to endorse or promote products
                     47:  *    derived from this software without specific prior written permission.
                     48:  *
                     49:  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
                     50:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
                     51:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
                     52:  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
                     53:  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
                     54:  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
                     55:  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
                     56:  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
                     57:  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
                     58:  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                     59:  */
1.146     lukem      60:
                     61: #include <sys/cdefs.h>
1.420   ! pgoyette   62: __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.419 2015/10/13 00:24:35 pgoyette Exp $");
1.89      mrg        63:
1.325     jmcneill   64: #include "opt_exec.h"
1.360     christos   65: #include "opt_execfmt.h"
1.92      thorpej    66: #include "opt_ktrace.h"
1.285     apb        67: #include "opt_modular.h"
1.124     jdolecek   68: #include "opt_syscall_debug.h"
1.226     dogcow     69: #include "veriexec.h"
1.232     elad       70: #include "opt_pax.h"
1.55      cgd        71:
                     72: #include <sys/param.h>
                     73: #include <sys/systm.h>
                     74: #include <sys/filedesc.h>
                     75: #include <sys/kernel.h>
                     76: #include <sys/proc.h>
                     77: #include <sys/mount.h>
1.265     yamt       78: #include <sys/kmem.h>
1.55      cgd        79: #include <sys/namei.h>
                     80: #include <sys/vnode.h>
                     81: #include <sys/file.h>
1.414     christos   82: #include <sys/filedesc.h>
1.55      cgd        83: #include <sys/acct.h>
1.337     martin     84: #include <sys/atomic.h>
1.55      cgd        85: #include <sys/exec.h>
                     86: #include <sys/ktrace.h>
1.278     pooka      87: #include <sys/uidinfo.h>
1.55      cgd        88: #include <sys/wait.h>
                     89: #include <sys/mman.h>
1.155     gmcgarry   90: #include <sys/ras.h>
1.55      cgd        91: #include <sys/signalvar.h>
                     92: #include <sys/stat.h>
1.124     jdolecek   93: #include <sys/syscall.h>
1.218     elad       94: #include <sys/kauth.h>
1.253     ad         95: #include <sys/lwpctl.h>
1.260     christos   96: #include <sys/pax.h>
1.263     ad         97: #include <sys/cpu.h>
1.282     ad         98: #include <sys/module.h>
1.289     pooka      99: #include <sys/syscallvar.h>
1.56      cgd       100: #include <sys/syscallargs.h>
1.222     elad      101: #if NVERIEXEC > 0
1.197     blymn     102: #include <sys/verified_exec.h>
1.222     elad      103: #endif /* NVERIEXEC > 0 */
1.294     darran    104: #include <sys/sdt.h>
1.337     martin    105: #include <sys/spawn.h>
                    106: #include <sys/prot.h>
1.330     tls       107: #include <sys/cprng.h>
1.55      cgd       108:
1.88      mrg       109: #include <uvm/uvm_extern.h>
                    110:
1.55      cgd       111: #include <machine/reg.h>
                    112:
1.244     dsl       113: #include <compat/common/compat_util.h>
                    114:
1.364     martin    115: #ifndef MD_TOPDOWN_INIT /* default, used only if nothing defined it earlier */
1.370     christos  116: #ifdef __USE_TOPDOWN_VM
1.364     martin    117: #define        MD_TOPDOWN_INIT(epp)    (epp)->ep_flags |= EXEC_TOPDOWN_VM
                    118: #else /* !__USE_TOPDOWN_VM: MD_TOPDOWN_INIT() expands to nothing */
                    119: #define        MD_TOPDOWN_INIT(epp)
                    120: #endif /* __USE_TOPDOWN_VM */
                    121: #endif /* !MD_TOPDOWN_INIT */
                    122:
1.391     uebayasi  123: struct execve_data;
                    124:
1.396     uebayasi  125: static size_t calcargs(struct execve_data * restrict, const size_t);
                    126: static size_t calcstack(struct execve_data * restrict, const size_t);
1.399     uebayasi  127: static int copyoutargs(struct execve_data * restrict, struct lwp *,
                    128:     char * const);
1.398     uebayasi  129: static int copyoutpsstrs(struct execve_data * restrict, struct proc *);
1.391     uebayasi  130: static int copyinargs(struct execve_data * restrict, char * const *,
                    131:     char * const *, execve_fetch_element_t, char **);
1.392     uebayasi  132: static int copyinargstrs(struct execve_data * restrict, char * const *,
                    133:     execve_fetch_element_t, char **, size_t *, void (*)(const void *, size_t));
1.171     chs       134: static int exec_sigcode_map(struct proc *, const struct emul *);
                    135:
1.143     christos  136: #ifdef DEBUG_EXEC /* debug helpers below print via printf() */
1.305     matt      137: #define DPRINTF(a) printf a /* argument must be a parenthesized printf() argument list */
1.312     christos  138: #define COPYPRINTF(s, a, b) printf("%s, %d: copyout%s @%p %zu\n", __func__, \
                    139:     __LINE__, (s), (a), (b))
1.388     uebayasi  140: static void dump_vmcmds(const struct exec_package * const, size_t, int);
                    141: #define DUMPVMCMDS(p, x, e) do { dump_vmcmds((p), (x), (e)); } while (0)
1.143     christos  142: #else /* !DEBUG_EXEC: the helpers expand to nothing (DUMPVMCMDS to an empty statement) */
                    143: #define DPRINTF(a)
1.312     christos  144: #define COPYPRINTF(s, a, b)
1.388     uebayasi  145: #define DUMPVMCMDS(p, x, e) do {} while (0)
1.143     christos  146: #endif /* DEBUG_EXEC */
1.165     thorpej   147:
1.130     jdolecek  148: /*
1.294     darran    149:  * DTrace SDT provider definitions
                    150:  */
1.418     christos  151: SDT_PROVIDER_DECLARE(proc); /* provider is only declared here, not defined */
                    152: SDT_PROBE_DEFINE1(proc, kernel, , exec, "char *"); /* one argument, typed "char *" */
                    153: SDT_PROBE_DEFINE1(proc, kernel, , exec__success, "char *"); /* one argument, typed "char *" */
                    154: SDT_PROBE_DEFINE1(proc, kernel, , exec__failure, "int"); /* one argument, typed "int" */
1.294     darran    155:
                    156: /*
1.130     jdolecek  157:  * Exec function switch:
                    158:  *
                    159:  * Note that each makecmds function is responsible for loading the
                    160:  * exec package with the necessary functions for any exec-type-specific
                    161:  * handling.
                    162:  *
                    163:  * Functions for specific exec types should be defined in their own
                    164:  * header file.
                    165:  */
1.138     lukem     166: static const struct execsw     **execsw = NULL; /* table walked by check_exec() */
                    167: static int                     nexecs; /* number of execsw[] entries check_exec() tries */
                    168:
1.282     ad        169: u_int  exec_maxhdrsz;   /* must not be static - used by netbsd32 */
1.130     jdolecek  170:
                    171: /* list of dynamically loaded execsw entries */
1.282     ad        172: static LIST_HEAD(execlist_head, exec_entry) ex_head =
                    173:     LIST_HEAD_INITIALIZER(ex_head);
1.130     jdolecek  174: struct exec_entry {
1.138     lukem     175:        LIST_ENTRY(exec_entry)  ex_list; /* LIST linkage; ex_head is a LIST_HEAD of exec_entry */
1.282     ad        176:        SLIST_ENTRY(exec_entry) ex_slist; /* SLIST linkage; NOTE(review): owning list not visible here */
                    177:        const struct execsw     *ex_sw; /* the execsw this entry refers to */
1.130     jdolecek  178: };
                    179:
1.203     christos  180: #ifndef __HAVE_SYSCALL_INTERN
                    181: void   syscall(void);
                    182: #endif
                    183:
1.173     christos  184: /* NetBSD emul struct */
1.282     ad        185: struct emul emul_netbsd = { /* members not named below are implicitly zero-initialized */
1.291     rmind     186:        .e_name =               "netbsd",
1.371     manu      187: #ifdef EMUL_NATIVEROOT
                    188:        .e_path =               EMUL_NATIVEROOT,
                    189: #else
                    190:        .e_path =               NULL,
                    191: #endif /* EMUL_NATIVEROOT */
1.133     mycroft   192: #ifndef __HAVE_MINIMAL_EMUL
1.291     rmind     193:        .e_flags =              EMUL_HAS_SYS___syscall,
                    194:        .e_errno =              NULL,
                    195:        .e_nosys =              SYS_syscall,
                    196:        .e_nsysent =            SYS_NSYSENT,
1.133     mycroft   197: #endif /* !__HAVE_MINIMAL_EMUL */
1.291     rmind     198:        .e_sysent =             sysent,
1.124     jdolecek  199: #ifdef SYSCALL_DEBUG
1.291     rmind     200:        .e_syscallnames =       syscallnames,
1.124     jdolecek  201: #else
1.291     rmind     202:        .e_syscallnames =       NULL,
1.124     jdolecek  203: #endif /* SYSCALL_DEBUG */
1.291     rmind     204:        .e_sendsig =            sendsig,
                    205:        .e_trapsignal =         trapsignal,
                    206:        .e_tracesig =           NULL,
                    207:        .e_sigcode =            NULL,
                    208:        .e_esigcode =           NULL,
                    209:        .e_sigobject =          NULL,
                    210:        .e_setregs =            setregs,
                    211:        .e_proc_exec =          NULL,
                    212:        .e_proc_fork =          NULL,
                    213:        .e_proc_exit =          NULL,
                    214:        .e_lwp_fork =           NULL,
                    215:        .e_lwp_exit =           NULL,
1.133     mycroft   216: #ifdef __HAVE_SYSCALL_INTERN
1.291     rmind     217:        .e_syscall_intern =     syscall_intern,
1.133     mycroft   218: #else /* syscall() is the function declared above under !__HAVE_SYSCALL_INTERN */
1.291     rmind     219:        .e_syscall =            syscall,
1.133     mycroft   220: #endif /* __HAVE_SYSCALL_INTERN */
1.291     rmind     221:        .e_sysctlovly =         NULL,
                    222:        .e_fault =              NULL,
                    223:        .e_vm_default_addr =    uvm_default_mapaddr,
                    224:        .e_usertrap =           NULL,
                    225:        .e_ucsize =             sizeof(ucontext_t),
                    226:        .e_startlwp =           startlwp
1.124     jdolecek  227: };
                    228:
1.55      cgd       229: /*
1.130     jdolecek  230:  * Exec lock. Used to control access to execsw[] structures.
                    231:  * This must not be static so that netbsd32 can access it, too.
                    232:  */
1.352     rmind     233: krwlock_t exec_lock;
                    234:
                    235: static kmutex_t sigobject_lock; /* NOTE(review): presumably guards emul sigobject setup in exec_sigcode_map() - confirm */
1.259     ad        236:
1.337     martin    237: /*
                    238:  * Data used between a loadvm and execve part of an "exec" operation
                    239:  */
                    240: struct execve_data { /* NOTE(review): field notes are inferred from names and types; the code filling them is outside this excerpt */
                    241:        struct exec_package     ed_pack; /* exec package, embedded by value */
                    242:        struct pathbuf          *ed_pathbuf;
                    243:        struct vattr            ed_attr;
                    244:        struct ps_strings       ed_arginfo;
                    245:        char                    *ed_argp;
                    246:        const char              *ed_pathstring;
                    247:        char                    *ed_resolvedpathbuf;
                    248:        size_t                  ed_ps_strings_sz; /* presumably the size of the ps_strings area - confirm */
                    249:        int                     ed_szsigcode; /* presumably the signal trampoline size - confirm */
1.396     uebayasi  250:        size_t                  ed_argslen;
1.337     martin    251:        long                    ed_argc; /* presumably the argument count - confirm */
                    252:        long                    ed_envc; /* presumably the environment count - confirm */
                    253: };
                    254:
                    255: /*
                    256:  * data passed from parent lwp to child during a posix_spawn()
                    257:  */
                    258: struct spawn_exec_data { /* NOTE(review): users of this struct are outside this excerpt; hedged notes only */
                    259:        struct execve_data      sed_exec; /* embedded execve state (by value, not a pointer) */
1.348     martin    260:        struct posix_spawn_file_actions
1.337     martin    261:                                *sed_actions;
                    262:        struct posix_spawnattr  *sed_attrs;
                    263:        struct proc             *sed_parent;
                    264:        kcondvar_t              sed_cv_child_ready; /* presumably paired with sed_mtx_child - confirm */
                    265:        kmutex_t                sed_mtx_child;
                    266:        int                     sed_error;
1.348     martin    267:        volatile uint32_t       sed_refcnt; /* presumably a reference count shared by parent and child - confirm */
1.337     martin    268: };
                    269:
1.277     ad        270: static void *
                    271: exec_pool_alloc(struct pool *pp, int flags)
                    272: {
                    273:        /* pa_alloc hook of exec_palloc: one NCARGS-byte pageable chunk from kernel_map; pp and flags are not consulted */
                    274:        return (void *)uvm_km_alloc(kernel_map, NCARGS, 0,
                    275:            UVM_KMF_PAGEABLE | UVM_KMF_WAITVA);
                    276: }
                    277:
                    278: static void
                    279: exec_pool_free(struct pool *pp, void *addr)
                    280: {
                    281:        /* pa_free hook of exec_palloc: returns the NCARGS-byte chunk at addr to kernel_map; pp is not consulted */
                    282:        uvm_km_free(kernel_map, (vaddr_t)addr, NCARGS, UVM_KMF_PAGEABLE);
                    283: }
                    284:
                    285: static struct pool exec_pool; /* NOTE(review): initialization is not visible in this excerpt */
                    286:
                    287: static struct pool_allocator exec_palloc = {
                    288:        .pa_alloc = exec_pool_alloc,
                    289:        .pa_free = exec_pool_free,
                    290:        .pa_pagesz = NCARGS /* same size the two hooks pass to uvm_km_alloc()/uvm_km_free() */
                    291: };
                    292:
1.130     jdolecek  293: /*
1.55      cgd       294:  * check exec:
                    295:  * given an "executable" named by the pathbuf pb, look it up and
                    296:  * see what we can do with it.
                    297:  *
                    298:  * ON ENTRY:
                    299:  *     exec package to fill in, and the pathbuf naming the file
1.212     christos  300:  *     lwp pointer of exec'ing lwp
1.55      cgd       301:  *     NO SELF-LOCKED VNODES
                    302:  *
                    303:  * ON EXIT:
                    304:  *     error:  nothing held, etc.  exec header still allocated.
1.77      cgd       305:  *     ok:     filled exec package, executable's vnode (unlocked).
1.55      cgd       306:  *
                    307:  * EXEC SWITCH ENTRY:
                    308:  *     Open, unlocked vnode to check (in the exec package), lwp.
                    309:  *
                    310:  * EXEC SWITCH EXIT:
1.77      cgd       311:  *     ok:     return 0, filled exec package, executable's vnode (unlocked).
1.55      cgd       312:  *     error:  destructive:
                    313:  *                     everything deallocated except exec header.
1.76      cgd       314:  *             non-destructive:
1.77      cgd       315:  *                     error code, executable's vnode (unlocked),
1.76      cgd       316:  *                     exec header unmodified.
1.55      cgd       317:  */
                    318: int
1.352     rmind     319: /*ARGSUSED*/
1.301     dholland  320: check_exec(struct lwp *l, struct exec_package *epp, struct pathbuf *pb)
1.55      cgd       321: {
1.138     lukem     322:        int             error, i;
                    323:        struct vnode    *vp;
1.295     dholland  324:        struct nameidata nd;
1.138     lukem     325:        size_t          resid;
1.55      cgd       326:        /* LOCKLEAF: the vnode from namei() is still locked until the VOP_UNLOCK() below (see bad1) */
1.303     dholland  327:        NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1.295     dholland  328:
1.55      cgd       329:        /* first get the vnode */
1.295     dholland  330:        if ((error = namei(&nd)) != 0)
1.55      cgd       331:                return error;
1.295     dholland  332:        epp->ep_vp = vp = nd.ni_vp;
1.368     christos  333:        /* normally this can't fail */
1.408     maxv      334:        error = copystr(nd.ni_pnbuf, epp->ep_resolvedname, PATH_MAX, NULL);
                    335:        KASSERT(error == 0);
1.295     dholland  336:
1.296     dholland  337: #ifdef DIAGNOSTIC
                    338:        /* paranoia (take this out once namei stuff stabilizes) */
1.302     dholland  339:        memset(nd.ni_pnbuf, '~', PATH_MAX);
1.295     dholland  340: #endif
1.55      cgd       341:
1.84      mycroft   342:        /* check access and type */
1.55      cgd       343:        if (vp->v_type != VREG) {
1.81      kleink    344:                error = EACCES;
1.55      cgd       345:                goto bad1;
                    346:        }
1.254     pooka     347:        if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0)
1.84      mycroft   348:                goto bad1;
1.55      cgd       349:
                    350:        /* get attributes */
1.254     pooka     351:        if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred)) != 0)
1.55      cgd       352:                goto bad1;
                    353:
                    354:        /* Check mount point */
                    355:        if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
                    356:                error = EACCES;
                    357:                goto bad1;
                    358:        }
1.141     thorpej   359:        if (vp->v_mount->mnt_flag & MNT_NOSUID) /* nosuid mount: hide the set-id bits in the cached attributes */
1.83      mycroft   360:                epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);
1.55      cgd       361:
                    362:        /* try to open it */
1.254     pooka     363:        if ((error = VOP_OPEN(vp, FREAD, l->l_cred)) != 0)
1.55      cgd       364:                goto bad1;
                    365:
1.99      wrstuden  366:        /* unlock vp, since we need it unlocked from here on out. */
1.298     hannken   367:        VOP_UNLOCK(vp);
1.77      cgd       368:        /* from here on failures go to bad2: vp is open and unlocked */
1.222     elad      369: #if NVERIEXEC > 0
1.295     dholland  370:        error = veriexec_verify(l, vp, epp->ep_resolvedname,
1.233     elad      371:            epp->ep_flags & EXEC_INDIR ? VERIEXEC_INDIRECT : VERIEXEC_DIRECT,
1.236     elad      372:            NULL);
                    373:        if (error)
1.234     elad      374:                goto bad2;
1.222     elad      375: #endif /* NVERIEXEC > 0 */
1.160     blymn     376:
1.232     elad      377: #ifdef PAX_SEGVGUARD
1.295     dholland  378:        error = pax_segvguard(l, vp, epp->ep_resolvedname, false);
1.234     elad      379:        if (error)
                    380:                goto bad2;
1.232     elad      381: #endif /* PAX_SEGVGUARD */
                    382:
1.55      cgd       383:        /* now we have the file, get the exec header */
1.74      christos  384:        error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
1.223     ad        385:                        UIO_SYSSPACE, 0, l->l_cred, &resid, NULL);
1.74      christos  386:        if (error)
1.55      cgd       387:                goto bad2;
                    388:        epp->ep_hdrvalid = epp->ep_hdrlen - resid; /* requested length minus the residual left by vn_rdwr() */
                    389:
                    390:        /*
1.136     eeh       391:         * Set up default address space limits.  Can be overridden
                    392:         * by individual exec packages.
1.183     junyoung  393:         *
1.235     rillig    394:         * XXX probably should be all done in the exec packages.
1.136     eeh       395:         */
                    396:        epp->ep_vm_minaddr = VM_MIN_ADDRESS;
                    397:        epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;
                    398:        /*
1.55      cgd       399:         * set up the vmcmds for creation of the process
                    400:         * address space
                    401:         */
                    402:        error = ENOEXEC; /* result if no execsw entry accepts the image */
1.244     dsl       403:        for (i = 0; i < nexecs; i++) {
1.68      cgd       404:                int newerror;
                    405:
1.130     jdolecek  406:                epp->ep_esch = execsw[i];
1.212     christos  407:                newerror = (*execsw[i]->es_makecmds)(l, epp);
1.244     dsl       408:
                    409:                if (!newerror) {
1.318     reinoud   410:                        /* Seems ok: check that entry point is not too high */
1.323     reinoud   411:                        if (epp->ep_entry > epp->ep_vm_maxaddr) {
1.322     reinoud   412: #ifdef DIAGNOSTIC
1.329     reinoud   413:                                printf("%s: rejecting %p due to "
1.331     christos  414:                                    "too high entry address (> %p)\n",
                    415:                                         __func__, (void *)epp->ep_entry,
                    416:                                         (void *)epp->ep_vm_maxaddr);
1.322     reinoud   417: #endif
1.318     reinoud   418:                                error = ENOEXEC;
                    419:                                break;
                    420:                        }
                    421:                        /* Seems ok: check that entry point is not too low */
1.323     reinoud   422:                        if (epp->ep_entry < epp->ep_vm_minaddr) {
1.322     reinoud   423: #ifdef DIAGNOSTIC
1.329     reinoud   424:                                printf("%s: rejecting %p due to "
1.331     christos  425:                                    "too low entry address (< %p)\n",
                    426:                                     __func__, (void *)epp->ep_entry,
                    427:                                     (void *)epp->ep_vm_minaddr);
1.322     reinoud   428: #endif
1.244     dsl       429:                                error = ENOEXEC;
                    430:                                break;
                    431:                        }
                    432:
                    433:                        /* check limits */
                    434:                        if ((epp->ep_tsize > MAXTSIZ) ||
                    435:                            (epp->ep_dsize > (u_quad_t)l->l_proc->p_rlimit
                    436:                                                    [RLIMIT_DATA].rlim_cur)) {
1.322     reinoud   437: #ifdef DIAGNOSTIC
1.323     reinoud   438:                                printf("%s: rejecting due to "
1.331     christos  439:                                    "limits (t=%llu > %llu || d=%llu > %llu)\n",
                    440:                                    __func__,
                    441:                                    (unsigned long long)epp->ep_tsize,
                    442:                                    (unsigned long long)MAXTSIZ,
                    443:                                    (unsigned long long)epp->ep_dsize,
1.332     christos  444:                                    (unsigned long long)
                    445:                                    l->l_proc->p_rlimit[RLIMIT_DATA].rlim_cur);
1.322     reinoud   446: #endif
1.244     dsl       447:                                error = ENOMEM;
                    448:                                break;
                    449:                        }
                    450:                        return 0; /* success: vp is left open and unlocked */
                    451:                }
                    452:                /* this execsw entry failed: drop any vnode references it left in the package */
                    453:                if (epp->ep_emul_root != NULL) {
                    454:                        vrele(epp->ep_emul_root);
                    455:                        epp->ep_emul_root = NULL;
                    456:                }
                    457:                if (epp->ep_interp != NULL) {
                    458:                        vrele(epp->ep_interp);
                    459:                        epp->ep_interp = NULL;
                    460:                }
                    461:
1.68      cgd       462:                /* make sure the first "interesting" error code is saved. */
1.244     dsl       463:                if (error == ENOEXEC)
1.68      cgd       464:                        error = newerror;
1.124     jdolecek  465:
1.244     dsl       466:                if (epp->ep_flags & EXEC_DESTR)
                    467:                        /* Error from "#!" code, tidied up by recursive call */
1.55      cgd       468:                        return error;
                    469:        }
                    470:
1.249     pooka     471:        /* no entry accepted the image, or a sanity check above broke out: error */
                    472:
1.55      cgd       473:        /*
                    474:         * free any vmspace-creation commands,
                    475:         * and release their references
                    476:         */
                    477:        kill_vmcmds(&epp->ep_vmcmds);
                    478:
                    479: bad2:
                    480:        /*
1.99      wrstuden  481:         * vp is open but unlocked here: re-lock it, close it,
1.55      cgd       482:         * release it with vput(), and punt.
                    483:         */
1.99      wrstuden  484:        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.254     pooka     485:        VOP_CLOSE(vp, FREAD, l->l_cred);
1.99      wrstuden  486:        vput(vp);
1.55      cgd       487:        return error;
                    488:
                    489: bad1:
                    490:        /*
                    491:         * just put the vnode, which is still locked and
                    492:         * which we don't yet have open.
                    493:         */
1.77      cgd       494:        vput(vp);                               /* was still locked */
1.55      cgd       495:        return error;
                    496: }
                    497:
1.188     chs       498: #ifdef __MACHINE_STACK_GROWS_UP
                    499: #define STACK_PTHREADSPACE NBPG /* NBPG when the machine stack grows up */
                    500: #else /* !__MACHINE_STACK_GROWS_UP */
                    501: #define STACK_PTHREADSPACE 0
                    502: #endif /* __MACHINE_STACK_GROWS_UP */
                    503:
1.204     cube      504: static int
                    505: execve_fetch_element(char * const *array, size_t index, char **value)
                    506: { /* copyin() the index-th pointer of array into *value; the copyin() result is returned unchanged */
                    507:        return copyin(array + index, value, sizeof(*value));
                    508: }
                    509:
1.55      cgd       510: /*
                    511:  * exec system call
                    512:  */
1.75      christos  513: int
1.258     dsl       514: sys_execve(struct lwp *l, const struct sys_execve_args *uap, register_t *retval)
1.71      thorpej   515: {
1.258     dsl       516:        /* {
1.138     lukem     517:                syscallarg(const char *)        path;
                    518:                syscallarg(char * const *)      argp;
                    519:                syscallarg(char * const *)      envp;
1.258     dsl       520:        } */
1.204     cube      521:        /* thin wrapper: pass path/argp/envp to execve1() with execve_fetch_element as the fetcher; retval is not touched here */
                    522:        return execve1(l, SCARG(uap, path), SCARG(uap, argp),
                    523:            SCARG(uap, envp), execve_fetch_element);
                    524: }
                    525:
1.376     maxv      526: int
1.317     manu      527: sys_fexecve(struct lwp *l, const struct sys_fexecve_args *uap,
                    528:     register_t *retval)
                    529: {
                    530:        /* {
                    531:                syscallarg(int)                 fd;
                    532:                syscallarg(char * const *)      argp;
                    533:                syscallarg(char * const *)      envp;
                    534:        } */
                    535:        /* stub: none of the arguments are examined; the call always fails with ENOSYS */
                    536:        return ENOSYS;
                    537: }
                    538:
1.282     ad        539: /*
                    540:  * Load modules to try and execute an image that we do not understand.
                    541:  * If no execsw entries are present, we load those likely to be needed
                    542:  * in order to run native images only.  Otherwise, we autoload all
                    543:  * possible modules that could let us run the binary.  XXX lame
                    544:  */
                    545: static void
                    546: exec_autoload(void)
                    547: {
                    548: #ifdef MODULAR /* without MODULAR the function body is empty */
                    549:        static const char * const native[] = {
                    550:                "exec_elf32",
                    551:                "exec_elf64",
                    552:                "exec_script",
                    553:                NULL
                    554:        };
                    555:        static const char * const compat[] = {
                    556:                "exec_elf32",
                    557:                "exec_elf64",
                    558:                "exec_script",
                    559:                "exec_aout",
                    560:                "exec_coff",
                    561:                "exec_ecoff",
                    562:                "compat_aoutm68k",
                    563:                "compat_freebsd",
                    564:                "compat_ibcs2",
                    565:                "compat_linux",
                    566:                "compat_linux32",
                    567:                "compat_netbsd32",
                    568:                "compat_sunos",
                    569:                "compat_sunos32",
                    570:                "compat_svr4",
                    571:                "compat_svr4_32",
                    572:                "compat_ultrix",
                    573:                NULL
                    574:        };
                    575:        char const * const *list;
                    576:        int i;
                    577:        /* nexecs == 0 selects the short native list; otherwise every name in compat is tried */
                    578:        list = (nexecs == 0 ? native : compat);
                    579:        for (i = 0; list[i] != NULL; i++) {
1.363     christos  580:                if (module_autoload(list[i], MODULE_CLASS_EXEC) != 0) {
1.376     maxv      581:                        continue; /* nonzero result: move to the next name without yielding */
1.282     ad        582:                }
1.376     maxv      583:                yield(); /* reached only when module_autoload() returned 0 */
1.282     ad        584:        }
                    585: #endif /* MODULAR */
                    586: }
                    587:
1.415     christos  588: static int
                    589: makepathbuf(struct lwp *l, const char *upath, struct pathbuf **pbp,
                    590:     size_t *offs)
1.414     christos  591: {
                    592:        char *path, *bp;
1.415     christos  593:        size_t len, tlen;
1.414     christos  594:        int error;
                    595:        struct cwdinfo *cwdi;
                    596:
                    597:        path = PNBUF_GET();
                    598:        error = copyinstr(upath, path, MAXPATHLEN, &len);
                    599:        if (error) {
                    600:                PNBUF_PUT(path);
                    601:                DPRINTF(("%s: copyin path @%p %d\n", __func__, upath, error));
1.415     christos  602:                return error;
1.414     christos  603:        }
                    604:
1.415     christos  605:        if (path[0] == '/') {
                    606:                *offs = 0;
1.414     christos  607:                goto out;
1.415     christos  608:        }
1.414     christos  609:
                    610:        len++;
                    611:        if (len + 1 >= MAXPATHLEN)
                    612:                goto out;
                    613:        bp = path + MAXPATHLEN - len;
                    614:        memmove(bp, path, len);
                    615:        *(--bp) = '/';
                    616:
                    617:        cwdi = l->l_proc->p_cwdi;
                    618:        rw_enter(&cwdi->cwdi_lock, RW_READER);
                    619:        error = getcwd_common(cwdi->cwdi_cdir, NULL, &bp, path, MAXPATHLEN / 2,
                    620:            GETCWD_CHECK_ACCESS, l);
                    621:        rw_exit(&cwdi->cwdi_lock);
                    622:
                    623:        if (error) {
                    624:                DPRINTF(("%s: getcwd_common path %s %d\n", __func__, path,
                    625:                    error));
                    626:                goto out;
                    627:        }
1.415     christos  628:        tlen = path + MAXPATHLEN - bp;
1.414     christos  629:
1.415     christos  630:        memmove(path, bp, tlen);
                    631:        path[tlen] = '\0';
                    632:        *offs = tlen - len;
1.414     christos  633: out:
1.415     christos  634:        *pbp = pathbuf_assimilate(path);
                    635:        return 0;
1.414     christos  636: }
                    637:
1.337     martin    638: static int
                    639: execve_loadvm(struct lwp *l, const char *path, char * const *args,
                    640:        char * const *envs, execve_fetch_element_t fetch_element,
                    641:        struct execve_data * restrict data)
1.204     cube      642: {
                                /*
                                 * Set up *data for executing the image named by the user-space
                                 * string `path': enforce the process limit for SUGID processes,
                                 * take p_reflock as writer, build the pathbuf, initialise the
                                 * exec package, take exec_lock as reader, run check_exec(), copy
                                 * in the argument and environment strings via fetch_element, and
                                 * compute the stack size.
                                 *
                                 * Returns 0 on success; nothing acquired here is released on that
                                 * path, so p_reflock (writer) and exec_lock (reader) are still held
                                 * and the buffers in *data are still allocated on return.
                                 *
                                 * On failure everything acquired is released again and an errno
                                 * value is returned.  An ENOEXEC failure may first trigger module
                                 * autoloading and a retry (see the end of the function).
                                 */
1.378     uebayasi  643:        struct exec_package     * const epp = &data->ed_pack;
1.153     thorpej   644:        int                     error;
1.164     thorpej   645:        struct proc             *p;
1.391     uebayasi  646:        char                    *dp;
1.282     ad        647:        u_int                   modgen;
1.416     christos  648:        size_t                  offs = 0;       // XXX: GCC
1.337     martin    649:
                    650:        KASSERT(data != NULL);
1.55      cgd       651:
1.237     ad        652:        p = l->l_proc;
                                /* Module generation seen at the last autoload attempt; 0 = none yet. */
1.376     maxv      653:        modgen = 0;
1.164     thorpej   654:
1.418     christos  655:        SDT_PROBE(proc, kernel, , exec, path, 0, 0, 0, 0);
1.294     darran    656:
1.149     christos  657:        /*
1.269     christos  658:         * Check if we have exceeded our number of processes limit.
                    659:         * This is so that we handle the case where a root daemon
                    660:         * forked, ran setuid to become the desired user and is trying
                    661:         * to exec. The obvious place to do the reference counting check
                    662:         * is setuid(), but we don't do the reference counting check there
                    663:         * like other OS's do because then all the programs that use setuid()
                    664:         * must be modified to check the return code of setuid() and exit().
                    665:         * It is dangerous to make setuid() fail, because it fails open and
                    666:         * the program will continue to run as root. If we make it succeed
                    667:         * and return an error code, again we are not enforcing the limit.
                    668:         * The best place to enforce the limit is here, when the process tries
                    669:         * to execute a new image, because eventually the process will need
                    670:         * to call exec in order to do something useful.
                    671:         */
1.282     ad        672:  retry:
                                /*
                                 * Neither lock is held here, on the first pass or on a retry
                                 * (the failure path drops both before jumping back), so the
                                 * EAGAIN return below needs no unwinding.  Note that it returns
                                 * directly and therefore does not fire the exec__failure probe.
                                 */
1.347     elad      673:        if (p->p_flag & PK_SUGID) {
                    674:                if (kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT,
                    675:                     p, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
                    676:                     &p->p_rlimit[RLIMIT_NPROC],
                    677:                     KAUTH_ARG(RLIMIT_NPROC)) != 0 &&
                    678:                    chgproccnt(kauth_cred_getuid(l->l_cred), 0) >
                    679:                     p->p_rlimit[RLIMIT_NPROC].rlim_cur)
1.269     christos  680:                return EAGAIN;
1.347     elad      681:        }
1.269     christos  682:
                    683:        /*
1.352     rmind     684:         * Drain existing references and forbid new ones.  The process
                    685:         * should be left alone until we're done here.  This is necessary
                    686:         * to avoid race conditions - e.g. in ptrace() - that might allow
                    687:         * a local user to illicitly obtain elevated privileges.
                    688:         */
                    689:        rw_enter(&p->p_reflock, RW_WRITER);
                    690:
                    691:        /*
1.129     jdolecek  692:         * Init the namei data to point the file user's program name.
                    693:         * This is done here rather than in check_exec(), so that it's
                    694:         * possible to override this settings if any of makecmd/probe
                    695:         * functions call check_exec() recursively - for example,
                    696:         * see exec_script_makecmds().
                    697:         */
1.415     christos  698:        if ((error = makepathbuf(l, path, &data->ed_pathbuf, &offs)) != 0)
1.352     rmind     699:                goto clrflg;
1.337     martin    700:        data->ed_pathstring = pathbuf_stringcopy_get(data->ed_pathbuf);
                    701:        data->ed_resolvedpathbuf = PNBUF_GET();
1.55      cgd       702:
                    703:        /*
                    704:         * initialize the fields of the exec package.
                    705:         */
                                /* offs locates the name as the caller gave it within ed_pathstring. */
1.415     christos  706:        epp->ep_kname = data->ed_pathstring + offs;
1.378     uebayasi  707:        epp->ep_resolvedname = data->ed_resolvedpathbuf;
                    708:        epp->ep_hdr = kmem_alloc(exec_maxhdrsz, KM_SLEEP);
                    709:        epp->ep_hdrlen = exec_maxhdrsz;
                    710:        epp->ep_hdrvalid = 0;
                    711:        epp->ep_emul_arg = NULL;
                    712:        epp->ep_emul_arg_free = NULL;
                    713:        memset(&epp->ep_vmcmds, 0, sizeof(epp->ep_vmcmds));
                    714:        epp->ep_vap = &data->ed_attr;
1.411     christos  715:        epp->ep_flags = (p->p_flag & PK_32) ? EXEC_FROM32 : 0;
1.378     uebayasi  716:        MD_TOPDOWN_INIT(epp);
                    717:        epp->ep_emul_root = NULL;
                    718:        epp->ep_interp = NULL;
                    719:        epp->ep_esch = NULL;
                    720:        epp->ep_pax_flags = 0;
                    721:        memset(epp->ep_machine_arch, 0, sizeof(epp->ep_machine_arch));
1.55      cgd       722:
1.237     ad        723:        rw_enter(&exec_lock, RW_READER);
1.130     jdolecek  724:
1.55      cgd       725:        /* see if we can run it. */
1.378     uebayasi  726:        if ((error = check_exec(l, epp, data->ed_pathbuf)) != 0) {
                                        /* ENOENT is not logged; every other failure is. */
1.261     xtraeme   727:                if (error != ENOENT) {
1.312     christos  728:                        DPRINTF(("%s: check exec failed %d\n",
                    729:                            __func__, error));
1.261     xtraeme   730:                }
1.352     rmind     731:                goto freehdr;
1.248     christos  732:        }
1.55      cgd       733:
                    734:        /* allocate an argument buffer */
1.337     martin    735:        data->ed_argp = pool_get(&exec_pool, PR_WAITOK);
                    736:        KASSERT(data->ed_argp != NULL);
                    737:        dp = data->ed_argp;
1.55      cgd       738:
                                /* dp is advanced past the strings that get copied into ed_argp. */
1.391     uebayasi  739:        if ((error = copyinargs(data, args, envs, fetch_element, &dp)) != 0) {
1.55      cgd       740:                goto bad;
                    741:        }
1.61      mycroft   742:
1.379     uebayasi  743:        /*
                    744:         * Calculate the new stack size.
                    745:         */
                    746:
                    747: #ifdef PAX_ASLR
1.417     maxv      748: #define        ASLR_GAP(epp)   (pax_aslr_epp_active(epp) ? (cprng_fast32() % PAGE_SIZE) : 0)
1.379     uebayasi  749: #else
1.417     maxv      750: #define        ASLR_GAP(epp)   0
1.379     uebayasi  751: #endif
                    752:
1.267     dsl       753: #ifdef __MACHINE_STACK_GROWS_UP
1.386     uebayasi  754: /*
                    755:  * copyargs() fills argc/argv/envp from the lower address even on
                    756:  * __MACHINE_STACK_GROWS_UP machines.  Reserve a few words just below the SP
                    757:  * so that _rtld() use it.
                    758:  */
1.267     dsl       759: #define        RTLD_GAP        32
                    760: #else
                    761: #define        RTLD_GAP        0
                    762: #endif
                    763:
                                /* Bytes of argument/environment strings copied in, rounded up by ALIGN(). */
1.396     uebayasi  764:        const size_t argenvstrlen = (char *)ALIGN(dp) - data->ed_argp;
1.386     uebayasi  765:
1.396     uebayasi  766:        data->ed_argslen = calcargs(data, argenvstrlen);
1.386     uebayasi  767:
1.417     maxv      768:        const size_t len = calcstack(data, ASLR_GAP(epp) + RTLD_GAP);
1.55      cgd       769:
1.396     uebayasi  770:        if (len > epp->ep_ssize) {
1.337     martin    771:                /* in effect, compare to initial limit */
1.396     uebayasi  772:                DPRINTF(("%s: stack limit exceeded %zu\n", __func__, len));
1.403     maxv      773:                error = ENOMEM;
1.55      cgd       774:                goto bad;
                    775:        }
1.337     martin    776:        /* adjust "active stack depth" for process VSZ */
1.396     uebayasi  777:        epp->ep_ssize = len;
1.337     martin    778:
                                /* Success: both locks and all of the state set up above are kept. */
                    779:        return 0;
                    780:
                                /*
                                 * Failure unwinding.  The labels are entered at the point that
                                 * matches how far setup got and fall through to the ones below:
                                 *   bad     - check_exec() succeeded and the argument buffer exists
                                 *   freehdr - exec package initialised and exec_lock held
                                 *   clrflg  - only p_reflock held
                                 */
1.352     rmind     781:  bad:
                    782:        /* free the vmspace-creation commands, and release their references */
1.378     uebayasi  783:        kill_vmcmds(&epp->ep_vmcmds);
1.352     rmind     784:        /* kill any opened file descriptor, if necessary */
1.378     uebayasi  785:        if (epp->ep_flags & EXEC_HASFD) {
                    786:                epp->ep_flags &= ~EXEC_HASFD;
                    787:                fd_close(epp->ep_fd);
1.352     rmind     788:        }
                    789:        /* close and put the exec'd file */
1.378     uebayasi  790:        vn_lock(epp->ep_vp, LK_EXCLUSIVE | LK_RETRY);
                    791:        VOP_CLOSE(epp->ep_vp, FREAD, l->l_cred);
                    792:        vput(epp->ep_vp);
1.352     rmind     793:        pool_put(&exec_pool, data->ed_argp);
                    794:
                    795:  freehdr:
1.378     uebayasi  796:        kmem_free(epp->ep_hdr, epp->ep_hdrlen);
                    797:        if (epp->ep_emul_root != NULL)
                    798:                vrele(epp->ep_emul_root);
                    799:        if (epp->ep_interp != NULL)
                    800:                vrele(epp->ep_interp);
1.352     rmind     801:
1.337     martin    802:        rw_exit(&exec_lock);
1.352     rmind     803:
                    804:        pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
                    805:        pathbuf_destroy(data->ed_pathbuf);
                    806:        PNBUF_PUT(data->ed_resolvedpathbuf);
                    807:
                    808:  clrflg:
1.351     rmind     809:        rw_exit(&p->p_reflock);
1.337     martin    810:
                                /*
                                 * The image was not recognised (ENOEXEC) and module_gen differs
                                 * from the generation recorded at the last attempt: record the
                                 * current generation, autoload exec modules and start over.  A
                                 * further ENOEXEC retries again only if module_gen has changed
                                 * in the meantime, which bounds the loop.
                                 */
                    811:        if (modgen != module_gen && error == ENOEXEC) {
                    812:                modgen = module_gen;
                    813:                exec_autoload();
                    814:                goto retry;
                    815:        }
                    816:
1.418     christos  817:        SDT_PROBE(proc, kernel, , exec__failure, error, 0, 0, 0, 0);
1.337     martin    818:        return error;
                    819: }
                    820:
1.401     uebayasi  821: static int
                    822: execve_dovmcmds(struct lwp *l, struct execve_data * restrict data)
                    823: {
                    824:        struct exec_package     * const epp = &data->ed_pack;
                    825:        struct proc             *p = l->l_proc;
                    826:        struct exec_vmcmd       *base_vcp;
                    827:        int                     error = 0;
1.407     riastrad  828:        size_t                  i;
1.401     uebayasi  829:
                    830:        /* record proc's vnode, for use by procfs and others */
                    831:        if (p->p_textvp)
                    832:                vrele(p->p_textvp);
                    833:        vref(epp->ep_vp);
                    834:        p->p_textvp = epp->ep_vp;
                    835:
                    836:        /* create the new process's VM space by running the vmcmds */
                    837:        KASSERTMSG(epp->ep_vmcmds.evs_used != 0, "%s: no vmcmds", __func__);
                    838:
                    839:        DUMPVMCMDS(epp, 0, 0);
                    840:
                    841:        base_vcp = NULL;
                    842:
                    843:        for (i = 0; i < epp->ep_vmcmds.evs_used && !error; i++) {
                    844:                struct exec_vmcmd *vcp;
                    845:
                    846:                vcp = &epp->ep_vmcmds.evs_cmds[i];
                    847:                if (vcp->ev_flags & VMCMD_RELATIVE) {
                    848:                        KASSERTMSG(base_vcp != NULL,
                    849:                            "%s: relative vmcmd with no base", __func__);
                    850:                        KASSERTMSG((vcp->ev_flags & VMCMD_BASE) == 0,
                    851:                            "%s: illegal base & relative vmcmd", __func__);
                    852:                        vcp->ev_addr += base_vcp->ev_addr;
                    853:                }
                    854:                error = (*vcp->ev_proc)(l, vcp);
                    855:                if (error)
                    856:                        DUMPVMCMDS(epp, i, error);
                    857:                if (vcp->ev_flags & VMCMD_BASE)
                    858:                        base_vcp = vcp;
                    859:        }
                    860:
                    861:        /* free the vmspace-creation commands, and release their references */
                    862:        kill_vmcmds(&epp->ep_vmcmds);
                    863:
                    864:        vn_lock(epp->ep_vp, LK_EXCLUSIVE | LK_RETRY);
                    865:        VOP_CLOSE(epp->ep_vp, FREAD, l->l_cred);
                    866:        vput(epp->ep_vp);
                    867:
                    868:        /* if an error happened, deallocate and punt */
                    869:        if (error != 0) {
                    870:                DPRINTF(("%s: vmcmd %zu failed: %d\n", __func__, i - 1, error));
                    871:        }
                    872:        return error;
                    873: }
                    874:
1.352     rmind     875: static void
                    876: execve_free_data(struct execve_data *data)
                    877: {
1.378     uebayasi  878:        struct exec_package     * const epp = &data->ed_pack;
1.352     rmind     879:
                    880:        /* free the vmspace-creation commands, and release their references */
1.378     uebayasi  881:        kill_vmcmds(&epp->ep_vmcmds);
1.352     rmind     882:        /* kill any opened file descriptor, if necessary */
1.378     uebayasi  883:        if (epp->ep_flags & EXEC_HASFD) {
                    884:                epp->ep_flags &= ~EXEC_HASFD;
                    885:                fd_close(epp->ep_fd);
1.352     rmind     886:        }
                    887:
                    888:        /* close and put the exec'd file */
1.378     uebayasi  889:        vn_lock(epp->ep_vp, LK_EXCLUSIVE | LK_RETRY);
                    890:        VOP_CLOSE(epp->ep_vp, FREAD, curlwp->l_cred);
                    891:        vput(epp->ep_vp);
1.352     rmind     892:        pool_put(&exec_pool, data->ed_argp);
                    893:
1.378     uebayasi  894:        kmem_free(epp->ep_hdr, epp->ep_hdrlen);
                    895:        if (epp->ep_emul_root != NULL)
                    896:                vrele(epp->ep_emul_root);
                    897:        if (epp->ep_interp != NULL)
                    898:                vrele(epp->ep_interp);
1.352     rmind     899:
                    900:        pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
                    901:        pathbuf_destroy(data->ed_pathbuf);
                    902:        PNBUF_PUT(data->ed_resolvedpathbuf);
                    903: }
                    904:
1.400     uebayasi  905: static void
                    906: pathexec(struct exec_package *epp, struct proc *p, const char *pathstring)
                    907: {
                    908:        const char              *commandname;
                    909:        size_t                  commandlen;
                    910:        char                    *path;
                    911:
                    912:        /* set command name & other accounting info */
                    913:        commandname = strrchr(epp->ep_resolvedname, '/');
                    914:        if (commandname != NULL) {
                    915:                commandname++;
                    916:        } else {
                    917:                commandname = epp->ep_resolvedname;
                    918:        }
                    919:        commandlen = min(strlen(commandname), MAXCOMLEN);
                    920:        (void)memcpy(p->p_comm, commandname, commandlen);
                    921:        p->p_comm[commandlen] = '\0';
                    922:
                    923:
                    924:        /*
                    925:         * If the path starts with /, we don't need to do any work.
                    926:         * This handles the majority of the cases.
                    927:         * In the future perhaps we could canonicalize it?
                    928:         */
                    929:        if (pathstring[0] == '/') {
1.414     christos  930:                path = PNBUF_GET();
                    931:                (void)strlcpy(path, pathstring, MAXPATHLEN);
1.400     uebayasi  932:                epp->ep_path = path;
1.414     christos  933:        } else
1.400     uebayasi  934:                epp->ep_path = NULL;
                    935: }
                    936:
1.387     uebayasi  937: /* XXX elsewhere */
                    938: static int
                    939: credexec(struct lwp *l, struct vattr *attr)
                    940: {
                                /*
                                 * Adjust the credentials of l and its process for the exec.
                                 *
                                 * attr supplies the mode bits (S_ISUID / S_ISGID) and the owner
                                 * uid / gid that are tested and applied below; presumably these
                                 * are the attributes of the image being executed - confirm
                                 * against the caller.
                                 *
                                 * Returns 0, or the error from fd_checkstd().
                                 */
                    941:        struct proc *p = l->l_proc;
                    942:        int error;
                    943:
                    944:        /*
                    945:         * Deal with set[ug]id.  MNT_NOSUID has already been used to disable
                    946:         * s[ug]id.  It's OK to check for PSL_TRACED here as we have blocked
                    947:         * out additional references on the process for the moment.
                    948:         */
                                /*
                                 * Taken only for an untraced process when a set-id bit is set and
                                 * the corresponding effective id differs from the file's owner.
                                 */
                    949:        if ((p->p_slflag & PSL_TRACED) == 0 &&
                    950:
                    951:            (((attr->va_mode & S_ISUID) != 0 &&
                    952:              kauth_cred_geteuid(l->l_cred) != attr->va_uid) ||
                    953:
                    954:             ((attr->va_mode & S_ISGID) != 0 &&
                    955:              kauth_cred_getegid(l->l_cred) != attr->va_gid))) {
                    956:                /*
                    957:                 * Mark the process as SUGID before we do
                    958:                 * anything that might block.
                    959:                 */
                                        /* NOTE(review): presumably the `true' argument sets the flag - confirm. */
                    960:                proc_crmod_enter();
                    961:                proc_crmod_leave(NULL, NULL, true);
                    962:
                    963:                /* Make sure file descriptors 0..2 are in use. */
                    964:                if ((error = fd_checkstd()) != 0) {
                    965:                        DPRINTF(("%s: fdcheckstd failed %d\n",
                    966:                            __func__, error));
                    967:                        return error;
                    968:                }
                    969:
                    970:                /*
                    971:                 * Copy the credential so other references don't see our
                    972:                 * changes.
                    973:                 */
                    974:                l->l_cred = kauth_cred_copy(l->l_cred);
                    975: #ifdef KTRACE
                    976:                /*
                    977:                 * If the persistent trace flag isn't set, turn off.
                    978:                 */
                    979:                if (p->p_tracep) {
                    980:                        mutex_enter(&ktrace_lock);
                    981:                        if (!(p->p_traceflag & KTRFAC_PERSISTENT))
                    982:                                ktrderef(p);
                    983:                        mutex_exit(&ktrace_lock);
                    984:                }
                    985: #endif
                                        /* Take on the file owner's ids for whichever set-id bits are set. */
                    986:                if (attr->va_mode & S_ISUID)
                    987:                        kauth_cred_seteuid(l->l_cred, attr->va_uid);
                    988:                if (attr->va_mode & S_ISGID)
                    989:                        kauth_cred_setegid(l->l_cred, attr->va_gid);
                    990:        } else {
                                        /*
                                         * No set-id change: clear PK_SUGID, but only when the
                                         * effective and real ids match for both uid and gid.
                                         */
                    991:                if (kauth_cred_geteuid(l->l_cred) ==
                    992:                    kauth_cred_getuid(l->l_cred) &&
                    993:                    kauth_cred_getegid(l->l_cred) ==
                    994:                    kauth_cred_getgid(l->l_cred))
                    995:                        p->p_flag &= ~PK_SUGID;
                    996:        }
                    997:
                    998:        /*
                    999:         * Copy the credential so other references don't see our changes.
                   1000:         * Test to see if this is necessary first, since in the common case
                   1001:         * we won't need a private reference.
                   1002:         */
                                /* The saved uid / gid are brought in line with the effective ones. */
                   1003:        if (kauth_cred_geteuid(l->l_cred) != kauth_cred_getsvuid(l->l_cred) ||
                   1004:            kauth_cred_getegid(l->l_cred) != kauth_cred_getsvgid(l->l_cred)) {
                   1005:                l->l_cred = kauth_cred_copy(l->l_cred);
                   1006:                kauth_cred_setsvuid(l->l_cred, kauth_cred_geteuid(l->l_cred));
                   1007:                kauth_cred_setsvgid(l->l_cred, kauth_cred_getegid(l->l_cred));
                   1008:        }
                   1009:
                   1010:        /* Update the master credentials. */
                                /*
                                 * l_cred differs from p_cred only if it was replaced above.  The
                                 * process takes its own hold on the new credential, the pointer is
                                 * swapped under p_lock, and the old credential is freed after the
                                 * lock has been dropped.
                                 */
                   1011:        if (l->l_cred != p->p_cred) {
                   1012:                kauth_cred_t ocred;
                   1013:
                   1014:                kauth_cred_hold(l->l_cred);
                   1015:                mutex_enter(p->p_lock);
                   1016:                ocred = p->p_cred;
                   1017:                p->p_cred = l->l_cred;
                   1018:                mutex_exit(p->p_lock);
                   1019:                kauth_cred_free(ocred);
                   1020:        }
                   1021:
                   1022:        return 0;
                   1023: }
                   1024:
1.406     uebayasi 1025: static void
                   1026: emulexec(struct lwp *l, struct exec_package *epp)
                   1027: {
                                /*
                                 * Switch the process of l over to the emulation selected for the
                                 * new image (epp->ep_esch->es_emul): install the emulation root,
                                 * run the old emulation's exit hook if the emulation changes,
                                 * renumber l as LWP 1, run the new emulation's exec hook, and
                                 * finally update p_emul and p_execsw.
                                 */
                   1028:        struct proc             *p = l->l_proc;
                   1029:
                   1030:        /* The emulation root will usually have been found when we looked
                   1031:         * for the elf interpreter (or similar), if not look now. */
                   1032:        if (epp->ep_esch->es_emul->e_path != NULL &&
                   1033:            epp->ep_emul_root == NULL)
                   1034:                emul_find_root(l, epp);
                   1035:
                   1036:        /* Any old emulation root got removed by fdcloseexec */
                                /*
                                 * The pointer held in ep_emul_root moves to cwdi_edir; the field is
                                 * then cleared so the package no longer refers to that vnode.
                                 */
                   1037:        rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER);
                   1038:        p->p_cwdi->cwdi_edir = epp->ep_emul_root;
                   1039:        rw_exit(&p->p_cwdi->cwdi_lock);
                   1040:        epp->ep_emul_root = NULL;
                                /*
                                 * NOTE(review): ep_interp is released here but, unlike
                                 * ep_emul_root, left non-NULL - confirm that no later cleanup of
                                 * this package releases it a second time.
                                 */
                   1041:        if (epp->ep_interp != NULL)
                   1042:                vrele(epp->ep_interp);
                   1043:
                   1044:        /*
                   1045:         * Call emulation specific exec hook. This can setup per-process
                   1046:         * p->p_emuldata or do any other per-process stuff an emulation needs.
                   1047:         *
                   1048:         * If we are executing process of different emulation than the
                   1049:         * original forked process, call e_proc_exit() of the old emulation
                   1050:         * first, then e_proc_exec() of new emulation. If the emulation is
                   1051:         * same, the exec hook code should deallocate any old emulation
                   1052:         * resources held previously by this process.
                   1053:         */
                   1054:        if (p->p_emul && p->p_emul->e_proc_exit
                   1055:            && p->p_emul != epp->ep_esch->es_emul)
                   1056:                (*p->p_emul->e_proc_exit)(p);
                   1057:
                   1058:        /*
                   1059:         * This is now LWP 1.
                   1060:         */
                   1061:        /* XXX elsewhere */
                   1062:        mutex_enter(p->p_lock);
                   1063:        p->p_nlwpid = 1;
                   1064:        l->l_lid = 1;
                   1065:        mutex_exit(p->p_lock);
                   1066:
                   1067:        /*
                   1068:         * Call exec hook. Emulation code may NOT store reference to anything
                   1069:         * from &pack.
                   1070:         */
                                /* The hook is optional; p_emul still names the old emulation while it runs. */
                   1071:        if (epp->ep_esch->es_emul->e_proc_exec)
                   1072:                (*epp->ep_esch->es_emul->e_proc_exec)(p, epp);
                   1073:
                   1074:        /* update p_emul, the old value is no longer needed */
                   1075:        p->p_emul = epp->ep_esch->es_emul;
                   1076:
                   1077:        /* ...and the same for p_execsw */
                   1078:        p->p_execsw = epp->ep_esch;
                   1079:
                   1080: #ifdef __HAVE_SYSCALL_INTERN
                                /* Called through the new p_emul, without a NULL check on the hook. */
                   1081:        (*p->p_emul->e_syscall_intern)(p);
                   1082: #endif
                   1083:        ktremul();
                   1084: }
                   1085:
1.337     martin   1086: static int
1.348     martin   1087: execve_runproc(struct lwp *l, struct execve_data * restrict data,
                   1088:        bool no_local_exec_lock, bool is_spawn)
1.337     martin   1089: {
                                /*
                                 * Switch the calling process over to the image described by
                                 * data->ed_pack.
                                 *
                                 * l                    - the LWP doing the exec; its process is
                                 *                        the one being replaced.
                                 * data                 - exec state (exec package, argument
                                 *                        buffer, path buffers, attributes).
                                 * no_local_exec_lock   - if true, exec_lock is not released here
                                 *                        (asserted to imply is_spawn).
                                 * is_spawn             - selects uvmspace_spawn() instead of
                                 *                        uvmspace_exec(), and on failure makes
                                 *                        this function return the error instead
                                 *                        of calling exit1().
                                 *
                                 * Returns EJUSTRETURN on success.  On failure control goes to
                                 * exec_abort, which drops the locks and frees the exec
                                 * resources; for !is_spawn it then calls exit1() with
                                 * W_EXITCODE(error, SIGABRT), otherwise the error is returned.
                                 */
1.378     uebayasi 1090:        struct exec_package     * const epp = &data->ed_pack;
1.352     rmind    1091:        int error = 0;
                   1092:        struct proc             *p;
1.337     martin   1093:
1.348     martin   1094:        /*
                   1095:         * In case of a posix_spawn operation, the child doing the exec
                   1096:         * might not hold the reader lock on exec_lock, but the parent
                   1097:         * will do this instead.
                   1098:         */
                   1099:        KASSERT(no_local_exec_lock || rw_lock_held(&exec_lock));
1.381     uebayasi 1100:        KASSERT(!no_local_exec_lock || is_spawn);
1.337     martin   1101:        KASSERT(data != NULL);
1.352     rmind    1102:
                   1103:        p = l->l_proc;
1.337     martin   1104:
1.237     ad       1105:        /* Get rid of other LWPs. */
1.340     rmind    1106:        if (p->p_nlwps > 1) {
1.272     ad       1107:                mutex_enter(p->p_lock);
1.237     ad       1108:                exit_lwps(l);
1.272     ad       1109:                mutex_exit(p->p_lock);
1.237     ad       1110:        }
1.164     thorpej  1111:        KDASSERT(p->p_nlwps == 1);
                   1112:
1.253     ad       1113:        /* Destroy any lwpctl info. */
                   1114:        if (p->p_lwpctl != NULL)
                   1115:                lwp_ctl_exit();
                   1116:
1.164     thorpej  1117:        /* Remove POSIX timers */
                   1118:        timers_free(p, TIMERS_POSIX);
                   1119:
1.417     maxv     1120:        /* Set the PaX flags. */
                   1121:        p->p_pax = epp->ep_pax_flags;
                   1122:
1.86      thorpej  1123:        /*
                   1124:         * Do whatever is necessary to prepare the address space
                   1125:         * for remapping.  Note that this might replace the current
                   1126:         * vmspace with another!
                   1127:         */
1.348     martin   1128:        if (is_spawn)
1.378     uebayasi 1129:                uvmspace_spawn(l, epp->ep_vm_minaddr,
                   1130:                    epp->ep_vm_maxaddr,
                   1131:                    epp->ep_flags & EXEC_TOPDOWN_VM);
1.348     martin   1132:        else
1.378     uebayasi 1133:                uvmspace_exec(l, epp->ep_vm_minaddr,
                   1134:                    epp->ep_vm_maxaddr,
                   1135:                    epp->ep_flags & EXEC_TOPDOWN_VM);
1.55      cgd      1136:
                                /*
                                 * Read p->p_vmspace only now, after the call above that may
                                 * have replaced it.  Every "goto exec_abort" below comes after
                                 * this assignment, so vm is always set when the abort path
                                 * uses it.
                                 */
1.385     uebayasi 1137:        struct vmspace          *vm;
1.86      thorpej  1138:        vm = p->p_vmspace;
1.378     uebayasi 1139:        vm->vm_taddr = (void *)epp->ep_taddr;
                   1140:        vm->vm_tsize = btoc(epp->ep_tsize);
                   1141:        vm->vm_daddr = (void*)epp->ep_daddr;
                   1142:        vm->vm_dsize = btoc(epp->ep_dsize);
                   1143:        vm->vm_ssize = btoc(epp->ep_ssize);
1.288     mrg      1144:        vm->vm_issize = 0;
1.378     uebayasi 1145:        vm->vm_maxsaddr = (void *)epp->ep_maxsaddr;
                   1146:        vm->vm_minsaddr = (void *)epp->ep_minsaddr;
1.55      cgd      1147:
1.260     christos 1148: #ifdef PAX_ASLR
1.413     maxv     1149:        pax_aslr_init_vm(l, vm);
1.260     christos 1150: #endif /* PAX_ASLR */
                   1151:
1.401     uebayasi 1152:        /* Now map address space. */
                   1153:        error = execve_dovmcmds(l, data);
                   1154:        if (error != 0)
1.55      cgd      1155:                goto exec_abort;
                   1156:
1.400     uebayasi 1157:        pathexec(epp, p, data->ed_pathstring);
1.255     christos 1158:
                                /* Stack address later handed to the setregs hooks below. */
1.397     uebayasi 1159:        char * const newstack = STACK_GROW(vm->vm_minsaddr, epp->ep_ssize);
1.386     uebayasi 1160:
1.399     uebayasi 1161:        error = copyoutargs(data, l, newstack);
1.398     uebayasi 1162:        if (error != 0)
1.55      cgd      1163:                goto exec_abort;
1.109     simonb   1164:
1.307     pooka    1165:        cwdexec(p);
1.270     ad       1166:        fd_closeexec();         /* handle close on exec */
1.315     alnsn    1167:
                   1168:        if (__predict_false(ktrace_on))
                   1169:                fd_ktrexecfd();
                   1170:
1.55      cgd      1171:        execsigs(p);            /* reset caught signals */
1.183     junyoung 1172:
1.380     uebayasi 1173:        mutex_enter(p->p_lock);
1.164     thorpej  1174:        l->l_ctxlink = NULL;    /* reset ucontext link */
1.55      cgd      1175:        p->p_acflag &= ~AFORK;
1.238     pavel    1176:        p->p_flag |= PK_EXEC;
1.272     ad       1177:        mutex_exit(p->p_lock);
1.237     ad       1178:
                   1179:        /*
                   1180:         * Stop profiling.
                   1181:         */
                   1182:        if ((p->p_stflag & PST_PROFIL) != 0) {
                   1183:                mutex_spin_enter(&p->p_stmutex);
                   1184:                stopprofclock(p);
                   1185:                mutex_spin_exit(&p->p_stmutex);
                   1186:        }
                   1187:
                   1188:        /*
1.275     ad       1189:         * It's OK to test PL_PPWAIT unlocked here, as other LWPs have
1.237     ad       1190:         * exited and exec()/exit() are the only places it will be cleared.
                   1191:         */
1.275     ad       1192:        if ((p->p_lflag & PL_PPWAIT) != 0) {
                                        /*
                                         * The "#if 0" variant (compiled out) wakes only the
                                         * LWP recorded in p_vforklwp; the active "#else"
                                         * variant broadcasts on the parent's p_waitcv.
                                         */
1.354     christos 1193: #if 0
1.353     rmind    1194:                lwp_t *lp;
                   1195:
1.271     ad       1196:                mutex_enter(proc_lock);
1.353     rmind    1197:                lp = p->p_vforklwp;
                   1198:                p->p_vforklwp = NULL;
                   1199:
1.308     pooka    1200:                l->l_lwpctl = NULL; /* was on loan from blocked parent */
1.275     ad       1201:                p->p_lflag &= ~PL_PPWAIT;
1.353     rmind    1202:
                   1203:                lp->l_pflag &= ~LP_VFORKWAIT; /* XXX */
                   1204:                cv_broadcast(&lp->l_waitcv);
1.271     ad       1205:                mutex_exit(proc_lock);
1.354     christos 1206: #else
                   1207:                mutex_enter(proc_lock);
                   1208:                l->l_lwpctl = NULL; /* was on loan from blocked parent */
                   1209:                p->p_lflag &= ~PL_PPWAIT;
                   1210:                cv_broadcast(&p->p_pptr->p_waitcv);
                   1211:                mutex_exit(proc_lock);
                   1212: #endif
1.55      cgd      1213:        }
                   1214:
1.387     uebayasi 1215:        error = credexec(l, &data->ed_attr);
                   1216:        if (error)
                   1217:                goto exec_abort;
1.221     ad       1218:
1.155     gmcgarry 1219: #if defined(__HAVE_RAS)
                   1220:        /*
                   1221:         * Remove all RASs from the address space.
                   1222:         */
1.251     ad       1223:        ras_purgeall();
1.155     gmcgarry 1224: #endif
1.107     fvdl     1225:
                   1226:        doexechooks(p);
1.55      cgd      1227:
1.390     uebayasi 1228:        /*
                   1229:         * Set initial SP at the top of the stack.
                   1230:         *
                   1231:         * Note that on machines where stack grows up (e.g. hppa), SP points to
                   1232:         * the end of arg/env strings.  Userland guesses the address of argc
                   1233:         * via ps_strings::ps_argvstr.
                   1234:         */
                   1235:
                   1236:        /* Setup new registers and do misc. setup. */
1.397     uebayasi 1237:        (*epp->ep_esch->es_emul->e_setregs)(l, epp, (vaddr_t)newstack);
1.378     uebayasi 1238:        if (epp->ep_esch->es_setregs)
1.397     uebayasi 1239:                (*epp->ep_esch->es_setregs)(l, epp, (vaddr_t)newstack);
1.55      cgd      1240:
1.309     joerg    1241:        /* Provide a consistent LWP private setting */
                   1242:        (void)lwp_setprivate(l, NULL);
                   1243:
1.316     matt     1244:        /* Discard all PCU state; need to start fresh */
                   1245:        pcu_discard_all(l);
                   1246:
1.171     chs      1247:        /* map the process's signal trampoline code */
1.378     uebayasi 1248:        if ((error = exec_sigcode_map(p, epp->ep_esch->es_emul)) != 0) {
1.312     christos 1249:                DPRINTF(("%s: map sigcode failed %d\n", __func__, error));
1.171     chs      1250:                goto exec_abort;
1.209     christos 1251:        }
1.171     chs      1252:
                                /*
                                 * No "goto exec_abort" follows this point, so the argument
                                 * buffer and the exec header are released here on the
                                 * success path (the abort path frees them itself).
                                 */
1.337     martin   1253:        pool_put(&exec_pool, data->ed_argp);
1.276     ad       1254:
                   1255:        /* notify others that we exec'd */
                   1256:        KNOTE(&p->p_klist, NOTE_EXEC);
                   1257:
1.378     uebayasi 1258:        kmem_free(epp->ep_hdr, epp->ep_hdrlen);
1.122     jdolecek 1259:
1.418     christos 1260:        SDT_PROBE(proc, kernel, , exec__success, epp->ep_kname, 0, 0, 0, 0);
1.294     darran   1261:
1.406     uebayasi 1262:        emulexec(l, epp);
1.85      mycroft  1263:
1.252     ad       1264:        /* Allow new references from the debugger/procfs. */
1.341     martin   1265:        rw_exit(&p->p_reflock);
1.348     martin   1266:        if (!no_local_exec_lock)
                   1267:                rw_exit(&exec_lock);
1.162     manu     1268:
1.271     ad       1269:        mutex_enter(proc_lock);
1.237     ad       1270:
                                /* Traced, but without PSL_SYSCALL set: post a SIGTRAP. */
                   1271:        if ((p->p_slflag & (PSL_TRACED|PSL_SYSCALL)) == PSL_TRACED) {
1.383     uebayasi 1272:                ksiginfo_t ksi;
                   1273:
1.237     ad       1274:                KSI_INIT_EMPTY(&ksi);
                   1275:                ksi.ksi_signo = SIGTRAP;
                   1276:                ksi.ksi_lid = l->l_lid;
                   1277:                kpsignal(p, &ksi, NULL);
                   1278:        }
1.162     manu     1279:
                                /*
                                 * PS_STOPEXEC: mark this LWP and the process stopped, count
                                 * the process as a stopped child of its parent, and switch
                                 * away.  proc_lock is dropped on both branches.
                                 */
1.237     ad       1280:        if (p->p_sflag & PS_STOPEXEC) {
1.383     uebayasi 1281:                ksiginfoq_t kq;
                   1282:
1.237     ad       1283:                KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
1.175     dsl      1284:                p->p_pptr->p_nstopchild++;
1.419     pgoyette 1285:                p->p_waited = 0;
1.272     ad       1286:                mutex_enter(p->p_lock);
1.237     ad       1287:                ksiginfo_queue_init(&kq);
                   1288:                sigclearall(p, &contsigmask, &kq);
                   1289:                lwp_lock(l);
                   1290:                l->l_stat = LSSTOP;
1.162     manu     1291:                p->p_stat = SSTOP;
1.164     thorpej  1292:                p->p_nrlwps--;
1.304     rmind    1293:                lwp_unlock(l);
1.272     ad       1294:                mutex_exit(p->p_lock);
1.271     ad       1295:                mutex_exit(proc_lock);
1.304     rmind    1296:                lwp_lock(l);
1.245     yamt     1297:                mi_switch(l);
1.237     ad       1298:                ksiginfo_queue_drain(&kq);
                   1299:                KERNEL_LOCK(l->l_biglocks, l);
                   1300:        } else {
1.271     ad       1301:                mutex_exit(proc_lock);
1.162     manu     1302:        }
                   1303:
1.337     martin   1304:        pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
                   1305:        pathbuf_destroy(data->ed_pathbuf);
                   1306:        PNBUF_PUT(data->ed_resolvedpathbuf);
1.327     reinoud  1307:        DPRINTF(("%s finished\n", __func__));
1.374     martin   1308:        return EJUSTRETURN;
1.55      cgd      1309:
1.138     lukem    1310:  exec_abort:
1.418     christos 1311:        SDT_PROBE(proc, kernel, , exec__failure, error, 0, 0, 0, 0);
1.297     rmind    1312:        rw_exit(&p->p_reflock);
1.348     martin   1313:        if (!no_local_exec_lock)
                   1314:                rw_exit(&exec_lock);
1.297     rmind    1315:
1.352     rmind    1316:        pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
                   1317:        pathbuf_destroy(data->ed_pathbuf);
                   1318:        PNBUF_PUT(data->ed_resolvedpathbuf);
                   1319:
1.55      cgd      1320:        /*
                   1321:         * the old process doesn't exist anymore.  exit gracefully.
                   1322:         * get rid of the (new) address space we have created, if any, get rid
                   1323:         * of our namei data and vnode, and exit noting failure
                   1324:         */
1.88      mrg      1325:        uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
1.352     rmind    1326:                VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
1.348     martin   1327:
1.378     uebayasi 1328:        exec_free_emul_arg(epp);
1.337     martin   1329:        pool_put(&exec_pool, data->ed_argp);
1.378     uebayasi 1330:        kmem_free(epp->ep_hdr, epp->ep_hdrlen);
                   1331:        if (epp->ep_emul_root != NULL)
                   1332:                vrele(epp->ep_emul_root);
                   1333:        if (epp->ep_interp != NULL)
                   1334:                vrele(epp->ep_interp);
1.237     ad       1335:
1.252     ad       1336:        /* Acquire the sched-state mutex (exit1() will release it). */
1.348     martin   1337:        if (!is_spawn) {
1.337     martin   1338:                mutex_enter(p->p_lock);
                   1339:                exit1(l, W_EXITCODE(error, SIGABRT));
                   1340:        }
1.55      cgd      1341:
                                /* Spawn case: hand the error back to the caller. */
1.348     martin   1342:        return error;
1.67      christos 1343: }
                   1344:
1.144     christos 1345: int
1.337     martin   1346: execve1(struct lwp *l, const char *path, char * const *args,
                   1347:     char * const *envs, execve_fetch_element_t fetch_element)
                   1348: {
                   1349:        struct execve_data data;
                   1350:        int error;
                   1351:
                   1352:        error = execve_loadvm(l, path, args, envs, fetch_element, &data);
                   1353:        if (error)
                   1354:                return error;
1.348     martin   1355:        error = execve_runproc(l, &data, false, false);
1.337     martin   1356:        return error;
                   1357: }
                   1358:
1.396     uebayasi 1359: static size_t
1.411     christos 1360: fromptrsz(const struct exec_package *epp)
                   1361: {
                   1362:        return (epp->ep_flags & EXEC_FROM32) ? sizeof(int) : sizeof(char *);
                   1363: }
                   1364:
                   1365: static size_t
1.409     christos 1366: ptrsz(const struct exec_package *epp)
                   1367: {
1.411     christos 1368:        return (epp->ep_flags & EXEC_32) ? sizeof(int) : sizeof(char *);
1.409     christos 1369: }
                   1370:
                   1371: static size_t
1.396     uebayasi 1372: calcargs(struct execve_data * restrict data, const size_t argenvstrlen)
                   1373: {
                   1374:        struct exec_package     * const epp = &data->ed_pack;
                   1375:
                   1376:        const size_t nargenvptrs =
1.402     uebayasi 1377:            1 +                         /* long argc */
1.396     uebayasi 1378:            data->ed_argc +             /* char *argv[] */
                   1379:            1 +                         /* \0 */
                   1380:            data->ed_envc +             /* char *env[] */
                   1381:            1 +                         /* \0 */
                   1382:            epp->ep_esch->es_arglen;    /* auxinfo */
                   1383:
1.409     christos 1384:        return (nargenvptrs * ptrsz(epp)) + argenvstrlen;
1.396     uebayasi 1385: }
                   1386:
                   1387: static size_t
                   1388: calcstack(struct execve_data * restrict data, const size_t gaplen)
                   1389: {
                   1390:        struct exec_package     * const epp = &data->ed_pack;
                   1391:
                   1392:        data->ed_szsigcode = epp->ep_esch->es_emul->e_esigcode -
                   1393:            epp->ep_esch->es_emul->e_sigcode;
                   1394:
                   1395:        data->ed_ps_strings_sz = (epp->ep_flags & EXEC_32) ?
                   1396:            sizeof(struct ps_strings32) : sizeof(struct ps_strings);
                   1397:
                   1398:        const size_t sigcode_psstr_sz =
                   1399:            data->ed_szsigcode +        /* sigcode */
                   1400:            data->ed_ps_strings_sz +    /* ps_strings */
                   1401:            STACK_PTHREADSPACE;         /* pthread space */
                   1402:
                   1403:        const size_t stacklen =
                   1404:            data->ed_argslen +
                   1405:            gaplen +
                   1406:            sigcode_psstr_sz;
                   1407:
                   1408:        /* make the stack "safely" aligned */
                   1409:        return STACK_LEN_ALIGN(stacklen, STACK_ALIGNBYTES);
                   1410: }
                   1411:
1.391     uebayasi 1412: static int
1.399     uebayasi 1413: copyoutargs(struct execve_data * restrict data, struct lwp *l,
                   1414:     char * const newstack)
                   1415: {
                   1416:        struct exec_package     * const epp = &data->ed_pack;
                   1417:        struct proc             *p = l->l_proc;
                   1418:        int                     error;
                   1419:
                   1420:        /* remember information about the process */
                   1421:        data->ed_arginfo.ps_nargvstr = data->ed_argc;
                   1422:        data->ed_arginfo.ps_nenvstr = data->ed_envc;
                   1423:
                   1424:        /*
                   1425:         * Allocate the stack address passed to the newly execve()'ed process.
                   1426:         *
                   1427:         * The new stack address will be set to the SP (stack pointer) register
                   1428:         * in setregs().
                   1429:         */
                   1430:
                   1431:        char *newargs = STACK_ALLOC(
                   1432:            STACK_SHRINK(newstack, data->ed_argslen), data->ed_argslen);
                   1433:
                   1434:        error = (*epp->ep_esch->es_copyargs)(l, epp,
                   1435:            &data->ed_arginfo, &newargs, data->ed_argp);
                   1436:
                   1437:        if (epp->ep_path) {
                   1438:                PNBUF_PUT(epp->ep_path);
                   1439:                epp->ep_path = NULL;
                   1440:        }
                   1441:        if (error) {
                   1442:                DPRINTF(("%s: copyargs failed %d\n", __func__, error));
                   1443:                return error;
                   1444:        }
                   1445:
                   1446:        error = copyoutpsstrs(data, p);
                   1447:        if (error != 0)
                   1448:                return error;
                   1449:
                   1450:        return 0;
                   1451: }
                   1452:
                   1453: static int
1.398     uebayasi 1454: copyoutpsstrs(struct execve_data * restrict data, struct proc *p)
                   1455: {
                   1456:        struct exec_package     * const epp = &data->ed_pack;
                   1457:        struct ps_strings32     arginfo32;
                   1458:        void                    *aip;
                   1459:        int                     error;
                   1460:
                   1461:        /* fill process ps_strings info */
                   1462:        p->p_psstrp = (vaddr_t)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
                   1463:            STACK_PTHREADSPACE), data->ed_ps_strings_sz);
                   1464:
                   1465:        if (epp->ep_flags & EXEC_32) {
                   1466:                aip = &arginfo32;
                   1467:                arginfo32.ps_argvstr = (vaddr_t)data->ed_arginfo.ps_argvstr;
                   1468:                arginfo32.ps_nargvstr = data->ed_arginfo.ps_nargvstr;
                   1469:                arginfo32.ps_envstr = (vaddr_t)data->ed_arginfo.ps_envstr;
                   1470:                arginfo32.ps_nenvstr = data->ed_arginfo.ps_nenvstr;
                   1471:        } else
                   1472:                aip = &data->ed_arginfo;
                   1473:
                   1474:        /* copy out the process's ps_strings structure */
                   1475:        if ((error = copyout(aip, (void *)p->p_psstrp, data->ed_ps_strings_sz))
                   1476:            != 0) {
                   1477:                DPRINTF(("%s: ps_strings copyout %p->%p size %zu failed\n",
                   1478:                    __func__, aip, (void *)p->p_psstrp, data->ed_ps_strings_sz));
                   1479:                return error;
                   1480:        }
                   1481:
                   1482:        return 0;
                   1483: }
                   1484:
                   1485: static int
1.391     uebayasi 1486: copyinargs(struct execve_data * restrict data, char * const *args,
                   1487:     char * const *envs, execve_fetch_element_t fetch_element, char **dpp)
                   1488: {
                   1489:        struct exec_package     * const epp = &data->ed_pack;
1.392     uebayasi 1490:        char                    *dp;
1.391     uebayasi 1491:        size_t                  i;
                   1492:        int                     error;
                   1493:
                   1494:        dp = *dpp;
                   1495:
                   1496:        data->ed_argc = 0;
                   1497:
                   1498:        /* copy the fake args list, if there's one, freeing it as we go */
                   1499:        if (epp->ep_flags & EXEC_HASARGL) {
1.405     uebayasi 1500:                struct exec_fakearg     *fa = epp->ep_fa;
1.391     uebayasi 1501:
1.405     uebayasi 1502:                while (fa->fa_arg != NULL) {
1.394     uebayasi 1503:                        const size_t maxlen = ARG_MAX - (dp - data->ed_argp);
                   1504:                        size_t len;
1.391     uebayasi 1505:
1.405     uebayasi 1506:                        len = strlcpy(dp, fa->fa_arg, maxlen);
1.394     uebayasi 1507:                        /* Count NUL into len. */
                   1508:                        if (len < maxlen)
                   1509:                                len++;
1.404     uebayasi 1510:                        else {
1.405     uebayasi 1511:                                while (fa->fa_arg != NULL) {
                   1512:                                        kmem_free(fa->fa_arg, fa->fa_len);
                   1513:                                        fa++;
1.404     uebayasi 1514:                                }
                   1515:                                kmem_free(epp->ep_fa, epp->ep_fa_len);
                   1516:                                epp->ep_flags &= ~EXEC_HASARGL;
1.395     uebayasi 1517:                                return E2BIG;
1.404     uebayasi 1518:                        }
1.405     uebayasi 1519:                        ktrexecarg(fa->fa_arg, len - 1);
1.394     uebayasi 1520:                        dp += len;
1.391     uebayasi 1521:
1.405     uebayasi 1522:                        kmem_free(fa->fa_arg, fa->fa_len);
                   1523:                        fa++;
1.391     uebayasi 1524:                        data->ed_argc++;
                   1525:                }
                   1526:                kmem_free(epp->ep_fa, epp->ep_fa_len);
                   1527:                epp->ep_flags &= ~EXEC_HASARGL;
                   1528:        }
                   1529:
1.392     uebayasi 1530:        /*
                   1531:         * Read and count argument strings from user.
                   1532:         */
                   1533:
1.391     uebayasi 1534:        if (args == NULL) {
                   1535:                DPRINTF(("%s: null args\n", __func__));
                   1536:                return EINVAL;
                   1537:        }
1.392     uebayasi 1538:        if (epp->ep_flags & EXEC_SKIPARG)
1.411     christos 1539:                args = (const void *)((const char *)args + fromptrsz(epp));
1.391     uebayasi 1540:        i = 0;
1.392     uebayasi 1541:        error = copyinargstrs(data, args, fetch_element, &dp, &i, ktr_execarg);
                   1542:        if (error != 0) {
                   1543:                DPRINTF(("%s: copyin arg %d\n", __func__, error));
                   1544:                return error;
                   1545:        }
                   1546:        data->ed_argc += i;
                   1547:
                   1548:        /*
                   1549:         * Read and count environment strings from user.
                   1550:         */
                   1551:
                   1552:        data->ed_envc = 0;
                   1553:        /* environment need not be there */
                   1554:        if (envs == NULL)
                   1555:                goto done;
                   1556:        i = 0;
                   1557:        error = copyinargstrs(data, envs, fetch_element, &dp, &i, ktr_execenv);
                   1558:        if (error != 0) {
                   1559:                DPRINTF(("%s: copyin env %d\n", __func__, error));
                   1560:                return error;
                   1561:        }
                   1562:        data->ed_envc += i;
                   1563:
                   1564: done:
                   1565:        *dpp = dp;
                   1566:
                   1567:        return 0;
                   1568: }
                   1569:
                   1570: static int
                   1571: copyinargstrs(struct execve_data * restrict data, char * const *strs,
                   1572:     execve_fetch_element_t fetch_element, char **dpp, size_t *ip,
                   1573:     void (*ktr)(const void *, size_t))
                   1574: {
                   1575:        char                    *dp, *sp;
                   1576:        size_t                  i;
                   1577:        int                     error;
                   1578:
                   1579:        dp = *dpp;
1.391     uebayasi 1580:
1.392     uebayasi 1581:        i = 0;
1.391     uebayasi 1582:        while (1) {
1.394     uebayasi 1583:                const size_t maxlen = ARG_MAX - (dp - data->ed_argp);
1.391     uebayasi 1584:                size_t len;
                   1585:
1.392     uebayasi 1586:                if ((error = (*fetch_element)(strs, i, &sp)) != 0) {
1.391     uebayasi 1587:                        return error;
                   1588:                }
                   1589:                if (!sp)
                   1590:                        break;
                   1591:                if ((error = copyinstr(sp, dp, maxlen, &len)) != 0) {
                   1592:                        if (error == ENAMETOOLONG)
                   1593:                                error = E2BIG;
                   1594:                        return error;
                   1595:                }
1.392     uebayasi 1596:                if (__predict_false(ktrace_on))
                   1597:                        (*ktr)(dp, len - 1);
1.391     uebayasi 1598:                dp += len;
                   1599:                i++;
                   1600:        }
                   1601:
                   1602:        *dpp = dp;
1.392     uebayasi 1603:        *ip = i;
1.391     uebayasi 1604:
                   1605:        return 0;
                   1606: }
                   1607:
1.382     uebayasi 1608: /*
                   1609:  * Copy argv and env strings from kernel buffer (argp) to the new stack.
                   1610:  * Those strings are located just after auxinfo.
                   1611:  */
1.337     martin   1612: int
1.231     yamt     1613: copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo,
                   1614:     char **stackp, void *argp)
1.67      christos 1615: {
1.138     lukem    1616:        char    **cpp, *dp, *sp;
                   1617:        size_t  len;
                   1618:        void    *nullp;
                   1619:        long    argc, envc;
1.144     christos 1620:        int     error;
1.138     lukem    1621:
1.144     christos 1622:        cpp = (char **)*stackp;
1.138     lukem    1623:        nullp = NULL;
                   1624:        argc = arginfo->ps_nargvstr;
                   1625:        envc = arginfo->ps_nenvstr;
1.382     uebayasi 1626:
                   1627:        /* argc on stack is long */
                   1628:        CTASSERT(sizeof(*cpp) == sizeof(argc));
                   1629:
                   1630:        dp = (char *)(cpp +
1.402     uebayasi 1631:            1 +                         /* long argc */
                   1632:            argc +                      /* char *argv[] */
1.382     uebayasi 1633:            1 +                         /* \0 */
1.402     uebayasi 1634:            envc +                      /* char *env[] */
1.382     uebayasi 1635:            1 +                         /* \0 */
1.385     uebayasi 1636:            /* XXX auxinfo multiplied by ptr size? */
1.382     uebayasi 1637:            pack->ep_esch->es_arglen);  /* auxinfo */
                   1638:        sp = argp;
                   1639:
1.305     matt     1640:        if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0) {
1.312     christos 1641:                COPYPRINTF("", cpp - 1, sizeof(argc));
1.144     christos 1642:                return error;
1.305     matt     1643:        }
1.67      christos 1644:
                   1645:        /* XXX don't copy them out, remap them! */
1.69      mycroft  1646:        arginfo->ps_argvstr = cpp; /* remember location of argv for later */
1.67      christos 1647:
1.305     matt     1648:        for (; --argc >= 0; sp += len, dp += len) {
                   1649:                if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
1.312     christos 1650:                        COPYPRINTF("", cpp - 1, sizeof(dp));
1.305     matt     1651:                        return error;
                   1652:                }
                   1653:                if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
1.313     jakllsch 1654:                        COPYPRINTF("str", dp, (size_t)ARG_MAX);
1.144     christos 1655:                        return error;
1.305     matt     1656:                }
                   1657:        }
1.67      christos 1658:
1.305     matt     1659:        if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
1.312     christos 1660:                COPYPRINTF("", cpp - 1, sizeof(nullp));
1.144     christos 1661:                return error;
1.305     matt     1662:        }
1.67      christos 1663:
1.69      mycroft  1664:        arginfo->ps_envstr = cpp; /* remember location of envp for later */
1.67      christos 1665:
1.305     matt     1666:        for (; --envc >= 0; sp += len, dp += len) {
                   1667:                if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
1.312     christos 1668:                        COPYPRINTF("", cpp - 1, sizeof(dp));
1.144     christos 1669:                        return error;
1.305     matt     1670:                }
                   1671:                if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
1.313     jakllsch 1672:                        COPYPRINTF("str", dp, (size_t)ARG_MAX);
1.305     matt     1673:                        return error;
                   1674:                }
1.337     martin   1675:
1.305     matt     1676:        }
1.67      christos 1677:
1.305     matt     1678:        if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
1.312     christos 1679:                COPYPRINTF("", cpp - 1, sizeof(nullp));
1.144     christos 1680:                return error;
1.305     matt     1681:        }
1.67      christos 1682:
1.144     christos 1683:        *stackp = (char *)cpp;
                   1684:        return 0;
1.55      cgd      1685: }
1.130     jdolecek 1686:
                   1687:
                   1688: /*
1.282     ad       1689:  * Add execsw[] entries.
                          *
                          * Registers the `count' entries of the array `esp' on the ex_head
                          * list and then rebuilds the execsw[] table through exec_init(0).
                          * exec_lock is held as writer for the whole operation.
                          *
                          * The duplicate check runs over all new entries before anything is
                          * inserted, so a failure leaves the list untouched.  List entries
                          * store a pointer to esp[i]; the array is referenced, not copied.
                          *
                          * Returns 0 on success (or if count is 0) and EEXIST if any new
                          * entry matches an already registered one on es_makecmds,
                          * u.elf_probe_func and es_emul.
1.130     jdolecek 1690:  */
                   1691: int
1.282     ad       1692: exec_add(struct execsw *esp, int count)
1.130     jdolecek 1693: {
1.282     ad       1694:        struct exec_entry       *it;
                   1695:        int                     i;
1.130     jdolecek 1696:
1.283     ad       1697:        if (count == 0) {
                   1698:                return 0;
                   1699:        }
1.130     jdolecek 1700:
1.282     ad       1701:        /* Check for duplicates. */
1.237     ad       1702:        rw_enter(&exec_lock, RW_WRITER);
1.282     ad       1703:        for (i = 0; i < count; i++) {
                   1704:                LIST_FOREACH(it, &ex_head, ex_list) {
                   1705:                        /* assume unique (makecmds, probe_func, emulation) */
                   1706:                        if (it->ex_sw->es_makecmds == esp[i].es_makecmds &&
                   1707:                            it->ex_sw->u.elf_probe_func ==
                   1708:                            esp[i].u.elf_probe_func &&
                   1709:                            it->ex_sw->es_emul == esp[i].es_emul) {
                                                        /* Drop the lock before bailing out. */
                   1710:                                rw_exit(&exec_lock);
                   1711:                                return EEXIST;
1.130     jdolecek 1712:                        }
                   1713:                }
                   1714:        }
                   1715:
1.282     ad       1716:        /* Allocate new entries. */
                   1717:        for (i = 0; i < count; i++) {
                   1718:                it = kmem_alloc(sizeof(*it), KM_SLEEP);
                                        /* Point at the caller's element; no copy is made. */
                   1719:                it->ex_sw = &esp[i];
                   1720:                LIST_INSERT_HEAD(&ex_head, it, ex_list);
1.130     jdolecek 1721:        }
                   1722:
                   1723:        /* update execsw[] */
                   1724:        exec_init(0);
1.237     ad       1725:        rw_exit(&exec_lock);
1.282     ad       1726:        return 0;
1.130     jdolecek 1727: }
                   1728:
                   1729: /*
                   1730:  * Remove execsw[] entry.
                          *
                          * Unregisters the `count' entries of the array `esp'.  With
                          * exec_lock held as writer, every process on every list in
                          * proclists is examined (under proc_lock); if any process has
                          * p_execsw pointing at one of the entries, nothing is removed and
                          * EBUSY is returned.
                          *
                          * Otherwise each list entry whose ex_sw points at &esp[i] is
                          * unlinked and freed, and execsw[] is rebuilt via exec_init(0).
                          * Entries of `esp' that are not found on the list are skipped
                          * without error.
                          *
                          * Returns 0 on success (or if count is 0), EBUSY as described.
                   1731:  */
                   1732: int
1.282     ad       1733: exec_remove(struct execsw *esp, int count)
1.130     jdolecek 1734: {
1.282     ad       1735:        struct exec_entry       *it, *next;
                   1736:        int                     i;
                   1737:        const struct proclist_desc *pd;
                   1738:        proc_t                  *p;
                   1739:
1.283     ad       1740:        if (count == 0) {
                   1741:                return 0;
                   1742:        }
1.130     jdolecek 1743:
1.282     ad       1744:        /* Abort if any are busy. */
1.237     ad       1745:        rw_enter(&exec_lock, RW_WRITER);
1.282     ad       1746:        for (i = 0; i < count; i++) {
                   1747:                mutex_enter(proc_lock);
                   1748:                for (pd = proclists; pd->pd_list != NULL; pd++) {
                   1749:                        PROCLIST_FOREACH(p, pd->pd_list) {
                   1750:                                if (p->p_execsw == &esp[i]) {
                                                                /* Release both locks, innermost first. */
                   1751:                                        mutex_exit(proc_lock);
                   1752:                                        rw_exit(&exec_lock);
                   1753:                                        return EBUSY;
                   1754:                                }
                   1755:                        }
                   1756:                }
                   1757:                mutex_exit(proc_lock);
                   1758:        }
1.130     jdolecek 1759:
1.282     ad       1760:        /* None are busy, so remove them all. */
                                /*
                                 * NOTE(review): proc_lock is no longer held here; presumably
                                 * holding exec_lock as writer keeps processes from picking up
                                 * one of these entries in the meantime - confirm.
                                 */
                   1761:        for (i = 0; i < count; i++) {
                   1762:                for (it = LIST_FIRST(&ex_head); it != NULL; it = next) {
                                                /* Save the successor: `it' may be freed below. */
                   1763:                        next = LIST_NEXT(it, ex_list);
                   1764:                        if (it->ex_sw == &esp[i]) {
                   1765:                                LIST_REMOVE(it, ex_list);
                   1766:                                kmem_free(it, sizeof(*it));
                   1767:                                break;
                   1768:                        }
                   1769:                }
1.130     jdolecek 1770:        }
                   1771:
                   1772:        /* update execsw[] */
                   1773:        exec_init(0);
1.237     ad       1774:        rw_exit(&exec_lock);
1.282     ad       1775:        return 0;
1.130     jdolecek 1776: }
                   1777:
                   1778: /*
                   1779:  * Initialize exec structures. If init_boot is true, also does necessary
                   1780:  * one-time initialization (it's called from main() that way).
1.147     jdolecek 1781:  * Once system is multiuser, this should be called with exec_lock held,
1.130     jdolecek 1782:  * i.e. via exec_{add|remove}().
                          *
                          * The one-time part sets up exec_lock, sigobject_lock and exec_pool.
                          * In every case the execsw[] array is rebuilt from the ex_head list:
                          * entries with EXECSW_PRIO_FIRST come first, then EXECSW_PRIO_ANY,
                          * then EXECSW_PRIO_LAST.  Any other es_prio value panics.  The old
                          * array is freed, nexecs is updated, and exec_maxhdrsz is recomputed
                          * as the largest es_hdrsz, but never less than sizeof(int).
                          *
                          * Always returns 0.
                   1783:  */
                   1784: int
1.138     lukem    1785: exec_init(int init_boot)
1.130     jdolecek 1786: {
1.282     ad       1787:        const struct execsw     **sw;
                   1788:        struct exec_entry       *ex;
                   1789:        SLIST_HEAD(,exec_entry) first;
                   1790:        SLIST_HEAD(,exec_entry) any;
                   1791:        SLIST_HEAD(,exec_entry) last;
                   1792:        int                     i, sz;
1.130     jdolecek 1793:
                   1794:        if (init_boot) {
                   1795:                /* do one-time initializations */
1.237     ad       1796:                rw_init(&exec_lock);
1.259     ad       1797:                mutex_init(&sigobject_lock, MUTEX_DEFAULT, IPL_NONE);
1.277     ad       1798:                pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
                   1799:                    "execargs", &exec_palloc, IPL_NONE);
                   1800:                pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);
1.282     ad       1801:        } else {
                                        /* Later calls must come in with exec_lock write-held. */
                   1802:                KASSERT(rw_write_held(&exec_lock));
                   1803:        }
1.130     jdolecek 1804:
1.282     ad       1805:        /* Sort each entry onto the appropriate queue. */
                                /*
                                 * Entries are pushed on the head of their queue, so within
                                 * one priority class the order is the reverse of the ex_head
                                 * traversal order.  sz counts all entries seen.
                                 */
                   1806:        SLIST_INIT(&first);
                   1807:        SLIST_INIT(&any);
                   1808:        SLIST_INIT(&last);
                   1809:        sz = 0;
                   1810:        LIST_FOREACH(ex, &ex_head, ex_list) {
                   1811:                switch(ex->ex_sw->es_prio) {
                   1812:                case EXECSW_PRIO_FIRST:
                   1813:                        SLIST_INSERT_HEAD(&first, ex, ex_slist);
                   1814:                        break;
                   1815:                case EXECSW_PRIO_ANY:
                   1816:                        SLIST_INSERT_HEAD(&any, ex, ex_slist);
                   1817:                        break;
                   1818:                case EXECSW_PRIO_LAST:
                   1819:                        SLIST_INSERT_HEAD(&last, ex, ex_slist);
                   1820:                        break;
                   1821:                default:
1.312     christos 1822:                        panic("%s", __func__);
1.282     ad       1823:                        break;
1.130     jdolecek 1824:                }
1.282     ad       1825:                sz++;
1.130     jdolecek 1826:        }
                   1827:
                   1828:        /*
1.282     ad       1829:         * Create new execsw[].  Ensure we do not try a zero-sized
                   1830:         * allocation.
                                 * The extra byte ("+ 1") is what keeps the size non-zero when
                                 * the list is empty (sz == 0).
1.130     jdolecek 1831:         */
1.282     ad       1832:        sw = kmem_alloc(sz * sizeof(struct execsw *) + 1, KM_SLEEP);
                   1833:        i = 0;
                   1834:        SLIST_FOREACH(ex, &first, ex_slist) {
                   1835:                sw[i++] = ex->ex_sw;
                   1836:        }
                   1837:        SLIST_FOREACH(ex, &any, ex_slist) {
                   1838:                sw[i++] = ex->ex_sw;
                   1839:        }
                   1840:        SLIST_FOREACH(ex, &last, ex_slist) {
                   1841:                sw[i++] = ex->ex_sw;
1.130     jdolecek 1842:        }
1.183     junyoung 1843:
1.282     ad       1844:        /* Replace old execsw[] and free used memory. */
                                /*
                                 * The size passed to kmem_free() mirrors the allocation
                                 * formula above, using the old entry count nexecs.
                                 */
                   1845:        if (execsw != NULL) {
                   1846:                kmem_free(__UNCONST(execsw),
                   1847:                    nexecs * sizeof(struct execsw *) + 1);
1.130     jdolecek 1848:        }
1.282     ad       1849:        execsw = sw;
                   1850:        nexecs = sz;
1.130     jdolecek 1851:
1.282     ad       1852:        /* Figure out the maximum size of an exec header. */
                   1853:        exec_maxhdrsz = sizeof(int);
1.130     jdolecek 1854:        for (i = 0; i < nexecs; i++) {
                   1855:                if (execsw[i]->es_hdrsz > exec_maxhdrsz)
                   1856:                        exec_maxhdrsz = execsw[i]->es_hdrsz;
                   1857:        }
                   1858:
                   1859:        return 0;
                   1860: }
1.171     chs      1861:
                         /*
                          * Map the signal trampoline code of emulation `e' into process `p'.
                          *
                          * The code to map is the range e->e_sigcode .. e->e_esigcode.  If the
                          * emulation has no e_sigobject slot or the range is empty, nothing is
                          * done and 0 is returned.  The backing object is created on first use
                          * (serialized by sigobject_lock) and cached in *e->e_sigobject.
                          *
                          * On success the chosen address is stored in
                          * p->p_sigctx.ps_sigcode and 0 is returned.  If either uvm_map()
                          * call fails, its error code is returned.
                          */
                   1862: static int
                   1863: exec_sigcode_map(struct proc *p, const struct emul *e)
                   1864: {
                   1865:        vaddr_t va;
                   1866:        vsize_t sz;
                   1867:        int error;
                   1868:        struct uvm_object *uobj;
                   1869:
                                /* Size in bytes of the sigcode blob. */
1.184     drochner 1870:        sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;
                   1871:
                   1872:        if (e->e_sigobject == NULL || sz == 0) {
1.171     chs      1873:                return 0;
                   1874:        }
                   1875:
                   1876:        /*
                   1877:         * If we don't have a sigobject for this emulation, create one.
                   1878:         *
                   1879:         * sigobject is an anonymous memory object (just like SYSV shared
                   1880:         * memory) that we keep a permanent reference to and that we map
                   1881:         * in all processes that need this sigcode. The creation is simple,
                   1882:         * we create an object, add a permanent reference to it, map it in
                   1883:         * kernel space, copy out the sigcode to it and unmap it.
1.189     jdolecek 1884:         * We map it with PROT_READ|PROT_EXEC into the process just
                   1885:         * the way sys_mmap() would map it.
1.171     chs      1886:         */
                   1887:
                                /*
                                 * The pointer is first read without the lock and then read
                                 * again once sigobject_lock is held, so only one caller
                                 * creates the object.
                                 */
                   1888:        uobj = *e->e_sigobject;
                   1889:        if (uobj == NULL) {
1.259     ad       1890:                mutex_enter(&sigobject_lock);
                   1891:                if ((uobj = *e->e_sigobject) == NULL) {
                   1892:                        uobj = uao_create(sz, 0);
                   1893:                        (*uobj->pgops->pgo_reference)(uobj);
                   1894:                        va = vm_map_min(kernel_map);
                   1895:                        if ((error = uvm_map(kernel_map, &va, round_page(sz),
                   1896:                            uobj, 0, 0,
                   1897:                            UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
                   1898:                            UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
                   1899:                                printf("kernel mapping failed %d\n", error);
                                                        /*
                                                         * NOTE(review): only one pgo_detach here,
                                                         * although uao_create() and pgo_reference()
                                                         * above each appear to contribute a
                                                         * reference - confirm nothing is leaked.
                                                         */
                   1900:                                (*uobj->pgops->pgo_detach)(uobj);
                   1901:                                mutex_exit(&sigobject_lock);
1.374     martin   1902:                                return error;
1.259     ad       1903:                        }
                                                /* Fill the object through the temporary kernel mapping. */
                   1904:                        memcpy((void *)va, e->e_sigcode, sz);
1.171     chs      1905: #ifdef PMAP_NEED_PROCWR
1.259     ad       1906:                        pmap_procwr(&proc0, va, sz);
1.171     chs      1907: #endif
1.259     ad       1908:                        uvm_unmap(kernel_map, va, va + round_page(sz));
                                                /* Publish only after the contents are in place. */
                   1909:                        *e->e_sigobject = uobj;
                   1910:                }
                   1911:                mutex_exit(&sigobject_lock);
1.171     chs      1912:        }
                   1913:
1.172     enami    1914:        /* Just a hint to uvm_map where to put it. */
1.195     fvdl     1915:        va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
                   1916:            round_page(sz));
1.187     chs      1917:
                   1918: #ifdef __alpha__
                   1919:        /*
                   1920:         * Tru64 puts /sbin/loader at the end of user virtual memory,
                   1921:         * which causes the above calculation to put the sigcode at
                   1922:         * an invalid address.  Put it just below the text instead.
                   1923:         */
1.193     jmc      1924:        if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
1.187     chs      1925:                va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
                   1926:        }
                   1927: #endif
                   1928:
                                /*
                                 * Take a reference for the process mapping; it is dropped
                                 * again below if uvm_map() fails.
                                 */
1.171     chs      1929:        (*uobj->pgops->pgo_reference)(uobj);
                   1930:        error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
                   1931:                        uobj, 0, 0,
                   1932:                        UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
                   1933:                                    UVM_ADV_RANDOM, 0));
                   1934:        if (error) {
1.312     christos 1935:                DPRINTF(("%s, %d: map %p "
1.305     matt     1936:                    "uvm_map %#"PRIxVSIZE"@%#"PRIxVADDR" failed %d\n",
1.312     christos 1937:                    __func__, __LINE__, &p->p_vmspace->vm_map, round_page(sz),
                   1938:                    va, error));
1.171     chs      1939:                (*uobj->pgops->pgo_detach)(uobj);
1.374     martin   1940:                return error;
1.171     chs      1941:        }
                                /* Record the address uvm_map() settled on (va is in/out). */
                   1942:        p->p_sigctx.ps_sigcode = (void *)va;
1.374     martin   1943:        return 0;
1.171     chs      1944: }
1.336     matt     1945:
1.337     martin   1946: /*
1.348     martin   1947:  * Release a refcount on spawn_exec_data and destroy memory, if this
                   1948:  * was the last one.
                          *
                          * sed_refcnt is decremented atomically.  Only the caller that
                          * brings it to zero tears the structure down: the condition
                          * variable and mutex are destroyed, the file actions and the
                          * attributes are freed if present, and finally `data' itself is
                          * freed.  In that case `data' must not be used after the call.
                   1949:  */
                   1950: static void
                   1951: spawn_exec_data_release(struct spawn_exec_data *data)
                   1952: {
                                /* Someone else still holds a reference: nothing more to do. */
                   1953:        if (atomic_dec_32_nv(&data->sed_refcnt) != 0)
                   1954:                return;
                   1955:
                   1956:        cv_destroy(&data->sed_cv_child_ready);
                   1957:        mutex_destroy(&data->sed_mtx_child);
                   1958:
                                /* Both optional parts may be NULL. */
                   1959:        if (data->sed_actions)
                   1960:                posix_spawn_fa_free(data->sed_actions,
                   1961:                    data->sed_actions->len);
                   1962:        if (data->sed_attrs)
                   1963:                kmem_free(data->sed_attrs,
                   1964:                    sizeof(*data->sed_attrs));
                   1965:        kmem_free(data, sizeof(*data));
                   1966: }
                   1967:
                   1968: /*
1.337     martin   1969:  * A child lwp of a posix_spawn operation starts here and ends up in
                   1970:  * cpu_spawn_return, dealing with all filedescriptor and scheduler
                   1971:  * manipulations in between.
1.369     christos 1972:  * The parent waits for the child, as it is not clear whether the child
                   1973:  * will be able to acquire its own exec_lock. If it can, the parent can
1.348     martin   1974:  * be released early and continue running in parallel. If not (or if the
                   1975:  * magic debug flag is passed in the scheduler attribute struct), the
1.369     christos 1976:  * child rides on the parent's exec lock until it is ready to return to
1.348     martin   1977:  * userland - and only then releases the parent. This method loses
                   1978:  * concurrency, but improves error reporting.
1.337     martin   1979:  */
                   1980: static void
                   1981: spawn_return(void *arg)
                   1982: {
                   1983:        struct spawn_exec_data *spawn_data = arg;
                   1984:        struct lwp *l = curlwp;
                   1985:        int error, newfd;
1.420   ! pgoyette 1986:        int ostat;
1.337     martin   1987:        size_t i;
                   1988:        const struct posix_spawn_file_actions_entry *fae;
1.348     martin   1989:        pid_t ppid;
1.337     martin   1990:        register_t retval;
1.341     martin   1991:        bool have_reflock;
1.348     martin   1992:        bool parent_is_waiting = true;
1.345     martin   1993:
1.341     martin   1994:        /*
1.348     martin   1995:         * Check if we can release parent early.
                   1996:         * We either need to have no sed_attrs, or sed_attrs does not
                   1997:         * have POSIX_SPAWN_RETURNERROR or one of the flags, that require
                   1998:         * safe access to the parent proc (passed in sed_parent).
                   1999:         * We then try to get the exec_lock, and only if that works, we can
                   2000:         * release the parent here already.
                   2001:         */
                   2002:        ppid = spawn_data->sed_parent->p_pid;
                   2003:        if ((!spawn_data->sed_attrs
                   2004:            || (spawn_data->sed_attrs->sa_flags
                   2005:                & (POSIX_SPAWN_RETURNERROR|POSIX_SPAWN_SETPGROUP)) == 0)
                   2006:            && rw_tryenter(&exec_lock, RW_READER)) {
                   2007:                parent_is_waiting = false;
                   2008:                mutex_enter(&spawn_data->sed_mtx_child);
                   2009:                cv_signal(&spawn_data->sed_cv_child_ready);
                   2010:                mutex_exit(&spawn_data->sed_mtx_child);
                   2011:        }
1.341     martin   2012:
1.352     rmind    2013:        /* don't allow debugger access yet */
                   2014:        rw_enter(&l->l_proc->p_reflock, RW_WRITER);
                   2015:        have_reflock = true;
                   2016:
                   2017:        error = 0;
1.337     martin   2018:        /* handle posix_spawn_file_actions */
                   2019:        if (spawn_data->sed_actions != NULL) {
1.348     martin   2020:                for (i = 0; i < spawn_data->sed_actions->len; i++) {
                   2021:                        fae = &spawn_data->sed_actions->fae[i];
1.337     martin   2022:                        switch (fae->fae_action) {
                   2023:                        case FAE_OPEN:
1.338     martin   2024:                                if (fd_getfile(fae->fae_fildes) != NULL) {
                   2025:                                        error = fd_close(fae->fae_fildes);
                   2026:                                        if (error)
                   2027:                                                break;
                   2028:                                }
1.337     martin   2029:                                error = fd_open(fae->fae_path, fae->fae_oflag,
                   2030:                                    fae->fae_mode, &newfd);
1.376     maxv     2031:                                if (error)
                   2032:                                        break;
1.337     martin   2033:                                if (newfd != fae->fae_fildes) {
                   2034:                                        error = dodup(l, newfd,
                   2035:                                            fae->fae_fildes, 0, &retval);
                   2036:                                        if (fd_getfile(newfd) != NULL)
                   2037:                                                fd_close(newfd);
                   2038:                                }
                   2039:                                break;
                   2040:                        case FAE_DUP2:
                   2041:                                error = dodup(l, fae->fae_fildes,
                   2042:                                    fae->fae_newfildes, 0, &retval);
                   2043:                                break;
                   2044:                        case FAE_CLOSE:
                   2045:                                if (fd_getfile(fae->fae_fildes) == NULL) {
                   2046:                                        error = EBADF;
                   2047:                                        break;
                   2048:                                }
                   2049:                                error = fd_close(fae->fae_fildes);
                   2050:                                break;
                   2051:                        }
                   2052:                        if (error)
                   2053:                                goto report_error;
                   2054:                }
                   2055:        }
                   2056:
                   2057:        /* handle posix_spawnattr */
                   2058:        if (spawn_data->sed_attrs != NULL) {
                   2059:                struct sigaction sigact;
                   2060:                sigact._sa_u._sa_handler = SIG_DFL;
                   2061:                sigact.sa_flags = 0;
                   2062:
                   2063:                /*
                   2064:                 * set state to SSTOP so that this proc can be found by pid.
                   2065:                 * see proc_enterpgrp, do_sched_setparam below
                   2066:                 */
1.420   ! pgoyette 2067:                mutex_enter(proc_lock);
        !          2068:                /*
        !          2069:                 * p_stat should be SACTIVE, so we need to adjust the
        !          2070:                 * parent's p_nstopchild here.  For safety, just make
        !          2071:                 * sure we're on the good side of SDEAD before we adjust.
        !          2072:                 */
1.358     christos 2073:                ostat = l->l_proc->p_stat;
1.420   ! pgoyette 2074:                KASSERT(ostat < SSTOP);
1.337     martin   2075:                l->l_proc->p_stat = SSTOP;
1.420   ! pgoyette 2076:                l->l_proc->p_waited = 0;
        !          2077:                l->l_proc->p_pptr->p_nstopchild++;
        !          2078:                mutex_exit(proc_lock);
1.337     martin   2079:
                   2080:                /* Set process group */
                   2081:                if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETPGROUP) {
                   2082:                        pid_t mypid = l->l_proc->p_pid,
                   2083:                             pgrp = spawn_data->sed_attrs->sa_pgroup;
                   2084:
                   2085:                        if (pgrp == 0)
                   2086:                                pgrp = mypid;
                   2087:
                   2088:                        error = proc_enterpgrp(spawn_data->sed_parent,
                   2089:                            mypid, pgrp, false);
                   2090:                        if (error)
1.420   ! pgoyette 2091:                                goto report_error_stopped;
1.337     martin   2092:                }
                   2093:
                   2094:                /* Set scheduler policy */
                   2095:                if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSCHEDULER)
                   2096:                        error = do_sched_setparam(l->l_proc->p_pid, 0,
                   2097:                            spawn_data->sed_attrs->sa_schedpolicy,
                   2098:                            &spawn_data->sed_attrs->sa_schedparam);
                   2099:                else if (spawn_data->sed_attrs->sa_flags
                   2100:                    & POSIX_SPAWN_SETSCHEDPARAM) {
1.348     martin   2101:                        error = do_sched_setparam(ppid, 0,
1.337     martin   2102:                            SCHED_NONE, &spawn_data->sed_attrs->sa_schedparam);
                   2103:                }
                   2104:                if (error)
1.420   ! pgoyette 2105:                        goto report_error_stopped;
1.337     martin   2106:
                   2107:                /* Reset user ID's */
                   2108:                if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_RESETIDS) {
                   2109:                        error = do_setresuid(l, -1,
                   2110:                             kauth_cred_getgid(l->l_cred), -1,
                   2111:                             ID_E_EQ_R | ID_E_EQ_S);
                   2112:                        if (error)
1.420   ! pgoyette 2113:                                goto report_error_stopped;
1.337     martin   2114:                        error = do_setresuid(l, -1,
                   2115:                            kauth_cred_getuid(l->l_cred), -1,
                   2116:                            ID_E_EQ_R | ID_E_EQ_S);
                   2117:                        if (error)
1.420   ! pgoyette 2118:                                goto report_error_stopped;
1.337     martin   2119:                }
                   2120:
                   2121:                /* Set signal masks/defaults */
                   2122:                if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSIGMASK) {
                   2123:                        mutex_enter(l->l_proc->p_lock);
                   2124:                        error = sigprocmask1(l, SIG_SETMASK,
                   2125:                            &spawn_data->sed_attrs->sa_sigmask, NULL);
                   2126:                        mutex_exit(l->l_proc->p_lock);
                   2127:                        if (error)
1.420   ! pgoyette 2128:                                goto report_error_stopped;
1.337     martin   2129:                }
                   2130:
                   2131:                if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSIGDEF) {
1.375     christos 2132:                        /*
                   2133:                         * The following sigaction call is using a sigaction
                   2134:                         * version 0 trampoline which is in the compatibility
                   2135:                         * code only. This is not a problem because for SIG_DFL
                   2136:                         * and SIG_IGN, the trampolines are now ignored. If they
                   2137:                         * were not, this would be a problem because we are
                   2138:                         * holding the exec_lock, and the compat code needs
                   2139:                         * to do the same in order to replace the trampoline
                   2140:                         * code of the process.
                   2141:                         */
1.337     martin   2142:                        for (i = 1; i <= NSIG; i++) {
                   2143:                                if (sigismember(
                   2144:                                    &spawn_data->sed_attrs->sa_sigdefault, i))
                   2145:                                        sigaction1(l, i, &sigact, NULL, NULL,
                   2146:                                            0);
                   2147:                        }
                   2148:                }
1.420   ! pgoyette 2149:                mutex_enter(proc_lock);
1.358     christos 2150:                l->l_proc->p_stat = ostat;
1.420   ! pgoyette 2151:                l->l_proc->p_pptr->p_nstopchild--;
        !          2152:                mutex_exit(proc_lock);
1.337     martin   2153:        }
                   2154:
1.352     rmind    2155:        /* now do the real exec */
1.348     martin   2156:        error = execve_runproc(l, &spawn_data->sed_exec, parent_is_waiting,
                   2157:            true);
1.341     martin   2158:        have_reflock = false;
1.352     rmind    2159:        if (error == EJUSTRETURN)
                   2160:                error = 0;
                   2161:        else if (error)
1.337     martin   2162:                goto report_error;
                   2163:
1.348     martin   2164:        if (parent_is_waiting) {
                   2165:                mutex_enter(&spawn_data->sed_mtx_child);
                   2166:                cv_signal(&spawn_data->sed_cv_child_ready);
                   2167:                mutex_exit(&spawn_data->sed_mtx_child);
                   2168:        }
1.345     martin   2169:
1.348     martin   2170:        /* release our refcount on the data */
                   2171:        spawn_exec_data_release(spawn_data);
1.337     martin   2172:
1.369     christos 2173:        /* and finally: leave to userland for the first time */
1.337     martin   2174:        cpu_spawn_return(l);
                   2175:
                   2176:        /* NOTREACHED */
                   2177:        return;
                   2178:
1.420   ! pgoyette 2179:  report_error_stopped:
        !          2180:        mutex_enter(proc_lock);
        !          2181:        l->l_proc->p_stat = ostat;
        !          2182:        l->l_proc->p_pptr->p_nstopchild--;
        !          2183:        mutex_exit(proc_lock);
1.337     martin   2184:  report_error:
1.376     maxv     2185:        if (have_reflock) {
                   2186:                /*
1.350     martin   2187:                 * We have not passed through execve_runproc(),
                   2188:                 * which would have released the p_reflock and also
                   2189:                 * taken ownership of the sed_exec part of spawn_data,
                   2190:                 * so release/free both here.
                   2191:                 */
1.341     martin   2192:                rw_exit(&l->l_proc->p_reflock);
1.350     martin   2193:                execve_free_data(&spawn_data->sed_exec);
                   2194:        }
1.341     martin   2195:
1.348     martin   2196:        if (parent_is_waiting) {
                   2197:                /* pass error to parent */
                   2198:                mutex_enter(&spawn_data->sed_mtx_child);
                   2199:                spawn_data->sed_error = error;
                   2200:                cv_signal(&spawn_data->sed_cv_child_ready);
                   2201:                mutex_exit(&spawn_data->sed_mtx_child);
                   2202:        } else {
                   2203:                rw_exit(&exec_lock);
1.337     martin   2204:        }
                   2205:
1.348     martin   2206:        /* release our refcount on the data */
                   2207:        spawn_exec_data_release(spawn_data);
                   2208:
1.352     rmind    2209:        /* done, exit */
                   2210:        mutex_enter(l->l_proc->p_lock);
1.348     martin   2211:        /*
1.352     rmind    2212:         * Posix explicitly asks for an exit code of 127 if we report
1.348     martin   2213:         * errors from the child process - so, unfortunately, there
                   2214:         * is no way to report a more exact error code.
                   2215:         * A NetBSD specific workaround is POSIX_SPAWN_RETURNERROR as
                   2216:         * flag bit in the attrp argument to posix_spawn(2), see above.
                   2217:         */
1.349     martin   2218:        exit1(l, W_EXITCODE(127, 0));
1.337     martin   2219: }
                   2220:
1.348     martin   2221: void
1.344     christos 2222: posix_spawn_fa_free(struct posix_spawn_file_actions *fa, size_t len)
1.342     christos 2223: {
                   2224:
1.344     christos 2225:        for (size_t i = 0; i < len; i++) {
1.342     christos 2226:                struct posix_spawn_file_actions_entry *fae = &fa->fae[i];
                   2227:                if (fae->fae_action != FAE_OPEN)
                   2228:                        continue;
                   2229:                kmem_free(fae->fae_path, strlen(fae->fae_path) + 1);
                   2230:        }
1.348     martin   2231:        if (fa->len > 0)
1.343     christos 2232:                kmem_free(fa->fae, sizeof(*fa->fae) * fa->len);
1.342     christos 2233:        kmem_free(fa, sizeof(*fa));
                   2234: }
                   2235:
                   2236: static int
                   2237: posix_spawn_fa_alloc(struct posix_spawn_file_actions **fap,
1.373     martin   2238:     const struct posix_spawn_file_actions *ufa, rlim_t lim)
1.342     christos 2239: {
                   2240:        struct posix_spawn_file_actions *fa;
                   2241:        struct posix_spawn_file_actions_entry *fae;
                   2242:        char *pbuf = NULL;
                   2243:        int error;
1.352     rmind    2244:        size_t i = 0;
1.342     christos 2245:
                   2246:        fa = kmem_alloc(sizeof(*fa), KM_SLEEP);
                   2247:        error = copyin(ufa, fa, sizeof(*fa));
1.369     christos 2248:        if (error || fa->len == 0) {
1.348     martin   2249:                kmem_free(fa, sizeof(*fa));
1.369     christos 2250:                return error;   /* 0 if not an error, and len == 0 */
1.348     martin   2251:        }
1.342     christos 2252:
1.373     martin   2253:        if (fa->len > lim) {
                   2254:                kmem_free(fa, sizeof(*fa));
                   2255:                return EINVAL;
                   2256:        }
                   2257:
1.348     martin   2258:        fa->size = fa->len;
1.352     rmind    2259:        size_t fal = fa->len * sizeof(*fae);
                   2260:        fae = fa->fae;
                   2261:        fa->fae = kmem_alloc(fal, KM_SLEEP);
                   2262:        error = copyin(fae, fa->fae, fal);
1.344     christos 2263:        if (error)
1.342     christos 2264:                goto out;
                   2265:
                   2266:        pbuf = PNBUF_GET();
1.344     christos 2267:        for (; i < fa->len; i++) {
1.342     christos 2268:                fae = &fa->fae[i];
                   2269:                if (fae->fae_action != FAE_OPEN)
                   2270:                        continue;
1.352     rmind    2271:                error = copyinstr(fae->fae_path, pbuf, MAXPATHLEN, &fal);
1.344     christos 2272:                if (error)
1.342     christos 2273:                        goto out;
1.352     rmind    2274:                fae->fae_path = kmem_alloc(fal, KM_SLEEP);
                   2275:                memcpy(fae->fae_path, pbuf, fal);
1.342     christos 2276:        }
                   2277:        PNBUF_PUT(pbuf);
1.348     martin   2278:
1.342     christos 2279:        *fap = fa;
                   2280:        return 0;
                   2281: out:
                   2282:        if (pbuf)
                   2283:                PNBUF_PUT(pbuf);
1.344     christos 2284:        posix_spawn_fa_free(fa, i);
1.342     christos 2285:        return error;
                   2286: }
                   2287:
1.337     martin   2288: int
1.348     martin   2289: check_posix_spawn(struct lwp *l1)
1.337     martin   2290: {
1.348     martin   2291:        int error, tnprocs, count;
1.337     martin   2292:        uid_t uid;
1.348     martin   2293:        struct proc *p1;
1.337     martin   2294:
                   2295:        p1 = l1->l_proc;
                   2296:        uid = kauth_cred_getuid(l1->l_cred);
                   2297:        tnprocs = atomic_inc_uint_nv(&nprocs);
                   2298:
                   2299:        /*
                   2300:         * Although process entries are dynamically created, we still keep
                   2301:         * a global limit on the maximum number we will create.
                   2302:         */
                   2303:        if (__predict_false(tnprocs >= maxproc))
                   2304:                error = -1;
                   2305:        else
                   2306:                error = kauth_authorize_process(l1->l_cred,
                   2307:                    KAUTH_PROCESS_FORK, p1, KAUTH_ARG(tnprocs), NULL, NULL);
                   2308:
                   2309:        if (error) {
                   2310:                atomic_dec_uint(&nprocs);
1.348     martin   2311:                return EAGAIN;
1.337     martin   2312:        }
                   2313:
                   2314:        /*
                   2315:         * Enforce limits.
                   2316:         */
                   2317:        count = chgproccnt(uid, 1);
1.347     elad     2318:        if (kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_RLIMIT,
                   2319:             p1, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
                   2320:             &p1->p_rlimit[RLIMIT_NPROC], KAUTH_ARG(RLIMIT_NPROC)) != 0 &&
                   2321:            __predict_false(count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur)) {
1.348     martin   2322:                (void)chgproccnt(uid, -1);
                   2323:                atomic_dec_uint(&nprocs);
                   2324:                return EAGAIN;
1.337     martin   2325:        }
                   2326:
1.348     martin   2327:        return 0;
                   2328: }
                   2329:
                   2330: int
1.352     rmind    2331: do_posix_spawn(struct lwp *l1, pid_t *pid_res, bool *child_ok, const char *path,
                   2332:        struct posix_spawn_file_actions *fa,
                   2333:        struct posix_spawnattr *sa,
                   2334:        char *const *argv, char *const *envp,
                   2335:        execve_fetch_element_t fetch)
1.348     martin   2336: {
1.352     rmind    2337:
1.348     martin   2338:        struct proc *p1, *p2;
                   2339:        struct lwp *l2;
                   2340:        int error;
                   2341:        struct spawn_exec_data *spawn_data;
                   2342:        vaddr_t uaddr;
                   2343:        pid_t pid;
1.352     rmind    2344:        bool have_exec_lock = false;
1.348     martin   2345:
                   2346:        p1 = l1->l_proc;
1.342     christos 2347:
1.348     martin   2348:        /* Allocate and init spawn_data */
                   2349:        spawn_data = kmem_zalloc(sizeof(*spawn_data), KM_SLEEP);
                   2350:        spawn_data->sed_refcnt = 1; /* only parent so far */
                   2351:        cv_init(&spawn_data->sed_cv_child_ready, "pspawn");
                   2352:        mutex_init(&spawn_data->sed_mtx_child, MUTEX_DEFAULT, IPL_NONE);
1.352     rmind    2353:        mutex_enter(&spawn_data->sed_mtx_child);
                   2354:
                   2355:        /*
                   2356:         * Do the first part of the exec now, collect state
                   2357:         * in spawn_data.
                   2358:         */
                   2359:        error = execve_loadvm(l1, path, argv,
                   2360:            envp, fetch, &spawn_data->sed_exec);
                   2361:        if (error == EJUSTRETURN)
                   2362:                error = 0;
                   2363:        else if (error)
                   2364:                goto error_exit;
                   2365:
                   2366:        have_exec_lock = true;
1.337     martin   2367:
                   2368:        /*
                   2369:         * Allocate virtual address space for the U-area now, while it
                   2370:         * is still easy to abort the fork operation if we're out of
                   2371:         * kernel virtual address space.
                   2372:         */
                   2373:        uaddr = uvm_uarea_alloc();
                   2374:        if (__predict_false(uaddr == 0)) {
1.352     rmind    2375:                error = ENOMEM;
                   2376:                goto error_exit;
1.351     rmind    2377:        }
1.352     rmind    2378:
1.337     martin   2379:        /*
1.348     martin   2380:         * Allocate new proc. Borrow proc0 vmspace for it, we will
                   2381:         * replace it with its own before returning to userland
                   2382:         * in the child.
1.337     martin   2383:         * This is a point of no return, we will have to go through
                   2384:         * the child proc to properly clean it up past this point.
                   2385:         */
                   2386:        p2 = proc_alloc();
                   2387:        pid = p2->p_pid;
                   2388:
                   2389:        /*
                   2390:         * Make a proc table entry for the new process.
                   2391:         * Start by zeroing the section of proc that is zero-initialized,
                   2392:         * then copy the section that is copied directly from the parent.
                   2393:         */
                   2394:        memset(&p2->p_startzero, 0,
                   2395:            (unsigned) ((char *)&p2->p_endzero - (char *)&p2->p_startzero));
                   2396:        memcpy(&p2->p_startcopy, &p1->p_startcopy,
                   2397:            (unsigned) ((char *)&p2->p_endcopy - (char *)&p2->p_startcopy));
1.348     martin   2398:        p2->p_vmspace = proc0.p_vmspace;
1.337     martin   2399:
1.366     christos 2400:        TAILQ_INIT(&p2->p_sigpend.sp_info);
1.337     martin   2401:
                   2402:        LIST_INIT(&p2->p_lwps);
                   2403:        LIST_INIT(&p2->p_sigwaiters);
                   2404:
                   2405:        /*
                   2406:         * Duplicate sub-structures as needed.
                   2407:         * Increase reference counts on shared objects.
                   2408:         * Inherit flags we want to keep.  The flags related to SIGCHLD
                   2409:         * handling are important in order to keep a consistent behaviour
                   2410:         * for the child after the fork.  If we are a 32-bit process, the
                   2411:         * child will be too.
                   2412:         */
                   2413:        p2->p_flag =
                   2414:            p1->p_flag & (PK_SUGID | PK_NOCLDWAIT | PK_CLDSIGIGN | PK_32);
                   2415:        p2->p_emul = p1->p_emul;
                   2416:        p2->p_execsw = p1->p_execsw;
                   2417:
                   2418:        mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
                   2419:        mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
                   2420:        rw_init(&p2->p_reflock);
                   2421:        cv_init(&p2->p_waitcv, "wait");
                   2422:        cv_init(&p2->p_lwpcv, "lwpwait");
                   2423:
                   2424:        p2->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
                   2425:
                   2426:        kauth_proc_fork(p1, p2);
                   2427:
                   2428:        p2->p_raslist = NULL;
                   2429:        p2->p_fd = fd_copy();
                   2430:
                   2431:        /* XXX racy */
                   2432:        p2->p_mqueue_cnt = p1->p_mqueue_cnt;
                   2433:
                   2434:        p2->p_cwdi = cwdinit();
                   2435:
                   2436:        /*
                   2437:         * Note: p_limit (rlimit stuff) is copy-on-write, so normally
                   2438:         * we just need increase pl_refcnt.
                   2439:         */
1.348     martin   2440:        if (!p1->p_limit->pl_writeable) {
                   2441:                lim_addref(p1->p_limit);
                   2442:                p2->p_limit = p1->p_limit;
1.337     martin   2443:        } else {
                   2444:                p2->p_limit = lim_copy(p1->p_limit);
                   2445:        }
                   2446:
                   2447:        p2->p_lflag = 0;
                   2448:        p2->p_sflag = 0;
                   2449:        p2->p_slflag = 0;
                   2450:        p2->p_pptr = p1;
                   2451:        p2->p_ppid = p1->p_pid;
                   2452:        LIST_INIT(&p2->p_children);
                   2453:
                   2454:        p2->p_aio = NULL;
                   2455:
                   2456: #ifdef KTRACE
                   2457:        /*
                   2458:         * Copy traceflag and tracefile if enabled.
                   2459:         * If not inherited, these were zeroed above.
                   2460:         */
                   2461:        if (p1->p_traceflag & KTRFAC_INHERIT) {
                   2462:                mutex_enter(&ktrace_lock);
                   2463:                p2->p_traceflag = p1->p_traceflag;
                   2464:                if ((p2->p_tracep = p1->p_tracep) != NULL)
                   2465:                        ktradref(p2);
                   2466:                mutex_exit(&ktrace_lock);
                   2467:        }
                   2468: #endif
                   2469:
                   2470:        /*
                   2471:         * Create signal actions for the child process.
                   2472:         */
                   2473:        p2->p_sigacts = sigactsinit(p1, 0);
                   2474:        mutex_enter(p1->p_lock);
                   2475:        p2->p_sflag |=
                   2476:            (p1->p_sflag & (PS_STOPFORK | PS_STOPEXEC | PS_NOCLDSTOP));
                   2477:        sched_proc_fork(p1, p2);
                   2478:        mutex_exit(p1->p_lock);
                   2479:
                   2480:        p2->p_stflag = p1->p_stflag;
                   2481:
                   2482:        /*
                   2483:         * p_stats.
                   2484:         * Copy parts of p_stats, and zero out the rest.
                   2485:         */
                   2486:        p2->p_stats = pstatscopy(p1->p_stats);
                   2487:
                   2488:        /* copy over machdep flags to the new proc */
                   2489:        cpu_proc_fork(p1, p2);
                   2490:
                   2491:        /*
1.352     rmind    2492:         * Prepare remaining parts of spawn data
1.337     martin   2493:         */
1.348     martin   2494:        spawn_data->sed_actions = fa;
                   2495:        spawn_data->sed_attrs = sa;
1.352     rmind    2496:
1.337     martin   2497:        spawn_data->sed_parent = p1;
                   2498:
1.352     rmind    2499:        /* create LWP */
1.337     martin   2500:        lwp_create(l1, p2, uaddr, 0, NULL, 0, spawn_return, spawn_data,
                   2501:            &l2, l1->l_class);
                   2502:        l2->l_ctxlink = NULL;   /* reset ucontext link */
                   2503:
                   2504:        /*
                   2505:         * Copy the credential so other references don't see our changes.
                   2506:         * Test to see if this is necessary first, since in the common case
                   2507:         * we won't need a private reference.
                   2508:         */
                   2509:        if (kauth_cred_geteuid(l2->l_cred) != kauth_cred_getsvuid(l2->l_cred) ||
                   2510:            kauth_cred_getegid(l2->l_cred) != kauth_cred_getsvgid(l2->l_cred)) {
                   2511:                l2->l_cred = kauth_cred_copy(l2->l_cred);
                   2512:                kauth_cred_setsvuid(l2->l_cred, kauth_cred_geteuid(l2->l_cred));
                   2513:                kauth_cred_setsvgid(l2->l_cred, kauth_cred_getegid(l2->l_cred));
                   2514:        }
                   2515:
                   2516:        /* Update the master credentials. */
                   2517:        if (l2->l_cred != p2->p_cred) {
                   2518:                kauth_cred_t ocred;
                   2519:
                   2520:                kauth_cred_hold(l2->l_cred);
                   2521:                mutex_enter(p2->p_lock);
                   2522:                ocred = p2->p_cred;
                   2523:                p2->p_cred = l2->l_cred;
                   2524:                mutex_exit(p2->p_lock);
                   2525:                kauth_cred_free(ocred);
                   2526:        }
                   2527:
1.352     rmind    2528:        *child_ok = true;
                   2529:        spawn_data->sed_refcnt = 2;     /* child gets it as well */
1.348     martin   2530: #if 0
1.345     martin   2531:        l2->l_nopreempt = 1; /* start it non-preemptable */
1.348     martin   2532: #endif
1.345     martin   2533:
1.337     martin   2534:        /*
                   2535:         * It's now safe for the scheduler and other processes to see the
                   2536:         * child process.
                   2537:         */
                   2538:        mutex_enter(proc_lock);
                   2539:
                   2540:        if (p1->p_session->s_ttyvp != NULL && p1->p_lflag & PL_CONTROLT)
                   2541:                p2->p_lflag |= PL_CONTROLT;
                   2542:
                   2543:        LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
                   2544:        p2->p_exitsig = SIGCHLD;        /* signal for parent on exit */
                   2545:
                   2546:        LIST_INSERT_AFTER(p1, p2, p_pglist);
                   2547:        LIST_INSERT_HEAD(&allproc, p2, p_list);
                   2548:
                   2549:        p2->p_trace_enabled = trace_is_enabled(p2);
                   2550: #ifdef __HAVE_SYSCALL_INTERN
                   2551:        (*p2->p_emul->e_syscall_intern)(p2);
                   2552: #endif
                   2553:
                   2554:        /*
                   2555:         * Make child runnable, set start time, and add to run queue except
                   2556:         * if the parent requested the child to start in SSTOP state.
                   2557:         */
                   2558:        mutex_enter(p2->p_lock);
                   2559:
                   2560:        getmicrotime(&p2->p_stats->p_start);
                   2561:
                   2562:        lwp_lock(l2);
                   2563:        KASSERT(p2->p_nrlwps == 1);
                   2564:        p2->p_nrlwps = 1;
                   2565:        p2->p_stat = SACTIVE;
                   2566:        l2->l_stat = LSRUN;
                   2567:        sched_enqueue(l2, false);
                   2568:        lwp_unlock(l2);
                   2569:
                   2570:        mutex_exit(p2->p_lock);
                   2571:        mutex_exit(proc_lock);
                   2572:
                   2573:        cv_wait(&spawn_data->sed_cv_child_ready, &spawn_data->sed_mtx_child);
1.348     martin   2574:        error = spawn_data->sed_error;
1.337     martin   2575:        mutex_exit(&spawn_data->sed_mtx_child);
1.352     rmind    2576:        spawn_exec_data_release(spawn_data);
1.337     martin   2577:
1.341     martin   2578:        rw_exit(&p1->p_reflock);
1.337     martin   2579:        rw_exit(&exec_lock);
1.352     rmind    2580:        have_exec_lock = false;
1.351     rmind    2581:
1.352     rmind    2582:        *pid_res = pid;
                   2583:        return error;
                   2584:
                   2585:  error_exit:
1.376     maxv     2586:        if (have_exec_lock) {
1.352     rmind    2587:                execve_free_data(&spawn_data->sed_exec);
                   2588:                rw_exit(&p1->p_reflock);
1.376     maxv     2589:                rw_exit(&exec_lock);
1.352     rmind    2590:        }
                   2591:        mutex_exit(&spawn_data->sed_mtx_child);
1.351     rmind    2592:        spawn_exec_data_release(spawn_data);
1.376     maxv     2593:
1.348     martin   2594:        return error;
                   2595: }
1.337     martin   2596:
1.348     martin   2597: int
                   2598: sys_posix_spawn(struct lwp *l1, const struct sys_posix_spawn_args *uap,
                   2599:     register_t *retval)
                   2600: {
                   2601:        /* {
                   2602:                syscallarg(pid_t *) pid;
                   2603:                syscallarg(const char *) path;
                   2604:                syscallarg(const struct posix_spawn_file_actions *) file_actions;
                   2605:                syscallarg(const struct posix_spawnattr *) attrp;
                   2606:                syscallarg(char *const *) argv;
                   2607:                syscallarg(char *const *) envp;
                   2608:        } */
                   2609:
                   2610:        int error;
                   2611:        struct posix_spawn_file_actions *fa = NULL;
                   2612:        struct posix_spawnattr *sa = NULL;
                   2613:        pid_t pid;
1.352     rmind    2614:        bool child_ok = false;
1.373     martin   2615:        rlim_t max_fileactions;
                   2616:        proc_t *p = l1->l_proc;
1.348     martin   2617:
                   2618:        error = check_posix_spawn(l1);
                   2619:        if (error) {
                   2620:                *retval = error;
                   2621:                return 0;
                   2622:        }
                   2623:
                   2624:        /* copy in file_actions struct */
                   2625:        if (SCARG(uap, file_actions) != NULL) {
1.373     martin   2626:                max_fileactions = 2 * min(p->p_rlimit[RLIMIT_NOFILE].rlim_cur,
                   2627:                    maxfiles);
                   2628:                error = posix_spawn_fa_alloc(&fa, SCARG(uap, file_actions),
                   2629:                    max_fileactions);
1.348     martin   2630:                if (error)
1.352     rmind    2631:                        goto error_exit;
1.348     martin   2632:        }
                   2633:
                   2634:        /* copyin posix_spawnattr struct */
                   2635:        if (SCARG(uap, attrp) != NULL) {
                   2636:                sa = kmem_alloc(sizeof(*sa), KM_SLEEP);
                   2637:                error = copyin(SCARG(uap, attrp), sa, sizeof(*sa));
                   2638:                if (error)
1.352     rmind    2639:                        goto error_exit;
1.348     martin   2640:        }
1.337     martin   2641:
1.348     martin   2642:        /*
                   2643:         * Do the spawn
                   2644:         */
1.352     rmind    2645:        error = do_posix_spawn(l1, &pid, &child_ok, SCARG(uap, path), fa, sa,
1.348     martin   2646:            SCARG(uap, argv), SCARG(uap, envp), execve_fetch_element);
                   2647:        if (error)
1.352     rmind    2648:                goto error_exit;
1.337     martin   2649:
                   2650:        if (error == 0 && SCARG(uap, pid) != NULL)
                   2651:                error = copyout(&pid, SCARG(uap, pid), sizeof(pid));
                   2652:
                   2653:        *retval = error;
                   2654:        return 0;
                   2655:
1.352     rmind    2656:  error_exit:
                   2657:        if (!child_ok) {
                   2658:                (void)chgproccnt(kauth_cred_getuid(l1->l_cred), -1);
                   2659:                atomic_dec_uint(&nprocs);
                   2660:
                   2661:                if (sa)
                   2662:                        kmem_free(sa, sizeof(*sa));
                   2663:                if (fa)
                   2664:                        posix_spawn_fa_free(fa, fa->len);
                   2665:        }
                   2666:
1.337     martin   2667:        *retval = error;
                   2668:        return 0;
                   2669: }
                   2670:
1.336     matt     2671: void
                   2672: exec_free_emul_arg(struct exec_package *epp)
                   2673: {
                   2674:        if (epp->ep_emul_arg_free != NULL) {
                   2675:                KASSERT(epp->ep_emul_arg != NULL);
                   2676:                (*epp->ep_emul_arg_free)(epp->ep_emul_arg);
                   2677:                epp->ep_emul_arg_free = NULL;
                   2678:                epp->ep_emul_arg = NULL;
                   2679:        } else {
                   2680:                KASSERT(epp->ep_emul_arg == NULL);
                   2681:        }
                   2682: }
1.388     uebayasi 2683:
                   2684: #ifdef DEBUG_EXEC
                   2685: static void
                   2686: dump_vmcmds(const struct exec_package * const epp, size_t x, int error)
                   2687: {
                   2688:        struct exec_vmcmd *vp = &epp->ep_vmcmds.evs_cmds[0];
                   2689:        size_t j;
                   2690:
                   2691:        if (error == 0)
                   2692:                DPRINTF(("vmcmds %u\n", epp->ep_vmcmds.evs_used));
                   2693:        else
                   2694:                DPRINTF(("vmcmds %zu/%u, error %d\n", x,
                   2695:                    epp->ep_vmcmds.evs_used, error));
                   2696:
                   2697:        for (j = 0; j < epp->ep_vmcmds.evs_used; j++) {
                   2698:                DPRINTF(("vmcmd[%zu] = vmcmd_map_%s %#"
                   2699:                    PRIxVADDR"/%#"PRIxVSIZE" fd@%#"
                   2700:                    PRIxVSIZE" prot=0%o flags=%d\n", j,
                   2701:                    vp[j].ev_proc == vmcmd_map_pagedvn ?
                   2702:                    "pagedvn" :
                   2703:                    vp[j].ev_proc == vmcmd_map_readvn ?
                   2704:                    "readvn" :
                   2705:                    vp[j].ev_proc == vmcmd_map_zero ?
                   2706:                    "zero" : "*unknown*",
                   2707:                    vp[j].ev_addr, vp[j].ev_len,
                   2708:                    vp[j].ev_offset, vp[j].ev_prot,
                   2709:                    vp[j].ev_flags));
                   2710:                if (error != 0 && j == x)
                   2711:                        DPRINTF(("     ^--- failed\n"));
                   2712:        }
                   2713: }
                   2714: #endif

CVSweb <webmaster@jp.NetBSD.org>