1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /*
  26  * zlogin provides three types of login which allow users in the global
  27  * zone to access non-global zones.
  28  *
  29  * - "interactive login" is similar to rlogin(1); for example, the user could
  30  *   issue 'zlogin my-zone' or 'zlogin -e ^ -l me my-zone'.   The user is
  31  *   granted a new pty (which is then shoved into the zone), and an I/O
  32  *   loop between parent and child processes takes care of the interactive
  33  *   session.  In this mode, login(1) (and its -c option, which means
  34  *   "already authenticated") is employed to take care of the initialization
  35  *   of the user's session.
  36  *
  37  * - "non-interactive login" is similar to su(1M); the user could issue
  38  *   'zlogin my-zone ls -l' and the command would be run as specified.
  39  *   In this mode, zlogin sets up pipes as the communication channel, and
  40  *   'su' is used to do the login setup work.
  41  *
  42  * - "console login" is the equivalent to accessing the tip line for a
  43  *   zone.  For example, the user can issue 'zlogin -C my-zone'.
  44  *   In this mode, zlogin contacts the zoneadmd process via unix domain
  45  *   socket.  If zoneadmd is not running, it starts it.  This allows the
  46  *   console to be available anytime the zone is installed, regardless of
  47  *   whether it is running.
  48  */
  49 
  50 #include <sys/socket.h>
  51 #include <sys/termios.h>
  52 #include <sys/utsname.h>
  53 #include <sys/stat.h>
  54 #include <sys/types.h>
  55 #include <sys/contract/process.h>
  56 #include <sys/ctfs.h>
  57 #include <sys/brand.h>
  58 #include <sys/wait.h>
  59 #include <alloca.h>
  60 #include <assert.h>
  61 #include <ctype.h>
  62 #include <door.h>
  63 #include <errno.h>
  64 #include <nss_dbdefs.h>
  65 #include <poll.h>
  66 #include <priv.h>
  67 #include <pwd.h>
  68 #include <unistd.h>
  69 #include <utmpx.h>
  70 #include <sac.h>
  71 #include <signal.h>
  72 #include <stdarg.h>
  73 #include <stdio.h>
  74 #include <stdlib.h>
  75 #include <string.h>
  76 #include <strings.h>
  77 #include <stropts.h>
  78 #include <wait.h>
  79 #include <zone.h>
  80 #include <fcntl.h>
  81 #include <libdevinfo.h>
  82 #include <libintl.h>
  83 #include <locale.h>
  84 #include <libzonecfg.h>
  85 #include <libcontract.h>
  86 #include <libbrand.h>
  87 #include <auth_list.h>
  88 #include <auth_attr.h>
  89 #include <secdb.h>
  90 
/*
 * Master side of the session: either the /dev/ptmx descriptor opened by
 * get_master_pty() or the console socket set up by get_console_master().
 */
static int masterfd;
/* Terminal settings stashed before raw mode; restored by reset_tty(). */
static struct termios save_termios;
/* Raw-mode settings with the original VEOF/VEOL patched back in. */
static struct termios effective_termios;
/* Descriptor whose settings live in save_termios. */
static int save_fd;
/* Window size read from stdin; copied onto the slave pty. */
static struct winsize winsize;
/* Set to 1 by the SIGCHLD handler once child_pid has exited or been killed. */
static volatile int dead;
/* The child watched by sigcld() and signalled by sig_forward(); -1 if none. */
static volatile pid_t child_pid = -1;
/*
 * Consulted by prefork_dropprivs() to pick the privilege set; presumably
 * nonzero for an interactive login (assigned outside this section -- confirm).
 */
static int interactive = 0;
/* Privilege set built by prefork_dropprivs(), applied after fork(). */
static priv_set_t *dropprivs;

/* When nonzero, process_user_input() does not look for the command char. */
static int nocmdchar = 0;
/* When nonzero, init_slave_pty() tolerates most pty setup failures. */
static int failsafe = 0;
/* Escape character recognised at the beginning of a line. */
static char cmdchar = '~';

/* revents value recorded by doio() when poll reports a non-input event. */
static int pollerr = 0;

/* Program name (basename of argv[0]); prefix for all diagnostics. */
static const char *pname;
/* NOTE(review): presumably the user given with -l; set elsewhere -- confirm. */
static char *username;
 109 
 110 /*
 111  * When forced_login is true, the user is not prompted
 112  * for an authentication password in the target zone.
 113  */
 114 static boolean_t forced_login = B_FALSE;
 115 
 116 #if !defined(TEXT_DOMAIN)               /* should be defined by cc -D */
 117 #define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it wasn't */
 118 #endif
 119 
 120 #define SUPATH  "/usr/bin/su"
 121 #define FAILSAFESHELL   "/sbin/sh"
 122 #define DEFAULTSHELL    "/sbin/sh"
 123 #define DEF_PATH        "/usr/sbin:/usr/bin"
 124 
 125 #define CLUSTER_BRAND_NAME      "cluster"
 126 
 127 /*
 128  * The ZLOGIN_BUFSIZ is larger than PIPE_BUF so we can be sure we're clearing
 129  * out the pipe when the child is exiting.  The ZLOGIN_RDBUFSIZ must be less
 130  * than ZLOGIN_BUFSIZ (because we share the buffer in doio).  This value is
 131  * also chosen in conjunction with the HI_WATER setting to make sure we
 132  * don't fill up the pipe.  We can write FIFOHIWAT (16k) into the pipe before
 133  * blocking.  By having ZLOGIN_RDBUFSIZ set to 1k and HI_WATER set to 8k, we
 134  * know we can always write a ZLOGIN_RDBUFSIZ chunk into the pipe when there
 135  * is less than HI_WATER data already in the pipe.
 136  */
 137 #define ZLOGIN_BUFSIZ   8192
 138 #define ZLOGIN_RDBUFSIZ 1024
 139 #define HI_WATER        8192
 140 
 141 /*
 142  * See canonify() below.  CANONIFY_LEN is the maximum length that a
 143  * "canonical" sequence will expand to (backslash, three octal digits, NUL).
 144  */
 145 #define CANONIFY_LEN 5
 146 
 147 static void
 148 usage(void)
 149 {
 150         (void) fprintf(stderr, gettext("usage: %s [ -CES ] [ -e cmdchar ] "
 151             "[-l user] zonename [command [args ...] ]\n"), pname);
 152         exit(2);
 153 }
 154 
 155 static const char *
 156 getpname(const char *arg0)
 157 {
 158         const char *p = strrchr(arg0, '/');
 159 
 160         if (p == NULL)
 161                 p = arg0;
 162         else
 163                 p++;
 164 
 165         pname = p;
 166         return (p);
 167 }
 168 
 169 static void
 170 zerror(const char *fmt, ...)
 171 {
 172         va_list alist;
 173 
 174         (void) fprintf(stderr, "%s: ", pname);
 175         va_start(alist, fmt);
 176         (void) vfprintf(stderr, fmt, alist);
 177         va_end(alist);
 178         (void) fprintf(stderr, "\n");
 179 }
 180 
 181 static void
 182 zperror(const char *str)
 183 {
 184         const char *estr;
 185 
 186         if ((estr = strerror(errno)) != NULL)
 187                 (void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
 188         else
 189                 (void) fprintf(stderr, "%s: %s: errno %d\n", pname, str, errno);
 190 }
 191 
 192 /*
 193  * The first part of our privilege dropping scheme needs to be called before
 194  * fork(), since we must have it for security; we don't want to be surprised
 195  * later that we couldn't allocate the privset.
 196  */
 197 static int
 198 prefork_dropprivs()
 199 {
 200         if ((dropprivs = priv_allocset()) == NULL)
 201                 return (1);
 202 
 203         priv_basicset(dropprivs);
 204         (void) priv_delset(dropprivs, PRIV_PROC_INFO);
 205         (void) priv_delset(dropprivs, PRIV_PROC_FORK);
 206         (void) priv_delset(dropprivs, PRIV_PROC_EXEC);
 207         (void) priv_delset(dropprivs, PRIV_FILE_LINK_ANY);
 208 
 209         /*
 210          * We need to keep the basic privilege PROC_SESSION and all unknown
 211          * basic privileges as well as the privileges PROC_ZONE and
 212          * PROC_OWNER in order to query session information and
 213          * send signals.
 214          */
 215         if (interactive == 0) {
 216                 (void) priv_addset(dropprivs, PRIV_PROC_ZONE);
 217                 (void) priv_addset(dropprivs, PRIV_PROC_OWNER);
 218         } else {
 219                 (void) priv_delset(dropprivs, PRIV_PROC_SESSION);
 220         }
 221 
 222         return (0);
 223 }
 224 
 225 /*
 226  * The second part of the privilege drop.  We are paranoid about being attacked
 227  * by the zone, so we drop all privileges.  This should prevent a compromise
 228  * which gets us to fork(), exec(), symlink(), etc.
 229  */
 230 static void
 231 postfork_dropprivs()
 232 {
 233         if ((setppriv(PRIV_SET, PRIV_PERMITTED, dropprivs)) == -1) {
 234                 zperror(gettext("Warning: could not set permitted privileges"));
 235         }
 236         if ((setppriv(PRIV_SET, PRIV_LIMIT, dropprivs)) == -1) {
 237                 zperror(gettext("Warning: could not set limit privileges"));
 238         }
 239         if ((setppriv(PRIV_SET, PRIV_INHERITABLE, dropprivs)) == -1) {
 240                 zperror(gettext("Warning: could not set inheritable "
 241                     "privileges"));
 242         }
 243 }
 244 
 245 /*
 246  * Create the unix domain socket and call the zoneadmd server; handshake
 247  * with it to determine whether it will allow us to connect.
 248  */
 249 static int
 250 get_console_master(const char *zname)
 251 {
 252         int sockfd = -1;
 253         struct sockaddr_un servaddr;
 254         char clientid[MAXPATHLEN];
 255         char handshake[MAXPATHLEN], c;
 256         int msglen;
 257         int i = 0, err = 0;
 258 
 259         if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
 260                 zperror(gettext("could not create socket"));
 261                 return (-1);
 262         }
 263 
 264         bzero(&servaddr, sizeof (servaddr));
 265         servaddr.sun_family = AF_UNIX;
 266         (void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
 267             "%s/%s.console_sock", ZONES_TMPDIR, zname);
 268 
 269         if (connect(sockfd, (struct sockaddr *)&servaddr,
 270             sizeof (servaddr)) == -1) {
 271                 zperror(gettext("Could not connect to zone console"));
 272                 goto bad;
 273         }
 274         masterfd = sockfd;
 275 
 276         msglen = snprintf(clientid, sizeof (clientid), "IDENT %lu %s\n",
 277             getpid(), setlocale(LC_MESSAGES, NULL));
 278 
 279         if (msglen >= sizeof (clientid) || msglen < 0) {
 280                 zerror("protocol error");
 281                 goto bad;
 282         }
 283 
 284         if (write(masterfd, clientid, msglen) != msglen) {
 285                 zerror("protocol error");
 286                 goto bad;
 287         }
 288 
 289         bzero(handshake, sizeof (handshake));
 290 
 291         /*
 292          * Take care not to accumulate more than our fill, and leave room for
 293          * the NUL at the end.
 294          */
 295         while ((err = read(masterfd, &c, 1)) == 1) {
 296                 if (i >= (sizeof (handshake) - 1))
 297                         break;
 298                 if (c == '\n')
 299                         break;
 300                 handshake[i] = c;
 301                 i++;
 302         }
 303 
 304         /*
 305          * If something went wrong during the handshake we bail; perhaps
 306          * the server died off.
 307          */
 308         if (err == -1) {
 309                 zperror(gettext("Could not connect to zone console"));
 310                 goto bad;
 311         }
 312 
 313         if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
 314                 return (0);
 315 
 316         zerror(gettext("Console is already in use by process ID %s."),
 317             handshake);
 318 bad:
 319         (void) close(sockfd);
 320         masterfd = -1;
 321         return (-1);
 322 }
 323 
 324 
 325 /*
 326  * Routines to handle pty creation upon zone entry and to shuttle I/O back
 327  * and forth between the two terminals.  We also compute and store the
 328  * name of the slave terminal associated with the master side.
 329  */
 330 static int
 331 get_master_pty()
 332 {
 333         if ((masterfd = open("/dev/ptmx", O_RDWR|O_NONBLOCK)) < 0) {
 334                 zperror(gettext("failed to obtain a pseudo-tty"));
 335                 return (-1);
 336         }
 337         if (tcgetattr(STDIN_FILENO, &save_termios) == -1) {
 338                 zperror(gettext("failed to get terminal settings from stdin"));
 339                 return (-1);
 340         }
 341         (void) ioctl(STDIN_FILENO, TIOCGWINSZ, (char *)&winsize);
 342 
 343         return (0);
 344 }
 345 
/*
 * This is a bit tricky; normally a pts device will belong to the zone it
 * is granted to.  But in the case of "entering" a zone, we need to establish
 * the pty before entering the zone so that we can vector I/O to and from it
 * from the global zone.
 *
 * We use the zonept() call to let the ptm driver know what we are up to;
 * the only other hairy bit is the construction of zoneslavename, the path of
 * the slave device node underneath the zone's device root (devroot).
 */
 356 static int
 357 init_slave_pty(zoneid_t zoneid, char *devroot)
 358 {
 359         int slavefd = -1;
 360         char *slavename, zoneslavename[MAXPATHLEN];
 361 
 362         /*
 363          * Set slave permissions, zone the pts, then unlock it.
 364          */
 365         if (grantpt(masterfd) != 0) {
 366                 zperror(gettext("grantpt failed"));
 367                 return (-1);
 368         }
 369 
 370         if (unlockpt(masterfd) != 0) {
 371                 zperror(gettext("unlockpt failed"));
 372                 return (-1);
 373         }
 374 
 375         /*
 376          * We must open the slave side before zoning this pty; otherwise
 377          * the kernel would refuse us the open-- zoning a pty makes it
 378          * inaccessible to the global zone.  Note we are trying to open
 379          * the device node via the $ZONEROOT/dev path for this pty.
 380          *
 381          * Later we'll close the slave out when once we've opened it again
 382          * from within the target zone.  Blarg.
 383          */
 384         if ((slavename = ptsname(masterfd)) == NULL) {
 385                 zperror(gettext("failed to get name for pseudo-tty"));
 386                 return (-1);
 387         }
 388 
 389         (void) snprintf(zoneslavename, sizeof (zoneslavename), "%s%s",
 390             devroot, slavename);
 391 
 392         if ((slavefd = open(zoneslavename, O_RDWR)) < 0) {
 393                 zerror(gettext("failed to open %s: %s"), zoneslavename,
 394                     strerror(errno));
 395                 return (-1);
 396         }
 397 
 398         /*
 399          * Push hardware emulation (ptem), line discipline (ldterm),
 400          * and V7/4BSD/Xenix compatibility (ttcompat) modules.
 401          */
 402         if (ioctl(slavefd, I_PUSH, "ptem") == -1) {
 403                 zperror(gettext("failed to push ptem module"));
 404                 if (!failsafe)
 405                         goto bad;
 406         }
 407 
 408         /*
 409          * Anchor the stream to prevent malicious I_POPs; we prefer to do
 410          * this prior to entering the zone so that we can detect any errors
 411          * early, and so that we can set the anchor from the global zone.
 412          */
 413         if (ioctl(slavefd, I_ANCHOR) == -1) {
 414                 zperror(gettext("failed to set stream anchor"));
 415                 if (!failsafe)
 416                         goto bad;
 417         }
 418 
 419         if (ioctl(slavefd, I_PUSH, "ldterm") == -1) {
 420                 zperror(gettext("failed to push ldterm module"));
 421                 if (!failsafe)
 422                         goto bad;
 423         }
 424         if (ioctl(slavefd, I_PUSH, "ttcompat") == -1) {
 425                 zperror(gettext("failed to push ttcompat module"));
 426                 if (!failsafe)
 427                         goto bad;
 428         }
 429 
 430         /*
 431          * Propagate terminal settings from the external term to the new one.
 432          */
 433         if (tcsetattr(slavefd, TCSAFLUSH, &save_termios) == -1) {
 434                 zperror(gettext("failed to set terminal settings"));
 435                 if (!failsafe)
 436                         goto bad;
 437         }
 438         (void) ioctl(slavefd, TIOCSWINSZ, (char *)&winsize);
 439 
 440         if (zonept(masterfd, zoneid) != 0) {
 441                 zperror(gettext("could not set zoneid of pty"));
 442                 goto bad;
 443         }
 444 
 445         return (slavefd);
 446 
 447 bad:
 448         (void) close(slavefd);
 449         return (-1);
 450 }
 451 
 452 /*
 453  * Place terminal into raw mode.
 454  */
 455 static int
 456 set_tty_rawmode(int fd)
 457 {
 458         struct termios term;
 459         if (tcgetattr(fd, &term) < 0) {
 460                 zperror(gettext("failed to get user terminal settings"));
 461                 return (-1);
 462         }
 463 
 464         /* Stash for later, so we can revert back to previous mode */
 465         save_termios = term;
 466         save_fd = fd;
 467 
 468         /* disable 8->7 bit strip, start/stop, enable any char to restart */
 469         term.c_iflag &= ~(ISTRIP|IXON|IXANY);
 470         /* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
 471         term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
 472         /* disable output post-processing */
 473         term.c_oflag &= ~OPOST;
 474         /* disable canonical mode, signal chars, echo & extended functions */
 475         term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
 476 
 477         term.c_cc[VMIN] = 1;    /* byte-at-a-time */
 478         term.c_cc[VTIME] = 0;
 479 
 480         if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &term)) {
 481                 zperror(gettext("failed to set user terminal to raw mode"));
 482                 return (-1);
 483         }
 484 
 485         /*
 486          * We need to know the value of VEOF so that we can properly process for
 487          * client-side ~<EOF>.  But we have obliterated VEOF in term,
 488          * because VMIN overloads the same array slot in non-canonical mode.
 489          * Stupid @&^%!
 490          *
 491          * So here we construct the "effective" termios from the current
 492          * terminal settings, and the corrected VEOF and VEOL settings.
 493          */
 494         if (tcgetattr(STDIN_FILENO, &effective_termios) < 0) {
 495                 zperror(gettext("failed to get user terminal settings"));
 496                 return (-1);
 497         }
 498         effective_termios.c_cc[VEOF] = save_termios.c_cc[VEOF];
 499         effective_termios.c_cc[VEOL] = save_termios.c_cc[VEOL];
 500 
 501         return (0);
 502 }
 503 
 504 /*
 505  * Copy terminal window size from our terminal to the pts.
 506  */
 507 /*ARGSUSED*/
 508 static void
 509 sigwinch(int s)
 510 {
 511         struct winsize ws;
 512 
 513         if (ioctl(0, TIOCGWINSZ, &ws) == 0)
 514                 (void) ioctl(masterfd, TIOCSWINSZ, &ws);
 515 }
 516 
 517 static volatile int close_on_sig = -1;
 518 
 519 static void
 520 /*ARGSUSED*/
 521 sigcld(int s)
 522 {
 523         int status;
 524         pid_t pid;
 525 
 526         /*
 527          * Peek at the exit status.  If this isn't the process we cared
 528          * about, then just reap it.
 529          */
 530         if ((pid = waitpid(child_pid, &status, WNOHANG|WNOWAIT)) != -1) {
 531                 if (pid == child_pid &&
 532                     (WIFEXITED(status) || WIFSIGNALED(status))) {
 533                         dead = 1;
 534                         if (close_on_sig != -1) {
 535                                 (void) write(close_on_sig, "a", 1);
 536                                 (void) close(close_on_sig);
 537                                 close_on_sig = -1;
 538                         }
 539                 } else {
 540                         (void) waitpid(pid, &status, WNOHANG);
 541                 }
 542         }
 543 }
 544 
 545 /*
 546  * Some signals (currently, SIGINT) must be forwarded on to the process
 547  * group of the child process.
 548  */
 549 static void
 550 sig_forward(int s)
 551 {
 552         if (child_pid != -1) {
 553                 pid_t pgid = getpgid(child_pid);
 554                 if (pgid != -1)
 555                         (void) sigsend(P_PGID, pgid, s);
 556         }
 557 }
 558 
 559 /*
 560  * reset terminal settings for global environment
 561  */
 562 static void
 563 reset_tty()
 564 {
 565         (void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
 566 }
 567 
 568 /*
 569  * Convert character to printable representation, for display with locally
 570  * echoed command characters (like when we need to display ~^D)
 571  */
 572 static void
 573 canonify(char c, char *cc)
 574 {
 575         if (isprint(c)) {
 576                 cc[0] = c;
 577                 cc[1] = '\0';
 578         } else if (c >= 0 && c <= 31) {   /* ^@ through ^_ */
 579                 cc[0] = '^';
 580                 cc[1] = c + '@';
 581                 cc[2] = '\0';
 582         } else {
 583                 cc[0] = '\\';
 584                 cc[1] = ((c >> 6) & 7) + '0';
 585                 cc[2] = ((c >> 3) & 7) + '0';
 586                 cc[3] = (c & 7) + '0';
 587                 cc[4] = '\0';
 588         }
 589 }
 590 
 591 /*
 592  * process_user_input watches the input stream for the escape sequence for
 593  * 'quit' (by default, tilde-period).  Because we might be fed just one
 594  * keystroke at a time, state associated with the user input (are we at the
 595  * beginning of the line?  are we locally echoing the next character?) is
 596  * maintained by beginning_of_line and local_echo across calls to the routine.
 597  * If the write to outfd fails, we'll try to read from infd in an attempt
 598  * to prevent deadlock between the two processes.
 599  *
 600  * This routine returns -1 when the 'quit' escape sequence has been issued,
 601  * or an error is encountered, 1 if stdin is EOF, and 0 otherwise.
 602  */
 603 static int
 604 process_user_input(int outfd, int infd)
 605 {
 606         static boolean_t beginning_of_line = B_TRUE;
 607         static boolean_t local_echo = B_FALSE;
 608         char ibuf[ZLOGIN_BUFSIZ];
 609         int nbytes;
 610         char *buf = ibuf;
 611         char c = *buf;
 612 
 613         nbytes = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
 614         if (nbytes == -1 && (errno != EINTR || dead))
 615                 return (-1);
 616 
 617         if (nbytes == -1)       /* The read was interrupted. */
 618                 return (0);
 619 
 620         /* 0 read means EOF, close the pipe to the child */
 621         if (nbytes == 0)
 622                 return (1);
 623 
 624         for (c = *buf; nbytes > 0; c = *buf, --nbytes) {
 625                 buf++;
 626                 if (beginning_of_line && !nocmdchar) {
 627                         beginning_of_line = B_FALSE;
 628                         if (c == cmdchar) {
 629                                 local_echo = B_TRUE;
 630                                 continue;
 631                         }
 632                 } else if (local_echo) {
 633                         local_echo = B_FALSE;
 634                         if (c == '.' || c == effective_termios.c_cc[VEOF]) {
 635                                 char cc[CANONIFY_LEN];
 636 
 637                                 canonify(c, cc);
 638                                 (void) write(STDOUT_FILENO, &cmdchar, 1);
 639                                 (void) write(STDOUT_FILENO, cc, strlen(cc));
 640                                 return (-1);
 641                         }
 642                 }
 643 retry:
 644                 if (write(outfd, &c, 1) <= 0) {
 645                         /*
 646                          * Since the fd we are writing to is opened with
 647                          * O_NONBLOCK it is possible to get EAGAIN if the
 648                          * pipe is full.  One way this could happen is if we
 649                          * are writing a lot of data into the pipe in this loop
 650                          * and the application on the other end is echoing that
 651                          * data back out to its stdout.  The output pipe can
 652                          * fill up since we are stuck here in this loop and not
 653                          * draining the other pipe.  We can try to read some of
 654                          * the data to see if we can drain the pipe so that the
 655                          * application can continue to make progress.  The read
 656                          * is non-blocking so we won't hang here.  We also wait
 657                          * a bit before retrying since there could be other
 658                          * reasons why the pipe is full and we don't want to
 659                          * continuously retry.
 660                          */
 661                         if (errno == EAGAIN) {
 662                                 struct timespec rqtp;
 663                                 int ln;
 664                                 char obuf[ZLOGIN_BUFSIZ];
 665 
 666                                 if ((ln = read(infd, obuf, ZLOGIN_BUFSIZ)) > 0)
 667                                         (void) write(STDOUT_FILENO, obuf, ln);
 668 
 669                                 /* sleep for 10 milliseconds */
 670                                 rqtp.tv_sec = 0;
 671                                 rqtp.tv_nsec = 10 * (NANOSEC / MILLISEC);
 672                                 (void) nanosleep(&rqtp, NULL);
 673                                 if (!dead)
 674                                         goto retry;
 675                         }
 676 
 677                         return (-1);
 678                 }
 679                 beginning_of_line = (c == '\r' || c == '\n' ||
 680                     c == effective_termios.c_cc[VKILL] ||
 681                     c == effective_termios.c_cc[VEOL] ||
 682                     c == effective_termios.c_cc[VSUSP] ||
 683                     c == effective_termios.c_cc[VINTR]);
 684         }
 685         return (0);
 686 }
 687 
 688 /*
 689  * This function prevents deadlock between zlogin and the application in the
 690  * zone that it is talking to.  This can happen when we read from zlogin's
 691  * stdin and write the data down the pipe to the application.  If the pipe
 692  * is full, we'll block in the write.  Because zlogin could be blocked in
 693  * the write, it would never read the application's stdout/stderr so the
 * application can then block on those writes (when the pipe fills up).  If
 * the application gets blocked this way, it can never get around to reading
 696  * its stdin so that zlogin can unblock from its write.  Once in this state,
 697  * the two processes are deadlocked.
 698  *
 699  * To prevent this, we want to verify that we can write into the pipe before we
 700  * read from our stdin.  If the pipe already is pretty full, we bypass the read
 701  * for now.  We'll circle back here again after the poll() so that we can
 702  * try again.  When this function is called, we already know there is data
 703  * ready to read on STDIN_FILENO.  We return -1 if there is a problem, 1 if
 704  * stdin is EOF, and 0 if everything is ok (even though we might not have
 705  * read/written any data into the pipe on this iteration).
 706  */
 707 static int
 708 process_raw_input(int stdin_fd, int appin_fd)
 709 {
 710         int cc;
 711         struct stat64 sb;
 712         char ibuf[ZLOGIN_RDBUFSIZ];
 713 
 714         /* Check how much data is already in the pipe */
 715         if (fstat64(appin_fd, &sb) == -1) {
 716                 perror("stat failed");
 717                 return (-1);
 718         }
 719 
 720         if (dead)
 721                 return (-1);
 722 
 723         /*
 724          * The pipe already has a lot of data in it,  don't write any more
 725          * right now.
 726          */
 727         if (sb.st_size >= HI_WATER)
 728                 return (0);
 729 
 730         cc = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
 731         if (cc == -1 && (errno != EINTR || dead))
 732                 return (-1);
 733 
 734         if (cc == -1)   /* The read was interrupted. */
 735                 return (0);
 736 
 737         /* 0 read means EOF, close the pipe to the child */
 738         if (cc == 0)
 739                 return (1);
 740 
 741         /*
 742          * stdin_fd is stdin of the target; so, the thing we'll write the user
 743          * data *to*.
 744          */
 745         if (write(stdin_fd, ibuf, cc) == -1)
 746                 return (-1);
 747 
 748         return (0);
 749 }
 750 
 751 /*
 752  * Write the output from the application running in the zone.  We can get
 753  * a signal during the write (usually it would be SIGCHLD when the application
 754  * has exited) so we loop to make sure we have written all of the data we read.
 755  */
 756 static int
 757 process_output(int in_fd, int out_fd)
 758 {
 759         int wrote = 0;
 760         int cc;
 761         char ibuf[ZLOGIN_BUFSIZ];
 762 
 763         cc = read(in_fd, ibuf, ZLOGIN_BUFSIZ);
 764         if (cc == -1 && (errno != EINTR || dead))
 765                 return (-1);
 766         if (cc == 0)    /* EOF */
 767                 return (-1);
 768         if (cc == -1)   /* The read was interrupted. */
 769                 return (0);
 770 
 771         do {
 772                 int len;
 773 
 774                 len = write(out_fd, ibuf + wrote, cc - wrote);
 775                 if (len == -1 && errno != EINTR)
 776                         return (-1);
 777                 if (len != -1)
 778                         wrote += len;
 779         } while (wrote < cc);
 780 
 781         return (0);
 782 }
 783 
 784 /*
 785  * This is the main I/O loop, and is shared across all zlogin modes.
 786  * Parameters:
 787  *      stdin_fd:  The fd representing 'stdin' for the slave side; input to
 788  *                 the zone will be written here.
 789  *
 790  *      appin_fd:  The fd representing the other end of the 'stdin' pipe (when
 791  *                 we're running non-interactive); used in process_raw_input
 792  *                 to ensure we don't fill up the application's stdin pipe.
 793  *
 794  *      stdout_fd: The fd representing 'stdout' for the slave side; output
 795  *                 from the zone will arrive here.
 796  *
 797  *      stderr_fd: The fd representing 'stderr' for the slave side; output
 798  *                 from the zone will arrive here.
 799  *
 800  *      raw_mode:  If TRUE, then no processing (for example, for '~.') will
 801  *                 be performed on the input coming from STDIN.
 802  *
 803  * stderr_fd may be specified as -1 if there is no stderr (only non-interactive
 804  * mode supplies a stderr).
 805  *
 806  */
 807 static void
 808 doio(int stdin_fd, int appin_fd, int stdout_fd, int stderr_fd, int sig_fd,
 809     boolean_t raw_mode)
 810 {
 811         struct pollfd pollfds[4];
 812         char ibuf[ZLOGIN_BUFSIZ];
 813         int cc, ret;
 814 
 815         /* read from stdout of zone and write to stdout of global zone */
 816         pollfds[0].fd = stdout_fd;
 817         pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
 818 
 819         /* read from stderr of zone and write to stderr of global zone */
 820         pollfds[1].fd = stderr_fd;
 821         pollfds[1].events = pollfds[0].events;
 822 
 823         /* read from stdin of global zone and write to stdin of zone */
 824         pollfds[2].fd = STDIN_FILENO;
 825         pollfds[2].events = pollfds[0].events;
 826 
 827         /* read from signalling pipe so we know when child dies */
 828         pollfds[3].fd = sig_fd;
 829         pollfds[3].events = pollfds[0].events;
 830 
 831         for (;;) {
 832                 pollfds[0].revents = pollfds[1].revents =
 833                     pollfds[2].revents = pollfds[3].revents = 0;
 834 
 835                 if (dead)
 836                         break;
 837 
 838                 /*
 839                  * There is a race condition here where we can receive the
 840                  * child death signal, set the dead flag, but since we have
 841                  * passed the test above, we would go into poll and hang.
 842                  * To avoid this we use the sig_fd as an additional poll fd.
 843                  * The signal handler writes into the other end of this pipe
 844                  * when the child dies so that the poll will always see that
 845                  * input and proceed.  We just loop around at that point and
 846                  * then notice the dead flag.
 847                  */
 848 
 849                 ret = poll(pollfds,
 850                     sizeof (pollfds) / sizeof (struct pollfd), -1);
 851 
 852                 if (ret == -1 && errno != EINTR) {
 853                         perror("poll failed");
 854                         break;
 855                 }
 856 
 857                 if (errno == EINTR && dead) {
 858                         break;
 859                 }
 860 
 861                 /* event from master side stdout */
 862                 if (pollfds[0].revents) {
 863                         if (pollfds[0].revents &
 864                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 865                                 if (process_output(stdout_fd, STDOUT_FILENO)
 866                                     != 0)
 867                                         break;
 868                         } else {
 869                                 pollerr = pollfds[0].revents;
 870                                 break;
 871                         }
 872                 }
 873 
 874                 /* event from master side stderr */
 875                 if (pollfds[1].revents) {
 876                         if (pollfds[1].revents &
 877                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 878                                 if (process_output(stderr_fd, STDERR_FILENO)
 879                                     != 0)
 880                                         break;
 881                         } else {
 882                                 pollerr = pollfds[1].revents;
 883                                 break;
 884                         }
 885                 }
 886 
 887                 /* event from user STDIN side */
 888                 if (pollfds[2].revents) {
 889                         if (pollfds[2].revents &
 890                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 891                                 /*
 892                                  * stdin fd is stdin of the target; so,
 893                                  * the thing we'll write the user data *to*.
 894                                  *
 895                                  * Also, unlike on the output side, we
 896                                  * close the pipe on a zero-length message.
 897                                  */
 898                                 int res;
 899 
 900                                 if (raw_mode)
 901                                         res = process_raw_input(stdin_fd,
 902                                             appin_fd);
 903                                 else
 904                                         res = process_user_input(stdin_fd,
 905                                             stdout_fd);
 906 
 907                                 if (res < 0)
 908                                         break;
 909                                 if (res > 0) {
 910                                         /* EOF (close) child's stdin_fd */
 911                                         pollfds[2].fd = -1;
 912                                         while ((res = close(stdin_fd)) != 0 &&
 913                                             errno == EINTR)
 914                                                 ;
 915                                         if (res != 0)
 916                                                 break;
 917                                 }
 918 
 919                         } else if (raw_mode && pollfds[2].revents & POLLHUP) {
 920                                 /*
 921                                  * It's OK to get a POLLHUP on STDIN-- it
 922                                  * always happens if you do:
 923                                  *
 924                                  * echo foo | zlogin <zone> <command>
 925                                  *
 926                                  * We reset fd to -1 in this case to clear
 927                                  * the condition and close the pipe (EOF) to
 928                                  * the other side in order to wrap things up.
 929                                  */
 930                                 int res;
 931 
 932                                 pollfds[2].fd = -1;
 933                                 while ((res = close(stdin_fd)) != 0 &&
 934                                     errno == EINTR)
 935                                         ;
 936                                 if (res != 0)
 937                                         break;
 938                         } else {
 939                                 pollerr = pollfds[2].revents;
 940                                 break;
 941                         }
 942                 }
 943         }
 944 
 945         /*
 946          * We are in the midst of dying, but try to poll with a short
 947          * timeout to see if we can catch the last bit of I/O from the
 948          * children.
 949          */
 950 retry:
 951         pollfds[0].revents = pollfds[1].revents = 0;
 952         (void) poll(pollfds, 2, 100);
 953         if (pollfds[0].revents &
 954             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 955                 if ((cc = read(stdout_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
 956                         (void) write(STDOUT_FILENO, ibuf, cc);
 957                         goto retry;
 958                 }
 959         }
 960         if (pollfds[1].revents &
 961             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 962                 if ((cc = read(stderr_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
 963                         (void) write(STDERR_FILENO, ibuf, cc);
 964                         goto retry;
 965                 }
 966         }
 967 }
 968 
 969 /*
 970  * Fetch the user_cmd brand hook for getting a user's passwd(4) entry.
 971  */
 972 static const char *
 973 zone_get_user_cmd(brand_handle_t bh, const char *login, char *user_cmd,
 974     size_t len)
 975 {
 976         bzero(user_cmd, sizeof (user_cmd));
 977         if (brand_get_user_cmd(bh, login, user_cmd, len) != 0)
 978                 return (NULL);
 979 
 980         return (user_cmd);
 981 }
 982 
 983 /* From libc */
 984 extern int str2passwd(const char *, int, void *, char *, int);
 985 
 986 /*
 987  * exec() the user_cmd brand hook, and convert the output string to a
 988  * struct passwd.  This is to be called after zone_enter().
 989  *
 990  */
 991 static struct passwd *
 992 zone_get_user_pw(const char *user_cmd, struct passwd *pwent, char *pwbuf,
 993     int pwbuflen)
 994 {
 995         char pwline[NSS_BUFLEN_PASSWD];
 996         char *cin = NULL;
 997         FILE *fin;
 998         int status;
 999 
1000         assert(getzoneid() != GLOBAL_ZONEID);
1001 
1002         if ((fin = popen(user_cmd, "r")) == NULL)
1003                 return (NULL);
1004 
1005         while (cin == NULL && !feof(fin))
1006                 cin = fgets(pwline, sizeof (pwline), fin);
1007 
1008         if (cin == NULL) {
1009                 (void) pclose(fin);
1010                 return (NULL);
1011         }
1012 
1013         status = pclose(fin);
1014         if (!WIFEXITED(status))
1015                 return (NULL);
1016         if (WEXITSTATUS(status) != 0)
1017                 return (NULL);
1018 
1019         if (str2passwd(pwline, sizeof (pwline), pwent, pwbuf, pwbuflen) == 0)
1020                 return (pwent);
1021         else
1022                 return (NULL);
1023 }
1024 
1025 static char **
1026 zone_login_cmd(brand_handle_t bh, const char *login)
1027 {
1028         static char result_buf[ARG_MAX];
1029         char **new_argv, *ptr, *lasts;
1030         int n, a;
1031 
1032         /* Get the login command for the target zone. */
1033         bzero(result_buf, sizeof (result_buf));
1034 
1035         if (forced_login) {
1036                 if (brand_get_forcedlogin_cmd(bh, login,
1037                     result_buf, sizeof (result_buf)) != 0)
1038                         return (NULL);
1039         } else {
1040                 if (brand_get_login_cmd(bh, login,
1041                     result_buf, sizeof (result_buf)) != 0)
1042                         return (NULL);
1043         }
1044 
1045         /*
1046          * We got back a string that we'd like to execute.  But since
1047          * we're not doing the execution via a shell we'll need to convert
1048          * the exec string to an array of strings.  We'll do that here
1049          * but we're going to be very simplistic about it and break stuff
1050          * up based on spaces.  We're not even going to support any kind
1051          * of quoting or escape characters.  It's truly amazing that
1052          * there is no library function in OpenSolaris to do this for us.
1053          */
1054 
1055         /*
1056          * Be paranoid.  Since we're deliniating based on spaces make
1057          * sure there are no adjacent spaces.
1058          */
1059         if (strstr(result_buf, "  ") != NULL)
1060                 return (NULL);
1061 
1062         /* Remove any trailing whitespace.  */
1063         n = strlen(result_buf);
1064         if (result_buf[n - 1] == ' ')
1065                 result_buf[n - 1] = '\0';
1066 
1067         /* Count how many elements there are in the exec string. */
1068         ptr = result_buf;
1069         for (n = 2; ((ptr = strchr(ptr + 1, (int)' ')) != NULL); n++)
1070                 ;
1071 
1072         /* Allocate the argv array that we're going to return. */
1073         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1074                 return (NULL);
1075 
1076         /* Tokenize the exec string and return. */
1077         a = 0;
1078         new_argv[a++] = result_buf;
1079         if (n > 2) {
1080                 (void) strtok_r(result_buf, " ", &lasts);
1081                 while ((new_argv[a++] = strtok_r(NULL, " ", &lasts)) != NULL)
1082                         ;
1083         } else {
1084                 new_argv[a++] = NULL;
1085         }
1086         assert(n == a);
1087         return (new_argv);
1088 }
1089 
1090 /*
1091  * Prepare argv array for exec'd process; if we're passing commands to the
1092  * new process, then use su(1M) to do the invocation.  Otherwise, use
1093  * 'login -z <from_zonename> -f' (-z is an undocumented option which tells
1094  * login that we're coming from another zone, and to disregard its CONSOLE
1095  * checks).
1096  */
1097 static char **
1098 prep_args(brand_handle_t bh, const char *login, char **argv)
1099 {
1100         int argc = 0, a = 0, i, n = -1;
1101         char **new_argv;
1102 
1103         if (argv != NULL) {
1104                 size_t subshell_len = 1;
1105                 char *subshell;
1106 
1107                 while (argv[argc] != NULL)
1108                         argc++;
1109 
1110                 for (i = 0; i < argc; i++) {
1111                         subshell_len += strlen(argv[i]) + 1;
1112                 }
1113                 if ((subshell = calloc(1, subshell_len)) == NULL)
1114                         return (NULL);
1115 
1116                 for (i = 0; i < argc; i++) {
1117                         (void) strcat(subshell, argv[i]);
1118                         (void) strcat(subshell, " ");
1119                 }
1120 
1121                 if (failsafe) {
1122                         n = 4;
1123                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1124                                 return (NULL);
1125 
1126                         new_argv[a++] = FAILSAFESHELL;
1127                 } else {
1128                         n = 5;
1129                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1130                                 return (NULL);
1131 
1132                         new_argv[a++] = SUPATH;
1133                         if (strcmp(login, "root") != 0) {
1134                                 new_argv[a++] = "-";
1135                                 n++;
1136                         }
1137                         new_argv[a++] = (char *)login;
1138                 }
1139                 new_argv[a++] = "-c";
1140                 new_argv[a++] = subshell;
1141                 new_argv[a++] = NULL;
1142                 assert(a == n);
1143         } else {
1144                 if (failsafe) {
1145                         n = 2;
1146                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1147                                 return (NULL);
1148                         new_argv[a++] = FAILSAFESHELL;
1149                         new_argv[a++] = NULL;
1150                         assert(n == a);
1151                 } else {
1152                         new_argv = zone_login_cmd(bh, login);
1153                 }
1154         }
1155 
1156         return (new_argv);
1157 }
1158 
/*
 * Helper routine for the environment builders: returns a newly malloc'd
 * string of the form "name=value", or NULL if the allocation fails.
 */
static char *
add_env(char *name, char *value)
{
        size_t name_len = strlen(name);
        size_t value_len = strlen(value);
        char *entry;

        /* Room for name, '=', value and the terminating NUL. */
        entry = malloc(name_len + value_len + 2);
        if (entry == NULL)
                return (NULL);

        (void) memcpy(entry, name, name_len);
        entry[name_len] = '=';
        /* Copying value_len + 1 bytes brings the NUL terminator along. */
        (void) memcpy(entry + name_len + 1, value, value_len + 1);

        return (entry);
}
1174 
1175 /*
1176  * Prepare envp array for exec'd process.
1177  */
1178 static char **
1179 prep_env()
1180 {
1181         int e = 0, size = 1;
1182         char **new_env, *estr;
1183         char *term = getenv("TERM");
1184 
1185         size++; /* for $PATH */
1186         if (term != NULL)
1187                 size++;
1188 
1189         /*
1190          * In failsafe mode we set $HOME, since '-l' isn't valid in this mode.
1191          * We also set $SHELL, since neither login nor su will be around to do
1192          * it.
1193          */
1194         if (failsafe)
1195                 size += 2;
1196 
1197         if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1198                 return (NULL);
1199 
1200         if ((estr = add_env("PATH", DEF_PATH)) == NULL)
1201                 return (NULL);
1202         new_env[e++] = estr;
1203 
1204         if (term != NULL) {
1205                 if ((estr = add_env("TERM", term)) == NULL)
1206                         return (NULL);
1207                 new_env[e++] = estr;
1208         }
1209 
1210         if (failsafe) {
1211                 if ((estr = add_env("HOME", "/")) == NULL)
1212                         return (NULL);
1213                 new_env[e++] = estr;
1214 
1215                 if ((estr = add_env("SHELL", FAILSAFESHELL)) == NULL)
1216                         return (NULL);
1217                 new_env[e++] = estr;
1218         }
1219 
1220         new_env[e++] = NULL;
1221 
1222         assert(e == size);
1223 
1224         return (new_env);
1225 }
1226 
1227 /*
1228  * Finish the preparation of the envp array for exec'd non-interactive
1229  * zlogins.  This is called in the child process *after* we zone_enter(), since
1230  * it derives things we can only know within the zone, such as $HOME, $SHELL,
1231  * etc.  We need only do this in the non-interactive, mode, since otherwise
1232  * login(1) will do it.  We don't do this in failsafe mode, since it presents
1233  * additional ways in which the command could fail, and we'd prefer to avoid
1234  * that.
1235  */
1236 static char **
1237 prep_env_noninteractive(const char *user_cmd, char **env)
1238 {
1239         size_t size;
1240         char **new_env;
1241         int e, i;
1242         char *estr;
1243         char varmail[LOGNAME_MAX + 11]; /* strlen(/var/mail/) = 10, NUL */
1244         char pwbuf[NSS_BUFLEN_PASSWD + 1];
1245         struct passwd pwent;
1246         struct passwd *pw = NULL;
1247 
1248         assert(env != NULL);
1249         assert(failsafe == 0);
1250 
1251         /*
1252          * Exec the "user_cmd" brand hook to get a pwent for the
1253          * login user.  If this fails, HOME will be set to "/", SHELL
1254          * will be set to $DEFAULTSHELL, and we will continue to exec
1255          * SUPATH <login> -c <cmd>.
1256          */
1257         pw = zone_get_user_pw(user_cmd, &pwent, pwbuf, sizeof (pwbuf));
1258 
1259         /*
1260          * Get existing envp size.
1261          */
1262         for (size = 0; env[size] != NULL; size++)
1263                 ;
1264 
1265         e = size;
1266 
1267         /*
1268          * Finish filling out the environment; we duplicate the environment
1269          * setup described in login(1), for lack of a better precedent.
1270          */
1271         if (pw != NULL)
1272                 size += 3;      /* LOGNAME, HOME, MAIL */
1273         else
1274                 size += 1;      /* HOME */
1275 
1276         size++; /* always fill in SHELL */
1277         size++; /* terminating NULL */
1278 
1279         if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1280                 goto malloc_fail;
1281 
1282         /*
1283          * Copy existing elements of env into new_env.
1284          */
1285         for (i = 0; env[i] != NULL; i++) {
1286                 if ((new_env[i] = strdup(env[i])) == NULL)
1287                         goto malloc_fail;
1288         }
1289         assert(e == i);
1290 
1291         if (pw != NULL) {
1292                 if ((estr = add_env("LOGNAME", pw->pw_name)) == NULL)
1293                         goto malloc_fail;
1294                 new_env[e++] = estr;
1295 
1296                 if ((estr = add_env("HOME", pw->pw_dir)) == NULL)
1297                         goto malloc_fail;
1298                 new_env[e++] = estr;
1299 
1300                 if (chdir(pw->pw_dir) != 0)
1301                         zerror(gettext("Could not chdir to home directory "
1302                             "%s: %s"), pw->pw_dir, strerror(errno));
1303 
1304                 (void) snprintf(varmail, sizeof (varmail), "/var/mail/%s",
1305                     pw->pw_name);
1306                 if ((estr = add_env("MAIL", varmail)) == NULL)
1307                         goto malloc_fail;
1308                 new_env[e++] = estr;
1309         } else {
1310                 if ((estr = add_env("HOME", "/")) == NULL)
1311                         goto malloc_fail;
1312                 new_env[e++] = estr;
1313         }
1314 
1315         if (pw != NULL && strlen(pw->pw_shell) > 0) {
1316                 if ((estr = add_env("SHELL", pw->pw_shell)) == NULL)
1317                         goto malloc_fail;
1318                 new_env[e++] = estr;
1319         } else {
1320                 if ((estr = add_env("SHELL", DEFAULTSHELL)) == NULL)
1321                         goto malloc_fail;
1322                 new_env[e++] = estr;
1323         }
1324 
1325         new_env[e++] = NULL;    /* add terminating NULL */
1326 
1327         assert(e == size);
1328         return (new_env);
1329 
1330 malloc_fail:
1331         zperror(gettext("failed to allocate memory for process environment"));
1332         return (NULL);
1333 }
1334 
/*
 * Close fd unless it is the descriptor that slavefd points at.  Always
 * returns 0; the result of close() is ignored.
 *
 * NOTE(review): the signature looks like an fdwalk(3C)-style callback --
 * confirm against the caller.
 */
static int
close_func(void *slavefd, int fd)
{
        int keep_fd = *(int *)slavefd;

        if (fd == keep_fd)
                return (0);

        (void) close(fd);
        return (0);
}
1342 
1343 static void
1344 set_cmdchar(char *cmdcharstr)
1345 {
1346         char c;
1347         long lc;
1348 
1349         if ((c = *cmdcharstr) != '\\') {
1350                 cmdchar = c;
1351                 return;
1352         }
1353 
1354         c = cmdcharstr[1];
1355         if (c == '\0' || c == '\\') {
1356                 cmdchar = '\\';
1357                 return;
1358         }
1359 
1360         if (c < '0' || c > '7') {
1361                 zerror(gettext("Unrecognized escape character option %s"),
1362                     cmdcharstr);
1363                 usage();
1364         }
1365 
1366         lc = strtol(cmdcharstr + 1, NULL, 8);
1367         if (lc < 0 || lc > 255) {
1368                 zerror(gettext("Octal escape character '%s' too large"),
1369                     cmdcharstr);
1370                 usage();
1371         }
1372         cmdchar = (char)lc;
1373 }
1374 
1375 static int
1376 setup_utmpx(char *slavename)
1377 {
1378         struct utmpx ut;
1379 
1380         bzero(&ut, sizeof (ut));
1381         (void) strncpy(ut.ut_user, ".zlogin", sizeof (ut.ut_user));
1382         (void) strncpy(ut.ut_line, slavename, sizeof (ut.ut_line));
1383         ut.ut_pid = getpid();
1384         ut.ut_id[0] = 'z';
1385         ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;
1386         ut.ut_type = LOGIN_PROCESS;
1387         (void) time(&ut.ut_tv.tv_sec);
1388 
1389         if (makeutx(&ut) == NULL) {
1390                 zerror(gettext("makeutx failed"));
1391                 return (-1);
1392         }
1393         return (0);
1394 }
1395 
/*
 * Counterpart to grab_lock_file(): close the lock file descriptor.  The
 * result of close() is deliberately ignored.
 */
static void
release_lock_file(int lockfd)
{
        (void) close(lockfd);
}
1401 
1402 static int
1403 grab_lock_file(const char *zone_name, int *lockfd)
1404 {
1405         char pathbuf[PATH_MAX];
1406         struct flock flock;
1407 
1408         if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
1409                 zerror(gettext("could not mkdir %s: %s"), ZONES_TMPDIR,
1410                     strerror(errno));
1411                 return (-1);
1412         }
1413         (void) chmod(ZONES_TMPDIR, S_IRWXU);
1414         (void) snprintf(pathbuf, sizeof (pathbuf), "%s/%s.zoneadm.lock",
1415             ZONES_TMPDIR, zone_name);
1416 
1417         if ((*lockfd = open(pathbuf, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) < 0) {
1418                 zerror(gettext("could not open %s: %s"), pathbuf,
1419                     strerror(errno));
1420                 return (-1);
1421         }
1422         /*
1423          * Lock the file to synchronize with other zoneadmds
1424          */
1425         flock.l_type = F_WRLCK;
1426         flock.l_whence = SEEK_SET;
1427         flock.l_start = (off_t)0;
1428         flock.l_len = (off_t)0;
1429         if (fcntl(*lockfd, F_SETLKW, &flock) < 0) {
1430                 zerror(gettext("unable to lock %s: %s"), pathbuf,
1431                     strerror(errno));
1432                 release_lock_file(*lockfd);
1433                 return (-1);
1434         }
1435         return (Z_OK);
1436 }
1437 
1438 static int
1439 start_zoneadmd(const char *zone_name)
1440 {
1441         pid_t retval;
1442         int pstatus = 0, error = -1, lockfd, doorfd;
1443         struct door_info info;
1444         char doorpath[MAXPATHLEN];
1445 
1446         (void) snprintf(doorpath, sizeof (doorpath), ZONE_DOOR_PATH, zone_name);
1447 
1448         if (grab_lock_file(zone_name, &lockfd) != Z_OK)
1449                 return (-1);
1450         /*
1451          * We must do the door check with the lock held.  Otherwise, we
1452          * might race against another zoneadm/zlogin process and wind
1453          * up with two processes trying to start zoneadmd at the same
1454          * time.  zoneadmd will detect this, and fail, but we prefer this
1455          * to be as seamless as is practical, from a user perspective.
1456          */
1457         if ((doorfd = open(doorpath, O_RDONLY)) < 0) {
1458                 if (errno != ENOENT) {
1459                         zerror("failed to open %s: %s", doorpath,
1460                             strerror(errno));
1461                         goto out;
1462                 }
1463         } else {
1464                 /*
1465                  * Seems to be working ok.
1466                  */
1467                 if (door_info(doorfd, &info) == 0 &&
1468                     ((info.di_attributes & DOOR_REVOKED) == 0)) {
1469                         error = 0;
1470                         goto out;
1471                 }
1472         }
1473 
1474         if ((child_pid = fork()) == -1) {
1475                 zperror(gettext("could not fork"));
1476                 goto out;
1477         } else if (child_pid == 0) {
1478                 /* child process */
1479                 (void) execl("/usr/lib/zones/zoneadmd", "zoneadmd", "-z",
1480                     zone_name, NULL);
1481                 zperror(gettext("could not exec zoneadmd"));
1482                 _exit(1);
1483         }
1484 
1485         /* parent process */
1486         do {
1487                 retval = waitpid(child_pid, &pstatus, 0);
1488         } while (retval != child_pid);
1489         if (WIFSIGNALED(pstatus) ||
1490             (WIFEXITED(pstatus) && WEXITSTATUS(pstatus) != 0)) {
1491                 zerror(gettext("could not start %s"), "zoneadmd");
1492                 goto out;
1493         }
1494         error = 0;
1495 out:
1496         release_lock_file(lockfd);
1497         (void) close(doorfd);
1498         return (error);
1499 }
1500 
1501 static int
1502 init_template(void)
1503 {
1504         int fd;
1505         int err = 0;
1506 
1507         fd = open64(CTFS_ROOT "/process/template", O_RDWR);
1508         if (fd == -1)
1509                 return (-1);
1510 
1511         /*
1512          * zlogin doesn't do anything with the contract.
1513          * Deliver no events, don't inherit, and allow it to be orphaned.
1514          */
1515         err |= ct_tmpl_set_critical(fd, 0);
1516         err |= ct_tmpl_set_informative(fd, 0);
1517         err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
1518         err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
1519         if (err || ct_tmpl_activate(fd)) {
1520                 (void) close(fd);
1521                 return (-1);
1522         }
1523 
1524         return (fd);
1525 }
1526 
1527 static int
1528 noninteractive_login(char *zonename, const char *user_cmd, zoneid_t zoneid,
1529     char **new_args, char **new_env)
1530 {
1531         pid_t retval;
1532         int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2], dead_child_pipe[2];
1533         int child_status;
1534         int tmpl_fd;
1535         sigset_t block_cld;
1536 
1537         if ((tmpl_fd = init_template()) == -1) {
1538                 reset_tty();
1539                 zperror(gettext("could not create contract"));
1540                 return (1);
1541         }
1542 
1543         if (pipe(stdin_pipe) != 0) {
1544                 zperror(gettext("could not create STDIN pipe"));
1545                 return (1);
1546         }
1547         /*
1548          * When the user types ^D, we get a zero length message on STDIN.
1549          * We need to echo that down the pipe to send it to the other side;
1550          * but by default, pipes don't propagate zero-length messages.  We
1551          * toggle that behavior off using I_SWROPT.  See streamio(7i).
1552          */
1553         if (ioctl(stdin_pipe[0], I_SWROPT, SNDZERO) != 0) {
1554                 zperror(gettext("could not configure STDIN pipe"));
1555                 return (1);
1556 
1557         }
1558         if (pipe(stdout_pipe) != 0) {
1559                 zperror(gettext("could not create STDOUT pipe"));
1560                 return (1);
1561         }
1562         if (pipe(stderr_pipe) != 0) {
1563                 zperror(gettext("could not create STDERR pipe"));
1564                 return (1);
1565         }
1566 
1567         if (pipe(dead_child_pipe) != 0) {
1568                 zperror(gettext("could not create signalling pipe"));
1569                 return (1);
1570         }
1571         close_on_sig = dead_child_pipe[0];
1572 
1573         /*
1574          * If any of the pipe FD's winds up being less than STDERR, then we
1575          * have a mess on our hands-- and we are lacking some of the I/O
1576          * streams we would expect anyway.  So we bail.
1577          */
1578         if (stdin_pipe[0] <= STDERR_FILENO ||
1579             stdin_pipe[1] <= STDERR_FILENO ||
1580             stdout_pipe[0] <= STDERR_FILENO ||
1581             stdout_pipe[1] <= STDERR_FILENO ||
1582             stderr_pipe[0] <= STDERR_FILENO ||
1583             stderr_pipe[1] <= STDERR_FILENO ||
1584             dead_child_pipe[0] <= STDERR_FILENO ||
1585             dead_child_pipe[1] <= STDERR_FILENO) {
1586                 zperror(gettext("process lacks valid STDIN, STDOUT, STDERR"));
1587                 return (1);
1588         }
1589 
1590         if (prefork_dropprivs() != 0) {
1591                 zperror(gettext("could not allocate privilege set"));
1592                 return (1);
1593         }
1594 
1595         (void) sigset(SIGCLD, sigcld);
1596         (void) sigemptyset(&block_cld);
1597         (void) sigaddset(&block_cld, SIGCLD);
1598         (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1599 
1600         if ((child_pid = fork()) == -1) {
1601                 (void) ct_tmpl_clear(tmpl_fd);
1602                 (void) close(tmpl_fd);
1603                 zperror(gettext("could not fork"));
1604                 return (1);
1605         } else if (child_pid == 0) { /* child process */
1606                 (void) ct_tmpl_clear(tmpl_fd);
1607 
1608                 /*
1609                  * Do a dance to get the pipes hooked up as FD's 0, 1 and 2.
1610                  */
1611                 (void) close(STDIN_FILENO);
1612                 (void) close(STDOUT_FILENO);
1613                 (void) close(STDERR_FILENO);
1614                 (void) dup2(stdin_pipe[1], STDIN_FILENO);
1615                 (void) dup2(stdout_pipe[1], STDOUT_FILENO);
1616                 (void) dup2(stderr_pipe[1], STDERR_FILENO);
1617                 (void) closefrom(STDERR_FILENO + 1);
1618 
1619                 (void) sigset(SIGCLD, SIG_DFL);
1620                 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1621                 /*
1622                  * In case any of stdin, stdout or stderr are streams,
1623                  * anchor them to prevent malicious I_POPs.
1624                  */
1625                 (void) ioctl(STDIN_FILENO, I_ANCHOR);
1626                 (void) ioctl(STDOUT_FILENO, I_ANCHOR);
1627                 (void) ioctl(STDERR_FILENO, I_ANCHOR);
1628 
1629                 if (zone_enter(zoneid) == -1) {
1630                         zerror(gettext("could not enter zone %s: %s"),
1631                             zonename, strerror(errno));
1632                         _exit(1);
1633                 }
1634 
1635                 /*
1636                  * For non-native zones, tell libc where it can find locale
1637                  * specific getttext() messages.
1638                  */
1639                 if (access("/.SUNWnative/usr/lib/locale", R_OK) == 0)
1640                         (void) bindtextdomain(TEXT_DOMAIN,
1641                             "/.SUNWnative/usr/lib/locale");
1642                 else if (access("/native/usr/lib/locale", R_OK) == 0)
1643                         (void) bindtextdomain(TEXT_DOMAIN,
1644                             "/native/usr/lib/locale");
1645 
1646                 if (!failsafe)
1647                         new_env = prep_env_noninteractive(user_cmd, new_env);
1648 
1649                 if (new_env == NULL) {
1650                         _exit(1);
1651                 }
1652 
1653                 /*
1654                  * Move into a new process group; the zone_enter will have
1655                  * placed us into zsched's session, and we want to be in
1656                  * a unique process group.
1657                  */
1658                 (void) setpgid(getpid(), getpid());
1659 
1660                 /*
1661                  * The child needs to run as root to
1662                  * execute the su program.
1663                  */
1664                 if (setuid(0) == -1) {
1665                         zperror(gettext("insufficient privilege"));
1666                         return (1);
1667                 }
1668 
1669                 (void) execve(new_args[0], new_args, new_env);
1670                 zperror(gettext("exec failure"));
1671                 _exit(1);
1672         }
1673         /* parent */
1674 
1675         /* close pipe sides written by child */
1676         (void) close(stdout_pipe[1]);
1677         (void) close(stderr_pipe[1]);
1678 
1679         (void) sigset(SIGINT, sig_forward);
1680 
1681         postfork_dropprivs();
1682 
1683         (void) ct_tmpl_clear(tmpl_fd);
1684         (void) close(tmpl_fd);
1685 
1686         (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1687         doio(stdin_pipe[0], stdin_pipe[1], stdout_pipe[0], stderr_pipe[0],
1688             dead_child_pipe[1], B_TRUE);
1689         do {
1690                 retval = waitpid(child_pid, &child_status, 0);
1691                 if (retval == -1) {
1692                         child_status = 0;
1693                 }
1694         } while (retval != child_pid && errno != ECHILD);
1695 
1696         return (WEXITSTATUS(child_status));
1697 }
1698 
/*
 * Return the login name that belongs to the real user ID of this process.
 *
 * getpwuid(3C) returns a pointer into static storage that a later
 * getpwent()/getpwnam()/getpwuid() call is allowed to overwrite, so the
 * name is duplicated before it is handed back.  The copy is intentionally
 * never freed: the caller keeps it for the remaining lifetime of the
 * process.
 *
 * This function does not return on failure; if the passwd entry cannot
 * be found or the copy cannot be allocated, an error is printed and the
 * process terminates with status 1.
 */
static char *
get_username(void)
{
        struct passwd *nptr;
        char *name;

        /*
         * Authorizations are checked to restrict access based on the
         * requested operation and zone name.  It is assumed that the
         * program is running with all privileges, but that the real
         * user ID is that of the user or role on whose behalf we are
         * operating. So we start by getting the username that will be
         * used for subsequent authorization checks.
         */
        if ((nptr = getpwuid(getuid())) == NULL ||
            (name = strdup(nptr->pw_name)) == NULL) {
                zerror(gettext("could not get user name."));
                _exit(1);
        }
        return (name);
}
1721 
1722 int
1723 main(int argc, char **argv)
1724 {
1725         int arg, console = 0;
1726         zoneid_t zoneid;
1727         zone_state_t st;
1728         char *login = "root";
1729         int lflag = 0;
1730         char *zonename = NULL;
1731         char **proc_args = NULL;
1732         char **new_args, **new_env;
1733         sigset_t block_cld;
1734         char devroot[MAXPATHLEN];
1735         char *slavename, slaveshortname[MAXPATHLEN];
1736         priv_set_t *privset;
1737         int tmpl_fd;
1738         char zonebrand[MAXNAMELEN];
1739         char default_brand[MAXNAMELEN];
1740         struct stat sb;
1741         char kernzone[ZONENAME_MAX];
1742         brand_handle_t bh;
1743         char user_cmd[MAXPATHLEN];
1744         char authname[MAXAUTHS];
1745 
1746         (void) setlocale(LC_ALL, "");
1747         (void) textdomain(TEXT_DOMAIN);
1748 
1749         (void) getpname(argv[0]);
1750         username = get_username();
1751 
1752         while ((arg = getopt(argc, argv, "ECR:Se:l:")) != EOF) {
1753                 switch (arg) {
1754                 case 'C':
1755                         console = 1;
1756                         break;
1757                 case 'E':
1758                         nocmdchar = 1;
1759                         break;
1760                 case 'R':       /* undocumented */
1761                         if (*optarg != '/') {
1762                                 zerror(gettext("root path must be absolute."));
1763                                 exit(2);
1764                         }
1765                         if (stat(optarg, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
1766                                 zerror(
1767                                     gettext("root path must be a directory."));
1768                                 exit(2);
1769                         }
1770                         zonecfg_set_root(optarg);
1771                         break;
1772                 case 'S':
1773                         failsafe = 1;
1774                         break;
1775                 case 'e':
1776                         set_cmdchar(optarg);
1777                         break;
1778                 case 'l':
1779                         login = optarg;
1780                         lflag = 1;
1781                         break;
1782                 default:
1783                         usage();
1784                 }
1785         }
1786 
1787         if (console != 0 && lflag != 0) {
1788                 zerror(gettext("-l may not be specified for console login"));
1789                 usage();
1790         }
1791 
1792         if (console != 0 && failsafe != 0) {
1793                 zerror(gettext("-S may not be specified for console login"));
1794                 usage();
1795         }
1796 
1797         if (console != 0 && zonecfg_in_alt_root()) {
1798                 zerror(gettext("-R may not be specified for console login"));
1799                 exit(2);
1800         }
1801 
1802         if (failsafe != 0 && lflag != 0) {
1803                 zerror(gettext("-l may not be specified for failsafe login"));
1804                 usage();
1805         }
1806 
1807         if (optind == (argc - 1)) {
1808                 /*
1809                  * zone name, no process name; this should be an interactive
1810                  * as long as STDIN is really a tty.
1811                  */
1812                 if (isatty(STDIN_FILENO))
1813                         interactive = 1;
1814                 zonename = argv[optind];
1815         } else if (optind < (argc - 1)) {
1816                 if (console) {
1817                         zerror(gettext("Commands may not be specified for "
1818                             "console login."));
1819                         usage();
1820                 }
1821                 /* zone name and process name, and possibly some args */
1822                 zonename = argv[optind];
1823                 proc_args = &argv[optind + 1];
1824                 interactive = 0;
1825         } else {
1826                 usage();
1827         }
1828 
1829         if (getzoneid() != GLOBAL_ZONEID) {
1830                 zerror(gettext("'%s' may only be used from the global zone"),
1831                     pname);
1832                 return (1);
1833         }
1834 
1835         if (strcmp(zonename, GLOBAL_ZONENAME) == 0) {
1836                 zerror(gettext("'%s' not applicable to the global zone"),
1837                     pname);
1838                 return (1);
1839         }
1840 
1841         if (zone_get_state(zonename, &st) != Z_OK) {
1842                 zerror(gettext("zone '%s' unknown"), zonename);
1843                 return (1);
1844         }
1845 
1846         if (st < ZONE_STATE_INSTALLED) {
1847                 zerror(gettext("cannot login to a zone which is '%s'"),
1848                     zone_state_str(st));
1849                 return (1);
1850         }
1851 
1852         /*
1853          * In both console and non-console cases, we require all privs.
1854          * In the console case, because we may need to startup zoneadmd.
1855          * In the non-console case in order to do zone_enter(2), zonept()
1856          * and other tasks.
1857          */
1858 
1859         if ((privset = priv_allocset()) == NULL) {
1860                 zperror(gettext("priv_allocset failed"));
1861                 return (1);
1862         }
1863 
1864         if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1865                 zperror(gettext("getppriv failed"));
1866                 priv_freeset(privset);
1867                 return (1);
1868         }
1869 
1870         if (priv_isfullset(privset) == B_FALSE) {
1871                 zerror(gettext("You lack sufficient privilege to run "
1872                     "this command (all privs required)"));
1873                 priv_freeset(privset);
1874                 return (1);
1875         }
1876         priv_freeset(privset);
1877 
1878         /*
1879          * Check if user is authorized for requested usage of the zone
1880          */
1881 
1882         (void) snprintf(authname, MAXAUTHS, "%s%s%s",
1883             ZONE_MANAGE_AUTH, KV_OBJECT, zonename);
1884         if (chkauthattr(authname, username) == 0) {
1885                 if (console) {
1886                         zerror(gettext("%s is not authorized for console "
1887                             "access to  %s zone."),
1888                             username, zonename);
1889                         return (1);
1890                 } else {
1891                         (void) snprintf(authname, MAXAUTHS, "%s%s%s",
1892                             ZONE_LOGIN_AUTH, KV_OBJECT, zonename);
1893                         if (failsafe || !interactive) {
1894                                 zerror(gettext("%s is not authorized for  "
1895                                     "failsafe or non-interactive login "
1896                                     "to  %s zone."), username, zonename);
1897                                 return (1);
1898                         } else if (chkauthattr(authname, username) == 0) {
1899                                 zerror(gettext("%s is not authorized "
1900                                     " to login to %s zone."),
1901                                     username, zonename);
1902                                 return (1);
1903                         }
1904                 }
1905         } else {
1906                 forced_login = B_TRUE;
1907         }
1908 
1909         /*
1910          * The console is a separate case from the rest of the code; handle
1911          * it first.
1912          */
1913         if (console) {
1914                 /*
1915                  * Ensure that zoneadmd for this zone is running.
1916                  */
1917                 if (start_zoneadmd(zonename) == -1)
1918                         return (1);
1919 
1920                 /*
1921                  * Make contact with zoneadmd.
1922                  */
1923                 if (get_console_master(zonename) == -1)
1924                         return (1);
1925 
1926                 (void) printf(gettext("[Connected to zone '%s' console]\n"),
1927                     zonename);
1928 
1929                 if (set_tty_rawmode(STDIN_FILENO) == -1) {
1930                         reset_tty();
1931                         zperror(gettext("failed to set stdin pty to raw mode"));
1932                         return (1);
1933                 }
1934 
1935                 (void) sigset(SIGWINCH, sigwinch);
1936                 (void) sigwinch(0);
1937 
1938                 /*
1939                  * Run the I/O loop until we get disconnected.
1940                  */
1941                 doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
1942                 reset_tty();
1943                 (void) printf(gettext("\n[Connection to zone '%s' console "
1944                     "closed]\n"), zonename);
1945 
1946                 return (0);
1947         }
1948 
1949         if (st != ZONE_STATE_RUNNING && st != ZONE_STATE_MOUNTED) {
1950                 zerror(gettext("login allowed only to running zones "
1951                     "(%s is '%s')."), zonename, zone_state_str(st));
1952                 return (1);
1953         }
1954 
1955         (void) strlcpy(kernzone, zonename, sizeof (kernzone));
1956         if (zonecfg_in_alt_root()) {
1957                 FILE *fp = zonecfg_open_scratch("", B_FALSE);
1958 
1959                 if (fp == NULL || zonecfg_find_scratch(fp, zonename,
1960                     zonecfg_get_root(), kernzone, sizeof (kernzone)) == -1) {
1961                         zerror(gettext("cannot find scratch zone %s"),
1962                             zonename);
1963                         if (fp != NULL)
1964                                 zonecfg_close_scratch(fp);
1965                         return (1);
1966                 }
1967                 zonecfg_close_scratch(fp);
1968         }
1969 
1970         if ((zoneid = getzoneidbyname(kernzone)) == -1) {
1971                 zerror(gettext("failed to get zoneid for zone '%s'"),
1972                     zonename);
1973                 return (1);
1974         }
1975 
1976         /*
1977          * We need the zone root path only if we are setting up a pty.
1978          */
1979         if (zone_get_devroot(zonename, devroot, sizeof (devroot)) == -1) {
1980                 zerror(gettext("could not get dev path for zone %s"),
1981                     zonename);
1982                 return (1);
1983         }
1984 
1985         if (zone_get_brand(zonename, zonebrand, sizeof (zonebrand)) != Z_OK) {
1986                 zerror(gettext("could not get brand for zone %s"), zonename);
1987                 return (1);
1988         }
1989         /*
1990          * In the alternate root environment, the only supported
1991          * operations are mount and unmount.  In this case, just treat
1992          * the zone as native if it is cluster.  Cluster zones can be
1993          * native for the purpose of LU or upgrade, and the cluster
1994          * brand may not exist in the miniroot (such as in net install
1995          * upgrade).
1996          */
1997         if (zonecfg_default_brand(default_brand,
1998             sizeof (default_brand)) != Z_OK) {
1999                 zerror(gettext("unable to determine default brand"));
2000                 return (1);
2001         }
2002         if (zonecfg_in_alt_root() &&
2003             strcmp(zonebrand, CLUSTER_BRAND_NAME) == 0) {
2004                 (void) strlcpy(zonebrand, default_brand, sizeof (zonebrand));
2005         }
2006 
2007         if ((bh = brand_open(zonebrand)) == NULL) {
2008                 zerror(gettext("could not open brand for zone %s"), zonename);
2009                 return (1);
2010         }
2011 
2012         if ((new_args = prep_args(bh, login, proc_args)) == NULL) {
2013                 zperror(gettext("could not assemble new arguments"));
2014                 brand_close(bh);
2015                 return (1);
2016         }
2017         /*
2018          * Get the brand specific user_cmd.  This command is used to get
2019          * a passwd(4) entry for login.
2020          */
2021         if (!interactive && !failsafe) {
2022                 if (zone_get_user_cmd(bh, login, user_cmd,
2023                     sizeof (user_cmd)) == NULL) {
2024                         zerror(gettext("could not get user_cmd for zone %s"),
2025                             zonename);
2026                         brand_close(bh);
2027                         return (1);
2028                 }
2029         }
2030         brand_close(bh);
2031 
2032         if ((new_env = prep_env()) == NULL) {
2033                 zperror(gettext("could not assemble new environment"));
2034                 return (1);
2035         }
2036 
2037         if (!interactive)
2038                 return (noninteractive_login(zonename, user_cmd, zoneid,
2039                     new_args, new_env));
2040 
2041         if (zonecfg_in_alt_root()) {
2042                 zerror(gettext("cannot use interactive login with scratch "
2043                     "zone"));
2044                 return (1);
2045         }
2046 
2047         /*
2048          * Things are more complex in interactive mode; we get the
2049          * master side of the pty, then place the user's terminal into
2050          * raw mode.
2051          */
2052         if (get_master_pty() == -1) {
2053                 zerror(gettext("could not setup master pty device"));
2054                 return (1);
2055         }
2056 
2057         /*
2058          * Compute the "short name" of the pts.  /dev/pts/2 --> pts/2
2059          */
2060         if ((slavename = ptsname(masterfd)) == NULL) {
2061                 zperror(gettext("failed to get name for pseudo-tty"));
2062                 return (1);
2063         }
2064         if (strncmp(slavename, "/dev/", strlen("/dev/")) == 0)
2065                 (void) strlcpy(slaveshortname, slavename + strlen("/dev/"),
2066                     sizeof (slaveshortname));
2067         else
2068                 (void) strlcpy(slaveshortname, slavename,
2069                     sizeof (slaveshortname));
2070 
2071         (void) printf(gettext("[Connected to zone '%s' %s]\n"), zonename,
2072             slaveshortname);
2073 
2074         if (set_tty_rawmode(STDIN_FILENO) == -1) {
2075                 reset_tty();
2076                 zperror(gettext("failed to set stdin pty to raw mode"));
2077                 return (1);
2078         }
2079 
2080         if (prefork_dropprivs() != 0) {
2081                 reset_tty();
2082                 zperror(gettext("could not allocate privilege set"));
2083                 return (1);
2084         }
2085 
2086         /*
2087          * We must mask SIGCLD until after we have coped with the fork
2088          * sufficiently to deal with it; otherwise we can race and receive the
2089          * signal before child_pid has been initialized (yes, this really
2090          * happens).
2091          */
2092         (void) sigset(SIGCLD, sigcld);
2093         (void) sigemptyset(&block_cld);
2094         (void) sigaddset(&block_cld, SIGCLD);
2095         (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2096 
2097         /*
2098          * We activate the contract template at the last minute to
2099          * avoid intermediate functions that could be using fork(2)
2100          * internally.
2101          */
2102         if ((tmpl_fd = init_template()) == -1) {
2103                 reset_tty();
2104                 zperror(gettext("could not create contract"));
2105                 return (1);
2106         }
2107 
2108         if ((child_pid = fork()) == -1) {
2109                 (void) ct_tmpl_clear(tmpl_fd);
2110                 reset_tty();
2111                 zperror(gettext("could not fork"));
2112                 return (1);
2113         } else if (child_pid == 0) { /* child process */
2114                 int slavefd, newslave;
2115 
2116                 (void) ct_tmpl_clear(tmpl_fd);
2117                 (void) close(tmpl_fd);
2118 
2119                 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2120 
2121                 if ((slavefd = init_slave_pty(zoneid, devroot)) == -1)
2122                         return (1);
2123 
2124                 /*
2125                  * Close all fds except for the slave pty.
2126                  */
2127                 (void) fdwalk(close_func, &slavefd);
2128 
2129                 /*
2130                  * Temporarily dup slavefd to stderr; that way if we have
2131                  * to print out that zone_enter failed, the output will
2132                  * have somewhere to go.
2133                  */
2134                 if (slavefd != STDERR_FILENO)
2135                         (void) dup2(slavefd, STDERR_FILENO);
2136 
2137                 if (zone_enter(zoneid) == -1) {
2138                         zerror(gettext("could not enter zone %s: %s"),
2139                             zonename, strerror(errno));
2140                         return (1);
2141                 }
2142 
2143                 if (slavefd != STDERR_FILENO)
2144                         (void) close(STDERR_FILENO);
2145 
2146                 /*
2147                  * We take pains to get this process into a new process
2148                  * group, and subsequently a new session.  In this way,
2149                  * we'll have a session which doesn't yet have a controlling
2150                  * terminal.  When we open the slave, it will become the
2151                  * controlling terminal; no PIDs concerning pgrps or sids
2152                  * will leak inappropriately into the zone.
2153                  */
2154                 (void) setpgrp();
2155 
2156                 /*
2157                  * We need the slave pty to be referenced from the zone's
2158                  * /dev in order to ensure that the devt's, etc are all
2159                  * correct.  Otherwise we break ttyname and the like.
2160                  */
2161                 if ((newslave = open(slavename, O_RDWR)) == -1) {
2162                         (void) close(slavefd);
2163                         return (1);
2164                 }
2165                 (void) close(slavefd);
2166                 slavefd = newslave;
2167 
2168                 /*
2169                  * dup the slave to the various FDs, so that when the
2170                  * spawned process does a write/read it maps to the slave
2171                  * pty.
2172                  */
2173                 (void) dup2(slavefd, STDIN_FILENO);
2174                 (void) dup2(slavefd, STDOUT_FILENO);
2175                 (void) dup2(slavefd, STDERR_FILENO);
2176                 if (slavefd != STDIN_FILENO && slavefd != STDOUT_FILENO &&
2177                     slavefd != STDERR_FILENO) {
2178                         (void) close(slavefd);
2179                 }
2180 
2181                 /*
2182                  * In failsafe mode, we don't use login(1), so don't try
2183                  * setting up a utmpx entry.
2184                  *
2185                  * A branded zone may have very different utmpx semantics.
2186                  * At the moment, we only have two brand types:
2187                  * Solaris-like (native, sn1) and Linux.  In the Solaris
2188                  * case, we know exactly how to do the necessary utmpx
2189                  * setup.  Fortunately for us, the Linux /bin/login is
2190                  * prepared to deal with a non-initialized utmpx entry, so
2191                  * we can simply skip it.  If future brands don't fall into
2192                  * either category, we'll have to add a per-brand utmpx
2193                  * setup hook.
2194                  */
2195                 if (!failsafe && (strcmp(zonebrand, "lx") != 0))
2196                         if (setup_utmpx(slaveshortname) == -1)
2197                                 return (1);
2198 
2199                 /*
2200                  * The child needs to run as root to
2201                  * execute the brand's login program.
2202                  */
2203                 if (setuid(0) == -1) {
2204                         zperror(gettext("insufficient privilege"));
2205                         return (1);
2206                 }
2207 
2208                 (void) execve(new_args[0], new_args, new_env);
2209                 zperror(gettext("exec failure"));
2210                 return (1);
2211         }
2212 
2213         (void) ct_tmpl_clear(tmpl_fd);
2214         (void) close(tmpl_fd);
2215 
2216         /*
2217          * The rest is only for the parent process.
2218          */
2219         (void) sigset(SIGWINCH, sigwinch);
2220 
2221         postfork_dropprivs();
2222 
2223         (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2224         doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
2225 
2226         reset_tty();
2227         (void) fprintf(stderr,
2228             gettext("\n[Connection to zone '%s' %s closed]\n"), zonename,
2229             slaveshortname);
2230 
2231         if (pollerr != 0) {
2232                 (void) fprintf(stderr, gettext("Error: connection closed due "
2233                     "to unexpected pollevents=0x%x.\n"), pollerr);
2234                 return (1);
2235         }
2236 
2237         return (0);
2238 }