1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 
  27 /*
 * This driver attempts to emulate some of the behaviors of
 * Linux terminal devices (/dev/ptmx and /dev/pts/[0-9][0-9]*) on Solaris.
  30  *
  31  * It does this by layering over the /dev/ptmx device and intercepting
  32  * opens to it.
  33  *
  34  * This driver makes the following assumptions about the way the ptm/pts
  35  * drivers on Solaris work:
  36  *
  37  *    - all opens of the /dev/ptmx device node return a unique dev_t.
  38  *
 *    - the dev_t minor node value for each open ptm instance corresponds
 *      to its associated slave terminal device number.  ie. the path to
 *      the slave terminal device associated with an open ptm instance
 *      whose dev_t minor node value is 5, is /dev/pts/5.
  43  *
  44  *    - the ptm driver always allocates the lowest numbered slave terminal
  45  *      device possible.
  46  */
  47 
  48 #include <sys/conf.h>
  49 #include <sys/ddi.h>
  50 #include <sys/devops.h>
  51 #include <sys/file.h>
  52 #include <sys/filio.h>
  53 #include <sys/kstr.h>
  54 #include <sys/ldlinux.h>
  55 #include <sys/lx_ptm.h>
  56 #include <sys/modctl.h>
  57 #include <sys/pathname.h>
  58 #include <sys/ptms.h>
  59 #include <sys/ptyvar.h>
  60 #include <sys/stat.h>
  61 #include <sys/stropts.h>
  62 #include <sys/sunddi.h>
  63 #include <sys/sunldi.h>
  64 #include <sys/sysmacros.h>
  65 #include <sys/types.h>
  66 
/*
 * Paths and driver name of the native pty devices this driver layers over,
 * followed by two delay constants (in microseconds, going by the USEC in
 * their names).
 */
#define LP_PTM_PATH             "/dev/ptmx"
#define LP_PTS_PATH             "/dev/pts/"
#define LP_PTS_DRV_NAME         "pts"
#define LP_PTS_USEC_DELAY       (5 * 1000)      /* 5 ms */
#define LP_PTS_USEC_DELAY_MAX   (5 * MILLISEC)  /* 5 ms */
  72 
/*
 * This driver is layered on top of the ptm driver.  We'd like to
 * make this driver's minor name space a mirror of the ptm driver's
 * namespace, but we can't actually do this.  The reason is that the
 * ptm driver is opened via the clone driver.  Therefore no minor nodes
 * of the ptm driver are actually accessible via the filesystem.
 * Since we're not a streams device we can't be opened by the clone
 * driver.  Therefore we need to have at least one minor node accessible
 * via the filesystem so that consumers can open it.  We use the device
 * node with a minor number of 0 for this purpose.  What this means is
 * that minor node 0 can't be used to map ptm minor node 0.  Since this
 * minor node is now reserved we need to shift our ptm minor node
 * mappings by one.  ie. a ptm minor node with a value of 0 will
 * correspond to our minor node with a value of 1.  These mappings are
 * managed with the following macros.
 */
#define DEVT_TO_INDEX(x)        LX_PTM_DEV_TO_PTS(x)
#define INDEX_TO_MINOR(x)       ((x) + 1)
  91 
/*
 * Grow our layered handle array by the same size increment that the ptm
 * driver uses to grow the pty device space - PTY_MAXDELTA.  This is the
 * minimum number of slots lx_ptm_lh_grow() adds per resize.
 */
#define LP_PTY_INC      128
  97 
  98 /*
  99  * lx_ptm_ops contains state information about outstanding operations on the
 100  * underlying master terminal device.  Currently we only track information
 101  * for read operations.
 102  *
 103  * Note that this data has not been rolled directly into the lx_ptm_handle
 104  * structure because we can't put mutex's of condition variables into
 105  * lx_ptm_handle structure.  The reason is that the array of lx_ptm_handle
 106  * structures linked to from the global lx_ptm state can be resized
 107  * dynamically, and when it's resized, the new array is at a different
 108  * memory location and the old array memory is discarded.  Mutexs and cvs
 109  * are accessed based off their address, so if this array was re-sized while
 110  * there were outstanding operations on any mutexs or cvs in the array
 111  * then the system would tip over.  In the future the lx_ptm_handle structure
 112  * array should probably be replaced with either an array of pointers to
 113  * lx_ptm_handle structures or some other kind of data structure containing
 114  * pointers to lx_ptm_handle structures.  Then the lx_ptm_ops structure
 115  * could be folded directly into the lx_ptm_handle structures.  (This will
 116  * also require the definition of a new locking mechanism to protect the
 117  * contents of lx_ptm_handle structures.)
 118  */
 119 typedef struct lx_ptm_ops {
 120         int                     lpo_rops;
 121         kcondvar_t              lpo_rops_cv;
 122         kmutex_t                lpo_rops_lock;
 123 } lx_ptm_ops_t;
 124 
 125 /*
 126  * Every open of the master terminal device in a zone results in a new
 127  * lx_ptm_handle handle allocation.  These handles are stored in an array
 128  * hanging off the lx_ptm_state structure.
 129  */
 130 typedef struct lx_ptm_handle {
 131         /* Device handle to the underlying real /dev/ptmx master terminal. */
 132         ldi_handle_t            lph_handle;
 133 
 134         /* Flag to indicate if TIOCPKT mode has been enabled. */
 135         int                     lph_pktio;
 136 
 137         /* Number of times the slave device has been opened/closed. */
 138         int                     lph_eofed;
 139 
 140         /* Callback handler in the ptm driver to check if slave is open. */
 141         ptmptsopencb_t          lph_ppocb;
 142 
 143         /* Pointer to state for operations on underlying device. */
 144         lx_ptm_ops_t            *lph_lpo;
 145 } lx_ptm_handle_t;
 146 
 147 /*
 148  * Global state for the lx_ptm driver.
 149  */
 150 typedef struct lx_ptm_state {
 151         /* lx_ptm device devinfo pointer */
 152         dev_info_t              *lps_dip;
 153 
 154         /* LDI ident used to open underlying real /dev/ptmx master terminals. */
 155         ldi_ident_t             lps_li;
 156 
 157         /* pts drivers major number */
 158         major_t                 lps_pts_major;
 159 
 160         /* rw lock used to manage access and growth of lps_lh_array */
 161         krwlock_t               lps_lh_rwlock;
 162 
 163         /* number of elements in lps_lh_array */
 164         uint_t                  lps_lh_count;
 165 
 166         /* Array of handles to underlying real /dev/ptmx master terminals. */
 167         lx_ptm_handle_t         *lps_lh_array;
 168 } lx_ptm_state_t;
 169 
/* The lx_ptm global state structure (a single static instance). */
static lx_ptm_state_t   lps;
 172 
/*
 * NULL-terminated list of modules to be autopushed onto slave terminal
 * devices when they are opened in an lx branded zone.  lx_ptm_open()
 * hands this list to kstr_autopush().
 */
static char *lx_pts_mods[] = {
        "ptem",
        "ldterm",
        "ttcompat",
        LDLINUX_MOD,
        NULL
};
 184 
 185 static void
 186 lx_ptm_lh_grow(uint_t index)
 187 {
 188         uint_t                  new_lh_count, old_lh_count;
 189         lx_ptm_handle_t         *new_lh_array, *old_lh_array;
 190 
 191         /*
 192          * allocate a new array.  we drop the rw lock on the array so that
 193          * readers can still access devices in case our memory allocation
 194          * blocks.
 195          */
 196         new_lh_count = MAX(lps.lps_lh_count + LP_PTY_INC, index + 1);
 197         new_lh_array =
 198             kmem_zalloc(sizeof (lx_ptm_handle_t) * new_lh_count, KM_SLEEP);
 199 
 200         /*
 201          * double check that we still actually need to increase the size
 202          * of the array
 203          */
 204         rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
 205         if (index < lps.lps_lh_count) {
 206                 /* someone beat us to it so there's nothing more to do */
 207                 rw_exit(&lps.lps_lh_rwlock);
 208                 kmem_free(new_lh_array,
 209                     sizeof (lx_ptm_handle_t) * new_lh_count);
 210                 return;
 211         }
 212 
 213         /* copy the existing data into the new array */
 214         ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL));
 215         ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL));
 216         if (lps.lps_lh_count != 0) {
 217                 bcopy(lps.lps_lh_array, new_lh_array,
 218                     sizeof (lx_ptm_handle_t) * lps.lps_lh_count);
 219         }
 220 
 221         /* save info on the old array */
 222         old_lh_array = lps.lps_lh_array;
 223         old_lh_count = lps.lps_lh_count;
 224 
 225         /* install the new array */
 226         lps.lps_lh_array = new_lh_array;
 227         lps.lps_lh_count = new_lh_count;
 228 
 229         rw_exit(&lps.lps_lh_rwlock);
 230 
 231         /* free the old array */
 232         if (old_lh_array != NULL) {
 233                 kmem_free(old_lh_array,
 234                     sizeof (lx_ptm_handle_t) * old_lh_count);
 235         }
 236 }
 237 
 238 static void
 239 lx_ptm_lh_insert(uint_t index, ldi_handle_t lh)
 240 {
 241         lx_ptm_ops_t *lpo;
 242 
 243         ASSERT(lh != NULL);
 244 
 245         /* Allocate and initialize the ops structure */
 246         lpo = kmem_zalloc(sizeof (lx_ptm_ops_t), KM_SLEEP);
 247         mutex_init(&lpo->lpo_rops_lock, NULL, MUTEX_DEFAULT, NULL);
 248         cv_init(&lpo->lpo_rops_cv, NULL, CV_DEFAULT, NULL);
 249 
 250         rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
 251 
 252         /* check if we need to grow the size of the layered handle array */
 253         if (index >= lps.lps_lh_count) {
 254                 rw_exit(&lps.lps_lh_rwlock);
 255                 lx_ptm_lh_grow(index);
 256                 rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
 257         }
 258 
 259         ASSERT(index < lps.lps_lh_count);
 260         ASSERT(lps.lps_lh_array[index].lph_handle == NULL);
 261         ASSERT(lps.lps_lh_array[index].lph_pktio == 0);
 262         ASSERT(lps.lps_lh_array[index].lph_eofed == 0);
 263         ASSERT(lps.lps_lh_array[index].lph_lpo == NULL);
 264 
 265         /* insert the new handle and return */
 266         lps.lps_lh_array[index].lph_handle = lh;
 267         lps.lps_lh_array[index].lph_pktio = 0;
 268         lps.lps_lh_array[index].lph_eofed = 0;
 269         lps.lps_lh_array[index].lph_lpo = lpo;
 270 
 271         rw_exit(&lps.lps_lh_rwlock);
 272 }
 273 
 274 static ldi_handle_t
 275 lx_ptm_lh_remove(uint_t index)
 276 {
 277         ldi_handle_t    lh;
 278 
 279         rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
 280 
 281         ASSERT(index < lps.lps_lh_count);
 282         ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
 283         ASSERT(lps.lps_lh_array[index].lph_lpo->lpo_rops == 0);
 284         ASSERT(!MUTEX_HELD(&lps.lps_lh_array[index].lph_lpo->lpo_rops_lock));
 285 
 286         /* free the write handle */
 287         kmem_free(lps.lps_lh_array[index].lph_lpo, sizeof (lx_ptm_ops_t));
 288         lps.lps_lh_array[index].lph_lpo = NULL;
 289 
 290         /* remove the handle and return it */
 291         lh = lps.lps_lh_array[index].lph_handle;
 292         lps.lps_lh_array[index].lph_handle = NULL;
 293         lps.lps_lh_array[index].lph_pktio = 0;
 294         lps.lps_lh_array[index].lph_eofed = 0;
 295         rw_exit(&lps.lps_lh_rwlock);
 296         return (lh);
 297 }
 298 
 299 static void
 300 lx_ptm_lh_get_ppocb(uint_t index, ptmptsopencb_t *ppocb)
 301 {
 302         rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
 303 
 304         ASSERT(index < lps.lps_lh_count);
 305         ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
 306 
 307         *ppocb = lps.lps_lh_array[index].lph_ppocb;
 308         rw_exit(&lps.lps_lh_rwlock);
 309 }
 310 
 311 static void
 312 lx_ptm_lh_set_ppocb(uint_t index, ptmptsopencb_t *ppocb)
 313 {
 314         rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
 315 
 316         ASSERT(index < lps.lps_lh_count);
 317         ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
 318 
 319         lps.lps_lh_array[index].lph_ppocb = *ppocb;
 320         rw_exit(&lps.lps_lh_rwlock);
 321 }
 322 
 323 static ldi_handle_t
 324 lx_ptm_lh_lookup(uint_t index)
 325 {
 326         ldi_handle_t    lh;
 327 
 328         rw_enter(&lps.lps_lh_rwlock, RW_READER);
 329 
 330         ASSERT(index < lps.lps_lh_count);
 331         ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
 332 
 333         /* return the handle */
 334         lh = lps.lps_lh_array[index].lph_handle;
 335         rw_exit(&lps.lps_lh_rwlock);
 336         return (lh);
 337 }
 338 
 339 static lx_ptm_ops_t *
 340 lx_ptm_lpo_lookup(uint_t index)
 341 {
 342         lx_ptm_ops_t    *lpo;
 343 
 344         rw_enter(&lps.lps_lh_rwlock, RW_READER);
 345 
 346         ASSERT(index < lps.lps_lh_count);
 347         ASSERT(lps.lps_lh_array[index].lph_lpo != NULL);
 348 
 349         /* return the handle */
 350         lpo = lps.lps_lh_array[index].lph_lpo;
 351         rw_exit(&lps.lps_lh_rwlock);
 352         return (lpo);
 353 }
 354 
 355 static int
 356 lx_ptm_lh_pktio_get(uint_t index)
 357 {
 358         int             pktio;
 359 
 360         rw_enter(&lps.lps_lh_rwlock, RW_READER);
 361 
 362         ASSERT(index < lps.lps_lh_count);
 363         ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
 364 
 365         /* return the pktio state */
 366         pktio = lps.lps_lh_array[index].lph_pktio;
 367         rw_exit(&lps.lps_lh_rwlock);
 368         return (pktio);
 369 }
 370 
 371 static void
 372 lx_ptm_lh_pktio_set(uint_t index, int pktio)
 373 {
 374         rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
 375 
 376         ASSERT(index < lps.lps_lh_count);
 377         ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
 378 
 379         /* set the pktio state */
 380         lps.lps_lh_array[index].lph_pktio = pktio;
 381         rw_exit(&lps.lps_lh_rwlock);
 382 }
 383 
 384 static int
 385 lx_ptm_lh_eofed_get(uint_t index)
 386 {
 387         int             eofed;
 388 
 389         rw_enter(&lps.lps_lh_rwlock, RW_READER);
 390 
 391         ASSERT(index < lps.lps_lh_count);
 392         ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
 393 
 394         /* return the eofed state */
 395         eofed = lps.lps_lh_array[index].lph_eofed;
 396         rw_exit(&lps.lps_lh_rwlock);
 397         return (eofed);
 398 }
 399 
 400 static void
 401 lx_ptm_lh_eofed_set(uint_t index)
 402 {
 403         rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
 404 
 405         ASSERT(index < lps.lps_lh_count);
 406         ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
 407 
 408         /* set the eofed state */
 409         lps.lps_lh_array[index].lph_eofed++;
 410         rw_exit(&lps.lps_lh_rwlock);
 411 }
 412 
 413 static int
 414 lx_ptm_read_start(dev_t dev)
 415 {
 416         lx_ptm_ops_t    *lpo = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
 417 
 418         mutex_enter(&lpo->lpo_rops_lock);
 419         ASSERT(lpo->lpo_rops >= 0);
 420 
 421         /* Wait for other read operations to finish */
 422         while (lpo->lpo_rops != 0) {
 423                 if (cv_wait_sig(&lpo->lpo_rops_cv, &lpo->lpo_rops_lock) == 0) {
 424                         mutex_exit(&lpo->lpo_rops_lock);
 425                         return (-1);
 426                 }
 427         }
 428 
 429         /* Start a read operation */
 430         VERIFY(++lpo->lpo_rops == 1);
 431         mutex_exit(&lpo->lpo_rops_lock);
 432         return (0);
 433 }
 434 
 435 static void
 436 lx_ptm_read_end(dev_t dev)
 437 {
 438         lx_ptm_ops_t    *lpo = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
 439 
 440         mutex_enter(&lpo->lpo_rops_lock);
 441         ASSERT(lpo->lpo_rops >= 0);
 442 
 443         /* End a read operation */
 444         VERIFY(--lpo->lpo_rops == 0);
 445         cv_signal(&lpo->lpo_rops_cv);
 446 
 447         mutex_exit(&lpo->lpo_rops_lock);
 448 }
 449 
 450 static int
 451 lx_ptm_pts_isopen(dev_t dev)
 452 {
 453         ptmptsopencb_t  ppocb;
 454 
 455         lx_ptm_lh_get_ppocb(DEVT_TO_INDEX(dev), &ppocb);
 456         return (ppocb.ppocb_func(ppocb.ppocb_arg));
 457 }
 458 
 459 static void
 460 lx_ptm_eof_read(ldi_handle_t lh)
 461 {
 462         struct uio      uio;
 463         iovec_t         iov;
 464         char            junk[1];
 465 
 466         /*
 467          * We can remove any EOF message from the head of the stream by
 468          * doing a zero byte read from the stream.
 469          */
 470         iov.iov_len = 0;
 471         iov.iov_base = junk;
 472         uio.uio_iovcnt = 1;
 473         uio.uio_iov = &iov;
 474         uio.uio_resid = iov.iov_len;
 475         uio.uio_offset = 0;
 476         uio.uio_segflg = UIO_SYSSPACE;
 477         uio.uio_fmode = 0;
 478         uio.uio_extflg = 0;
 479         uio.uio_llimit = MAXOFFSET_T;
 480         (void) ldi_read(lh, &uio, kcred);
 481 }
 482 
 483 static int
 484 lx_ptm_eof_drop_1(dev_t dev, int *rvalp)
 485 {
 486         ldi_handle_t    lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
 487         int             err, msg_size, msg_count;
 488 
 489         *rvalp = 0;
 490 
 491         /*
 492          * Check if there is an EOF message (represented by a zero length
 493          * data message) at the head of the stream.  Note that the
 494          * I_NREAD ioctl is a streams framework ioctl so it will succeed
 495          * even if there have been previous write errors on this stream.
 496          */
 497         if ((err = ldi_ioctl(lh, I_NREAD, (intptr_t)&msg_size,
 498             FKIOCTL, kcred, &msg_count)) != 0)
 499                 return (err);
 500 
 501         if ((msg_count == 0) || (msg_size != 0)) {
 502                 /* No EOF message found */
 503                 return (0);
 504         }
 505 
 506         /* Record the fact that the slave device has been closed. */
 507         lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev));
 508 
 509         /* drop the EOF */
 510         lx_ptm_eof_read(lh);
 511         *rvalp = 1;
 512         return (0);
 513 }
 514 
 515 static int
 516 lx_ptm_eof_drop(dev_t dev, int *rvalp)
 517 {
 518         int rval, err;
 519 
 520         if (rvalp != NULL)
 521                 *rvalp = 0;
 522         for (;;) {
 523                 if ((err = lx_ptm_eof_drop_1(dev, &rval)) != 0)
 524                         return (err);
 525                 if (rval == 0)
 526                         return (0);
 527                 if (rvalp != NULL)
 528                         *rvalp = 1;
 529         }
 530 }
 531 
 532 static int
 533 lx_ptm_data_check(dev_t dev, int ignore_eof, int *rvalp)
 534 {
 535         ldi_handle_t    lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
 536         int             err;
 537 
 538         *rvalp = 0;
 539         if (ignore_eof) {
 540                 int     size, rval;
 541 
 542                 if ((err = ldi_ioctl(lh, FIONREAD, (intptr_t)&size,
 543                     FKIOCTL, kcred, &rval)) != 0)
 544                         return (err);
 545                 if (size != 0)
 546                         *rvalp = 1;
 547         } else {
 548                 int     msg_size, msg_count;
 549 
 550                 if ((err = ldi_ioctl(lh, I_NREAD, (intptr_t)&msg_size,
 551                     FKIOCTL, kcred, &msg_count)) != 0)
 552                         return (err);
 553                 if (msg_count != 0)
 554                         *rvalp = 1;
 555         }
 556         return (0);
 557 }
 558 
 559 static int
 560 lx_ptm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 561 {
 562         int err;
 563 
 564         if (cmd != DDI_ATTACH)
 565                 return (DDI_FAILURE);
 566 
 567         if (ddi_create_minor_node(dip, LX_PTM_MINOR_NODE, S_IFCHR,
 568             ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
 569                 return (DDI_FAILURE);
 570 
 571         err = ldi_ident_from_dip(dip, &lps.lps_li);
 572         if (err != 0) {
 573                 ddi_remove_minor_node(dip, ddi_get_name(dip));
 574                 return (DDI_FAILURE);
 575         }
 576 
 577         lps.lps_dip = dip;
 578         lps.lps_pts_major = ddi_name_to_major(LP_PTS_DRV_NAME);
 579 
 580         rw_init(&lps.lps_lh_rwlock, NULL, RW_DRIVER, NULL);
 581         lps.lps_lh_count = 0;
 582         lps.lps_lh_array = NULL;
 583 
 584         return (DDI_SUCCESS);
 585 }
 586 
 587 /*ARGSUSED*/
 588 static int
 589 lx_ptm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 590 {
 591         if (cmd != DDI_DETACH)
 592                 return (DDI_FAILURE);
 593 
 594         ldi_ident_release(lps.lps_li);
 595         lps.lps_dip = NULL;
 596 
 597         ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL));
 598         ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL));
 599         if (lps.lps_lh_array != NULL) {
 600                 kmem_free(lps.lps_lh_array,
 601                     sizeof (lx_ptm_handle_t) * lps.lps_lh_count);
 602                 lps.lps_lh_array = NULL;
 603                 lps.lps_lh_count = 0;
 604         }
 605 
 606         return (DDI_SUCCESS);
 607 }
 608 
 609 /*ARGSUSED*/
 610 static int
 611 lx_ptm_open(dev_t *devp, int flag, int otyp, cred_t *credp)
 612 {
 613         struct strioctl iocb;
 614         ptmptsopencb_t  ppocb = { NULL, NULL };
 615         ldi_handle_t    lh;
 616         major_t         maj, our_major = getmajor(*devp);
 617         minor_t         min, lastmin;
 618         uint_t          index, anchor = 1;
 619         dev_t           ptm_dev;
 620         int             err, rval = 0;
 621 
 622         /*
 623          * Don't support the FNDELAY flag and FNONBLOCK until we either
 624          * find a Linux app that opens /dev/ptmx with the O_NDELAY
 625          * or O_NONBLOCK flags explicitly, or until we create test cases
 626          * to determine how reads of master terminal devices opened with
 627          * these flags behave in different situations on Linux.  Supporting
 628          * these flags will involve enhancing our read implementation
 629          * and changing the way it deals with EOF notifications.
 630          */
 631         if (flag & (FNDELAY | FNONBLOCK))
 632                 return (ENOTSUP);
 633 
 634         /*
 635          * we're layered on top of the ptm driver so open that driver
 636          * first.  (note that we're opening /dev/ptmx in the global
 637          * zone, not ourselves in the Linux zone.)
 638          */
 639         err = ldi_open_by_name(LP_PTM_PATH, flag, credp, &lh, lps.lps_li);
 640         if (err != 0)
 641                 return (err);
 642 
 643         /* get the devt returned by the ptmx open */
 644         err = ldi_get_dev(lh, &ptm_dev);
 645         if (err != 0) {
 646                 (void) ldi_close(lh, flag, credp);
 647                 return (err);
 648         }
 649 
 650         /*
 651          * we're a cloning driver so here's well change the devt that we
 652          * return.  the ptmx is also a cloning driver so we'll just use
 653          * it's minor number as our minor number (it already manages it's
 654          * minor name space so no reason to duplicate the effort.)
 655          */
 656         index = getminor(ptm_dev);
 657         *devp = makedevice(our_major, INDEX_TO_MINOR(index));
 658 
 659         /* Get a callback function to query if the pts device is open. */
 660         iocb.ic_cmd = PTMPTSOPENCB;
 661         iocb.ic_timout = 0;
 662         iocb.ic_len = sizeof (ppocb);
 663         iocb.ic_dp = (char *)&ppocb;
 664 
 665         err = ldi_ioctl(lh, I_STR, (intptr_t)&iocb, FKIOCTL, kcred, &rval);
 666         if ((err != 0) || (rval != 0)) {
 667                 (void) ldi_close(lh, flag, credp);
 668                 return (EIO); /* XXX return something else here? */
 669         }
 670         ASSERT(ppocb.ppocb_func != NULL);
 671 
 672         /*
 673          * now setup autopush for the terminal slave device.  this is
 674          * necessary so that when a Linux program opens the device we
 675          * can push required strmod modules onto the stream.  in Solaris
 676          * this is normally done by the application that actually
 677          * allocates the terminal.
 678          */
 679         maj = lps.lps_pts_major;
 680         min = index;
 681         lastmin = 0;
 682         err = kstr_autopush(SET_AUTOPUSH, &maj, &min, &lastmin,
 683             &anchor, lx_pts_mods);
 684         if (err != 0) {
 685                 (void) ldi_close(lh, flag, credp);
 686                 return (EIO); /* XXX return something else here? */
 687         }
 688 
 689         /* save off this layered handle for future accesses */
 690         lx_ptm_lh_insert(index, lh);
 691         lx_ptm_lh_set_ppocb(index, &ppocb);
 692         return (0);
 693 }
 694 
 695 /*ARGSUSED*/
 696 static int
 697 lx_ptm_close(dev_t dev, int flag, int otyp, cred_t *credp)
 698 {
 699         ldi_handle_t    lh;
 700         major_t         maj;
 701         minor_t         min, lastmin;
 702         uint_t          index;
 703         int             err;
 704 
 705         index = DEVT_TO_INDEX(dev);
 706 
 707         /*
 708          * we must cleanup all the state associated with this major/minor
 709          * terminal pair before actually closing the ptm master device.
 710          * this is required because once the close of the ptm device is
 711          * complete major/minor terminal pair is immediatly available for
 712          * re-use in any zone.
 713          */
 714 
 715         /* free up our saved reference for this layered handle */
 716         lh = lx_ptm_lh_remove(index);
 717 
 718         /* unconfigure autopush for the associated terminal slave device */
 719         maj = lps.lps_pts_major;
 720         min = index;
 721         lastmin = 0;
 722         do {
 723                 /*
 724                  * we loop here because we don't want to release this ptm
 725                  * node if autopush can't be disabled on the associated
 726                  * slave device because then bad things could happen if
 727                  * another brand were to get this terminal allocated
 728                  * to them.
 729                  *
 730                  * XXX should we ever give up?
 731                  */
 732                 err = kstr_autopush(CLR_AUTOPUSH, &maj, &min, &lastmin,
 733                     0, NULL);
 734         } while (err != 0);
 735 
 736         err = ldi_close(lh, flag, credp);
 737 
 738         /*
 739          * note that we don't have to bother with changing the permissions
 740          * on the associated slave device here.  the reason is that no one
 741          * can actually open the device untill it's associated master
 742          * device is re-opened, which will result in the permissions on
 743          * it being reset.
 744          */
 745         return (err);
 746 }
 747 
 748 static int
 749 lx_ptm_read_loop(dev_t dev, struct uio *uiop, cred_t *credp, int *loop)
 750 {
 751         ldi_handle_t    lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
 752         int             err, rval;
 753         struct uio      uio = *uiop;
 754 
 755         *loop = 0;
 756 
 757         /*
 758          * Here's another way that Linux master terminals behave differently
 759          * from Solaris master terminals.  If you do a read on a Linux
 760          * master terminal (that was opened witout NDELAY and NONBLOCK)
 761          * who's corrosponding slave terminal is currently closed and
 762          * has been opened and closed at least once, Linux return -1 and
 763          * set errno to EIO where as Solaris blocks.
 764          */
 765         if (lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev))) {
 766                 /* Slave has been opened and closed at least once. */
 767                 if (lx_ptm_pts_isopen(dev) == 0) {
 768                         /*
 769                          * Slave is closed.  Make sure that data is avaliable
 770                          * before attempting a read.
 771                          */
 772                         if ((err = lx_ptm_data_check(dev, 0, &rval)) != 0)
 773                                 return (err);
 774 
 775                         /* If there is no data available then return. */
 776                         if (rval == 0)
 777                                 return (EIO);
 778                 }
 779         }
 780 
 781         /* Actually do the read operation. */
 782         if ((err = ldi_read(lh, uiop, credp)) != 0)
 783                 return (err);
 784 
 785         /* If read returned actual data then return. */
 786         if (uio.uio_resid != uiop->uio_resid)
 787                 return (0);
 788 
 789         /*
 790          * This was a zero byte read (ie, an EOF).  This indicates
 791          * that the slave terinal device has been closed.  Record
 792          * the fact that the slave device has been closed and retry
 793          * the read operation.
 794          */
 795         lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev));
 796         *loop = 1;
 797         return (0);
 798 }
 799 
 800 static int
 801 lx_ptm_read(dev_t dev, struct uio *uiop, cred_t *credp)
 802 {
 803         int             pktio = lx_ptm_lh_pktio_get(DEVT_TO_INDEX(dev));
 804         int             err, loop;
 805         struct uio      uio;
 806         struct iovec    iovp;
 807 
 808         ASSERT(uiop->uio_iovcnt > 0);
 809 
 810         /*
 811          * If packet mode has been enabled (via TIOCPKT) we need to pad
 812          * all read requests with a leading byte that indicates any
 813          * relevant control status information.
 814          */
 815         if (pktio != 0) {
 816                 /*
 817                  * We'd like to write the control information into
 818                  * the current buffer but we can't yet.  We don't
 819                  * want to modify userspace memory here only to have
 820                  * the read operation fail later.  So instead
 821                  * what we'll do here is read one character from the
 822                  * beginning of the memory pointed to by the uio
 823                  * structure.  This will advance the output pointer
 824                  * by one.  Then when the read completes successfully
 825                  * we can update the byte that we passed over.  Before
 826                  * we do the read make a copy of the current uiop and
 827                  * iovec structs so we can write to them later.
 828                  */
 829                 uio = *uiop;
 830                 iovp = *uiop->uio_iov;
 831                 uio.uio_iov = &iovp;
 832 
 833                 if (uwritec(uiop) == -1)
 834                         return (EFAULT);
 835         }
 836 
 837         do {
 838                 /*
 839                  * Before we actually attempt a read operation we need
 840                  * to make sure there's some buffer space to actually
 841                  * read in some data.  We do this because if we're in
 842                  * pktio mode and the caller only requested one byte,
 843                  * then we've already used up that one byte and we
 844                  * don't want to pass this read request.  Doing a 0
 845                  * byte read (unless there is a problem with the stream
 846                  * head) always returns succcess.  Normally when a streams
 847                  * read returns 0 bytes we interpret that as an EOF on
 848                  * the stream (ie, the slave side has been opened and
 849                  * closed) and we ignore it and re-try the read operation.
 850                  * So if we pass on a 0 byte read here lx_ptm_read_loop()
 851                  * will tell us to loop around and we'll end up in an
 852                  * infinite loop.
 853                  */
 854                 if (uiop->uio_resid == 0)
 855                         break;
 856 
 857                 /*
 858                  * Serialize all reads.  We need to do this so that we can
 859                  * properly emulate the behavior of master terminals on Linux.
 860                  * In reality this serializaion should not pose any kind of
 861                  * performance problem since it would be very strange to have
 862                  * multiple threads trying to read from the same master
 863                  * terminal device concurrently.
 864                  */
 865                 if (lx_ptm_read_start(dev) != 0)
 866                         return (EINTR);
 867 
 868                 err = lx_ptm_read_loop(dev, uiop, credp, &loop);
 869                 lx_ptm_read_end(dev);
 870                 if (err != 0)
 871                         return (err);
 872         } while (loop != 0);
 873 
 874         if (pktio != 0) {
 875                 uint8_t         pktio_data = TIOCPKT_DATA;
 876 
 877                 /*
 878                  * Note that the control status information we
 879                  * pass back is faked up in the sense that we
 880                  * don't actually report any events, we always
 881                  * report a status of 0.
 882                  */
 883                 if (uiomove(&pktio_data, 1, UIO_READ, &uio) != 0)
 884                         return (EFAULT);
 885         }
 886 
 887         return (0);
 888 }
 889 
 890 static int
 891 lx_ptm_write(dev_t dev, struct uio *uiop, cred_t *credp)
 892 {
 893         ldi_handle_t            lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
 894         int             err;
 895 
 896         err = ldi_write(lh, uiop, credp);
 897 
 898         return (err);
 899 }
 900 
 901 static int
 902 lx_ptm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
 903     int *rvalp)
 904 {
 905         ldi_handle_t    lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
 906         int             err;
 907 
 908         /*
 909          * here we need to make sure that we never allow the
 910          * I_SETSIG and I_ESETSIG ioctls to pass through.  we
 911          * do this because we can't support them.
 912          *
 913          * the native Solaris ptm device supports these ioctls because
 914          * they are streams framework ioctls and all streams devices
 915          * support them by default.  these ioctls cause the current
 916          * process to be registered with a stream and receive signals
 917          * when certain stream events occur.
 918          *
 919          * a problem arises with cleanup of these registrations
 920          * for layered drivers.
 921          *
 922          * normally the streams framework is notified whenever a
 923          * process closes any reference to a stream and it goes ahead
 924          * and cleans up these registrations.  but actual device drivers
 925          * are not notified when a process performs a close operation
 926          * unless the process is closing the last opened reference to
 927          * the device on the entire system.
 928          *
 929          * so while we could pass these ioctls on and allow processes
 930          * to register for signal delivery, we would never receive
 931          * any notification when those processes exit (or close a
 932          * stream) and we wouldn't be able to unregister them.
 933          *
 934          * luckily these operations are streams specific and Linux
 935          * doesn't support streams devices.  so it doesn't actually
 936          * seem like we need to support these ioctls.  if it turns
 937          * out that we do need to support them for some reason in
 938          * the future, the current driver model will have to be
 939          * enhanced to better support streams device layering.
 940          */
 941         if ((cmd == I_SETSIG) || (cmd == I_ESETSIG))
 942                 return (EINVAL);
 943 
 944         /*
 945          * here we fake up support for TIOCPKT.  Linux applications expect
 946          * /etc/ptmx to support this ioctl, but on Solaris it doesn't.
 947          * (it is supported on older bsd style ptys.)  so we'll fake
 948          * up support for it here.
 949          *
 950          * the reason that this ioctl is emulated here instead of in
 951          * userland is that this ioctl affects the results returned
 952          * from read() operations.  if this ioctl was emulated in
 953          * userland the brand library would need to intercept all
 954          * read operations and check to see if pktio was enabled
 955          * for the fd being read from.  since this ioctl only needs
 956          * to be supported on the ptmx device it makes more sense
 957          * to support it here where we can easily update the results
 958          * returned for read() operations performed on ourselves.
 959          */
 960         if (cmd == TIOCPKT) {
 961                 int     pktio;
 962 
 963                 if (ddi_copyin((void *)arg, &pktio, sizeof (pktio),
 964                     mode) != DDI_SUCCESS)
 965                         return (EFAULT);
 966 
 967                 if (pktio == 0)
 968                         lx_ptm_lh_pktio_set(DEVT_TO_INDEX(dev), 0);
 969                 else
 970                         lx_ptm_lh_pktio_set(DEVT_TO_INDEX(dev), 1);
 971 
 972                 return (0);
 973         }
 974 
 975         err = ldi_ioctl(lh, cmd, arg, mode, credp, rvalp);
 976 
 977         return (err);
 978 }
 979 
 980 static int
 981 lx_ptm_poll_loop(dev_t dev, short events, int anyyet, short *reventsp,
 982     struct pollhead **phpp, int *loop)
 983 {
 984         ldi_handle_t    lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
 985         short           reventsp2;
 986         int             err, rval;
 987 
 988         *loop = 0;
 989 
 990         /*
 991          * If the slave device has been opened and closed at least
 992          * once and the slave device is currently closed, then poll
 993          * always needs to returns immediatly.
 994          */
 995         if ((lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev)) != 0) &&
 996             (lx_ptm_pts_isopen(dev) == 0)) {
 997                 /* In this case always return POLLHUP */
 998                 *reventsp = POLLHUP;
 999 
1000                 /*
1001                  * Check if there really is data on the stream.
1002                  * If so set the correct return flags.
1003                  */
1004                 if ((err = lx_ptm_data_check(dev, 1, &rval)) != 0) {
1005                         /* Something went wrong. */
1006                         return (err);
1007                 }
1008                 if (rval != 0)
1009                         *reventsp |= (events & (POLLIN | POLLRDNORM));
1010 
1011                 /*
1012                  * Is the user checking for writability?  Note that for ptm
1013                  * devices Linux seems to ignore the POLLWRBAND write flag.
1014                  */
1015                 if ((events & POLLWRNORM) == 0)
1016                         return (0);
1017 
1018                 /*
1019                  * To check if the stream is writable we have to actually
1020                  * call poll, but make sure to set anyyet to 1 to prevent
1021                  * the streams framework from setting up callbacks.
1022                  */
1023                 if ((err = ldi_poll(lh, POLLWRNORM, 1, &reventsp2, NULL)) != 0)
1024                         return (err);
1025 
1026                 *reventsp |= (reventsp2 & POLLWRNORM);
1027         } else {
1028                 int lockstate;
1029 
1030                 /* The slave device is open, do the poll */
1031                 if ((err = ldi_poll(lh, events, anyyet, reventsp, phpp)) != 0)
1032                         return (err);
1033 
1034                 /*
1035                  * Drop any leading EOFs on the stream.
1036                  *
1037                  * Note that we have to use pollunlock() here to avoid
1038                  * recursive mutex enters in the poll framework.  The
1039                  * reason is that if there is an EOF message on the stream
1040                  * then the act of reading from the queue to remove the
1041                  * message can cause the ptm drivers event service
1042                  * routine to be invoked, and if there is no open
1043                  * slave device then the ptm driver may generate
1044                  * error messages and put them on the stream.  This
1045                  * in turn will generate a poll event and the poll
1046                  * framework will try to invoke any poll callbacks
1047                  * associated with the stream.  In the process of
1048                  * doing that the poll framework will try to aquire
1049                  * locks that we are already holding.  So we need to
1050                  * drop those locks here before we do our read.
1051                  */
1052                 lockstate = pollunlock();
1053                 err = lx_ptm_eof_drop(dev, &rval);
1054                 pollrelock(lockstate);
1055                 if (err)
1056                         return (err);
1057 
1058                 /* If no EOF was dropped then return */
1059                 if (rval == 0)
1060                         return (0);
1061 
1062                 /*
1063                  * An EOF was removed from the stream.  Retry the entire
1064                  * poll operation from the top because polls on the ptm
1065                  * device should behave differently now.
1066                  */
1067                 *loop = 1;
1068         }
1069         return (0);
1070 }
1071 
1072 static int
1073 lx_ptm_poll(dev_t dev, short events, int anyyet, short *reventsp,
1074     struct pollhead **phpp)
1075 {
1076         int loop, err;
1077 
1078         do {
1079                 /* Serialize ourself wrt read operations. */
1080                 if (lx_ptm_read_start(dev) != 0)
1081                         return (EINTR);
1082 
1083                 err = lx_ptm_poll_loop(dev,
1084                     events, anyyet, reventsp, phpp, &loop);
1085                 lx_ptm_read_end(dev);
1086                 if (err != 0)
1087                         return (err);
1088         } while (loop != 0);
1089         return (0);
1090 }
1091 
1092 static struct cb_ops lx_ptm_cb_ops = { /* character device entry points */
1093         lx_ptm_open,            /* open */
1094         lx_ptm_close,           /* close */
1095         nodev,                  /* strategy */
1096         nodev,                  /* print */
1097         nodev,                  /* dump */
1098         lx_ptm_read,            /* read */
1099         lx_ptm_write,           /* write */
1100         lx_ptm_ioctl,           /* ioctl */
1101         nodev,                  /* devmap */
1102         nodev,                  /* mmap */
1103         nodev,                  /* segmap */
1104         lx_ptm_poll,            /* chpoll */
1105         ddi_prop_op,            /* prop_op */
1106         NULL,                   /* cb_str */
1107         D_NEW | D_MP,           /* cb_flag */
1108         CB_REV,                 /* cb_rev */
1109         NULL,                   /* cb_aread */
1110         NULL                    /* cb_awrite */
1111 };
1112 
1113 static struct dev_ops lx_ptm_ops = { /* driver level operations */
1114         DEVO_REV,               /* devo_rev */
1115         0,                      /* devo_refcnt */
1116         ddi_getinfo_1to1,       /* devo_getinfo */
1117         nulldev,                /* devo_identify */
1118         nulldev,                /* devo_probe */
1119         lx_ptm_attach,          /* devo_attach */
1120         lx_ptm_detach,          /* devo_detach */
1121         nodev,                  /* devo_reset */
1122         &lx_ptm_cb_ops,         /* devo_cb_ops */
1123         NULL,                   /* devo_bus_ops */
1124         NULL,                   /* devo_power */
1125         ddi_quiesce_not_needed,         /* quiesce */
1126 };
1127 
1128 static struct modldrv modldrv = { /* linkage for this driver module */
1129         &mod_driverops,                     /* type of module */
1130         "Linux master terminal driver", /* description of module */
1131         &lx_ptm_ops                 /* driver ops */
1132 };
1133 
1134 static struct modlinkage modlinkage = { /* used by _init/_info/_fini */
1135         MODREV_1,               /* ml_rev */
1136         &modldrv,               /* the only linkage: the driver above */
1137         NULL                    /* end of the linkage list */
1138 };
1139 
1140 int
1141 _init(void)
1142 {
1143         return (mod_install(&modlinkage));
1144 }
1145 
1146 int
1147 _info(struct modinfo *modinfop)
1148 {
1149         return (mod_info(&modlinkage, modinfop));
1150 }
1151 
1152 int
1153 _fini(void)
1154 {
1155         return (mod_remove(&modlinkage));
1156 }