1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #pragma ident   "%Z%%M% %I%     %E% SMI"
  27 
  28 #include <errno.h>
  29 #include <unistd.h>
  30 #include <sys/mman.h>
  31 #include <sys/param.h>
  32 #include <sys/lx_debug.h>
  33 #include <sys/lx_misc.h>
  34 
  35 /*
 * There are two forms of mmap, mmap() and mmap2().  The only difference is that
 * the final argument to mmap2() gives the file offset in pages, not bytes.
  38  * Linux has a number of additional flags, but they are all deprecated.  We also
  39  * ignore the MAP_GROWSDOWN flag, which has no equivalent on Solaris.
  40  *
  41  * The Linux mmap() returns ENOMEM in some cases where Solaris returns
  42  * EOVERFLOW, so we translate the errno as necessary.
  43  */
  44 
  45 int pagesize;   /* needed for mmap2() */
  46 
  47 #define LX_MAP_ANONYMOUS        0x00020
  48 #define LX_MAP_NORESERVE        0x04000
  49 
  50 static int
  51 ltos_mmap_flags(int flags)
  52 {
  53         int new_flags;
  54 
  55         new_flags = flags & (MAP_TYPE | MAP_FIXED);
  56         if (flags & LX_MAP_ANONYMOUS)
  57                 new_flags |= MAP_ANONYMOUS;
  58         if (flags & LX_MAP_NORESERVE)
  59                 new_flags |= MAP_NORESERVE;
  60 
  61         return (new_flags);
  62 }
  63 
  64 static int
  65 mmap_common(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
  66     uintptr_t p5, off64_t p6)
  67 {
  68         void *addr = (void *)p1;
  69         size_t len = p2;
  70         int prot = p3;
  71         int flags = p4;
  72         int fd = p5;
  73         off64_t off = p6;
  74         void *ret;
  75 
  76         if (lx_debug_enabled != 0) {
  77                 char *path, path_buf[MAXPATHLEN];
  78 
  79                 path = lx_fd_to_path(fd, path_buf, sizeof (path_buf));
  80                 if (path == NULL)
  81                         path = "?";
  82 
  83                 lx_debug("\tmmap_common(): fd = %d - %s", fd, path);
  84         }
  85 
  86         /*
  87          * Under Linux, the file descriptor is ignored when mapping zfod
  88          * anonymous memory,  On Solaris, we want the fd set to -1 for the
  89          * same functionality.
  90          */
  91         if (flags & LX_MAP_ANONYMOUS)
  92                 fd = -1;
  93 
  94         /*
  95          * This is totally insane. The NOTES section in the linux mmap(2) man
  96          * page claims that on some architectures, read protection may
  97          * automatically include exec protection. It has been observed on a
  98          * native linux system that the /proc/<pid>/maps file does indeed
  99          * show that segments mmap'd from userland (such as libraries mapped in
 100          * by the dynamic linker) all have exec the permission set, even for
 101          * data segments.
 102          */
 103         if (prot & PROT_READ)
 104                 prot |= PROT_EXEC;
 105 
 106         ret = mmap64(addr, len, prot, ltos_mmap_flags(flags), fd, off);
 107 
 108         if (ret == MAP_FAILED)
 109                 return (errno == EOVERFLOW ? -ENOMEM : -errno);
 110         else
 111                 return ((int)ret);
 112 }
 113 
 114 int
 115 lx_mmap(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
 116     uintptr_t p5, uintptr_t p6)
 117 {
 118         return (mmap_common(p1, p2, p3, p4, p5, (off64_t)p6));
 119 }
 120 
 121 int
 122 lx_mmap2(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
 123     uintptr_t p5, uintptr_t p6)
 124 {
 125         if (pagesize == 0)
 126                 pagesize = sysconf(_SC_PAGESIZE);
 127 
 128         return (mmap_common(p1, p2, p3, p4, p5, (off64_t)p6 * pagesize));
 129 }
 130 
 131 
 132 /*
 133  * The locking family of system calls, as well as msync(), are identical.  On
 134  * Solaris, they are layered on top of the memcntl syscall, so they cannot be
 135  * pass-thru.
 136  */
 137 int
 138 lx_mlock(uintptr_t addr, uintptr_t len)
 139 {
 140         uintptr_t addr1 = addr & PAGEMASK;
 141         uintptr_t len1 = len + (addr & PAGEOFFSET);
 142 
 143         return (mlock((void *)addr1, (size_t)len1) ? -errno : 0);
 144 }
 145 
 146 int
 147 lx_mlockall(uintptr_t flags)
 148 {
 149         return (mlockall(flags) ? -errno : 0);
 150 }
 151 
 152 int
 153 lx_munlock(uintptr_t addr, uintptr_t len)
 154 {
 155         uintptr_t addr1 = addr & PAGEMASK;
 156         uintptr_t len1 = len + (addr & PAGEOFFSET);
 157 
 158         return (munlock((void *)addr1, (size_t)len1) ? -errno : 0);
 159 }
 160 
 161 int
 162 lx_munlockall(void)
 163 {
 164         return (munlockall() ? -errno : 0);
 165 }
 166 
 167 int
 168 lx_msync(uintptr_t addr, uintptr_t len, uintptr_t flags)
 169 {
 170         return (msync((void *)addr, (size_t)len, flags) ? -errno : 0);
 171 }
 172 
 173 /*
 174  * Solaris recognizes more flags than Linux, so we don't want to inadvertently
 175  * use what would be an invalid flag on Linux.  Linux also allows the length to
 176  * be zero, while Solaris does not.
 177  */
 178 int
 179 lx_madvise(uintptr_t start, uintptr_t len, uintptr_t advice)
 180 {
 181         if (len == 0)
 182                 return (0);
 183 
 184         switch (advice) {
 185         case MADV_NORMAL:
 186         case MADV_RANDOM:
 187         case MADV_SEQUENTIAL:
 188         case MADV_WILLNEED:
 189         case MADV_DONTNEED:
 190                 return (madvise((void *)start, len, advice) ? -errno : 0);
 191 
 192         default:
 193                 return (-EINVAL);
 194         }
 195 }
 196 
 197 /*
 198  * mprotect() is identical except that we ignore the Linux flags PROT_GROWSDOWN
 199  * and PROT_GROWSUP, which have no equivalent on Solaris.
 200  */
 201 #define LX_PROT_GROWSDOWN       0x01000000
 202 #define LX_PROT_GROWSUP         0x02000000
 203 
 204 int
 205 lx_mprotect(uintptr_t start, uintptr_t len, uintptr_t prot)
 206 {
 207         prot &= ~(LX_PROT_GROWSUP | LX_PROT_GROWSDOWN);
 208 
 209         return (mprotect((void *)start, len, prot) ? -errno : 0);
 210 }