/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
 */
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/tss.h>
#include <sys/segments.h>
#include <sys/trap.h>
#include <sys/cpuvar.h>
#include <sys/bootconf.h>
#include <sys/x86_archext.h>
#include <sys/controlregs.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/kobj.h>
#include <sys/cmn_err.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/mach_mmu.h>
#include <sys/systm.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#include <vm/as.h>
#endif

#include <sys/promif.h>
#include <sys/bootinfo.h>
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>
/*
 * cpu0 and default tables and structures.
 */
user_desc_t     *gdt0;
#if !defined(__xpv)
desctbr_t       gdt0_default_r;
#endif

gate_desc_t     *idt0;          /* interrupt descriptor table */
#if defined(__i386)
desctbr_t       idt0_default_r;         /* describes idt0 in IDTR format */
#endif

struct tss      *ktss0;                 /* kernel task state structure */

#if defined(__i386)
struct tss      *dftss0;                /* #DF double-fault exception */
#endif  /* __i386 */

user_desc_t     zero_udesc;             /* base zero user desc native procs */
user_desc_t     null_udesc;             /* null user descriptor */
system_desc_t   null_sdesc;             /* null system descriptor */

#if defined(__amd64)
user_desc_t     zero_u32desc;           /* 32-bit compatibility procs */
#endif  /* __amd64 */

#if defined(__amd64)
user_desc_t     ucs_on;
user_desc_t     ucs_off;
user_desc_t     ucs32_on;
user_desc_t     ucs32_off;
#endif  /* __amd64 */

#pragma align   16(dblfault_stack0)
char            dblfault_stack0[DEFAULTSTKSZ];

extern void     fast_null(void);
extern hrtime_t get_hrtime(void);
extern hrtime_t gethrvtime(void);
extern hrtime_t get_hrestime(void);
extern uint64_t getlgrp(void);
void (*(fasttable[]))(void) = {
        fast_null,                      /* T_FNULL routine */
        fast_null,                      /* T_FGETFP routine (initially null) */
        fast_null,                      /* T_FSETFP routine (initially null) */
        (void (*)())get_hrtime,         /* T_GETHRTIME */
        (void (*)())gethrvtime,         /* T_GETHRVTIME */
        (void (*)())get_hrestime,       /* T_GETHRESTIME */
        (void (*)())getlgrp             /* T_GETLGRP */
};
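
/*
 * Caller-side sketch (the real entry sequence lives in the trap handling
 * code, not in this file): userland reaches one of these routines by
 * issuing "int $T_FASTTRAP" with the fasttable[] index in %eax, e.g.
 * T_GETHRTIME to invoke get_hrtime() without the cost of a full
 * system call.
 */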

/*
 * Structure containing pre-computed descriptors to allow us to temporarily
 * interpose on a standard handler.
 */
struct interposing_handler {
        int ih_inum;
        gate_desc_t ih_interp_desc;
        gate_desc_t ih_default_desc;
};

/*
 * The brand infrastructure interposes on two handlers, and we use one as a
 * NULL signpost.
 */
static struct interposing_handler brand_tbl[3];

/*
 * software prototypes for default local descriptor table
 */

/*
 * Routines for loading segment descriptors in a format the hardware
 * can understand.
 */

#if defined(__amd64)

/*
 * In long mode we have the new L or long mode attribute bit
 * for code segments. Only the conforming bit in type is used along
 * with the descriptor privilege level and present bits. Default operand
 * size must be zero when in long mode. In 32-bit compatibility mode all
 * fields are treated as in legacy mode. For data segments while in long
 * mode only the present bit is loaded.
 */
void
set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
    uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
{
        ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);

        /*
         * 64-bit long mode.
         */
        if (lmode == SDP_LONG)
                dp->usd_def32 = 0;           /* D bit must be zero in long mode */
        else
                /*
                 * 32-bit compatibility mode.
                 */
                dp->usd_def32 = defopsz;     /* 0 = 16, 1 = 32-bit ops */

        dp->usd_long = lmode;        /* 64-bit mode */
        dp->usd_type = type;
        dp->usd_dpl = dpl;
        dp->usd_p = 1;
        dp->usd_gran = gran;         /* 0 = bytes, 1 = pages */

        dp->usd_lobase = (uintptr_t)base;
        dp->usd_midbase = (uintptr_t)base >> 16;
        dp->usd_hibase = (uintptr_t)base >> (16 + 8);
        dp->usd_lolimit = size;
        dp->usd_hilimit = (uintptr_t)size >> 16;
}
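
/*
 * Example (illustrative; mirrors the calls in init_gdt_common() below):
 * build the 64-bit user code descriptor. Base and limit are irrelevant
 * in long mode, so zero is passed for both:
 *
 *      set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
 *          SDP_PAGES, SDP_OP32);
 */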

#elif defined(__i386)

/*
 * Install user segment descriptor for code and data.
 */
void
set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl, uint_t gran, uint_t defopsz)
{
        dp->usd_lolimit = size;
        dp->usd_hilimit = (uintptr_t)size >> 16;

        dp->usd_lobase = (uintptr_t)base;
        dp->usd_midbase = (uintptr_t)base >> 16;
        dp->usd_hibase = (uintptr_t)base >> (16 + 8);

        dp->usd_type = type;
        dp->usd_dpl = dpl;
        dp->usd_p = 1;
        dp->usd_def32 = defopsz;     /* 0 = 16, 1 = 32 bit operands */
        dp->usd_gran = gran;         /* 0 = bytes, 1 = pages */
}

#endif  /* __i386 */

/*
 * Install system segment descriptor for LDT and TSS segments.
 */

#if defined(__amd64)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
        dp->ssd_lolimit = size;
        dp->ssd_hilimit = (uintptr_t)size >> 16;

        dp->ssd_lobase = (uintptr_t)base;
        dp->ssd_midbase = (uintptr_t)base >> 16;
        dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
        dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);

        dp->ssd_type = type;
        dp->ssd_zero1 = 0;   /* must be zero */
        dp->ssd_zero2 = 0;
        dp->ssd_dpl = dpl;
        dp->ssd_p = 1;
        dp->ssd_gran = 0;    /* force byte units */
}

void *
get_ssd_base(system_desc_t *dp)
{
        uintptr_t       base;

        base = (uintptr_t)dp->ssd_lobase |
            (uintptr_t)dp->ssd_midbase << 16 |
            (uintptr_t)dp->ssd_hibase << (16 + 8) |
            (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
        return ((void *)base);
}
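
/*
 * Illustrative round-trip (a sketch, not compiled here): get_ssd_base()
 * reassembles the base address that set_syssegd() scatters across the
 * descriptor's base fields:
 *
 *      system_desc_t sd;
 *
 *      set_syssegd(&sd, ktss0, sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
 *      ASSERT(get_ssd_base(&sd) == (void *)ktss0);
 */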

#elif defined(__i386)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
        dp->ssd_lolimit = size;
        dp->ssd_hilimit = (uintptr_t)size >> 16;

        dp->ssd_lobase = (uintptr_t)base;
        dp->ssd_midbase = (uintptr_t)base >> 16;
        dp->ssd_hibase = (uintptr_t)base >> (16 + 8);

        dp->ssd_type = type;
        dp->ssd_zero = 0;    /* must be zero */
        dp->ssd_dpl = dpl;
        dp->ssd_p = 1;
        dp->ssd_gran = 0;    /* force byte units */
}

void *
get_ssd_base(system_desc_t *dp)
{
        uintptr_t       base;

        base = (uintptr_t)dp->ssd_lobase |
            (uintptr_t)dp->ssd_midbase << 16 |
            (uintptr_t)dp->ssd_hibase << (16 + 8);
        return ((void *)base);
}

#endif  /* __i386 */

 306 
 307 /*
 308  * Install gate segment descriptor for interrupt, trap, call and task gates.
 309  */
 310 
 311 #if defined(__amd64)
 312 
 313 /*ARGSUSED*/
 314 void
 315 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
 316     uint_t type, uint_t dpl, uint_t vector)
 317 {
 318         dp->sgd_looffset = (uintptr_t)func;
 319         dp->sgd_hioffset = (uintptr_t)func >> 16;
 320         dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
 321 
 322         dp->sgd_selector =  (uint16_t)sel;
 323 
 324         /*
 325          * For 64 bit native we use the IST stack mechanism
 326          * for double faults. All other traps use the CPL = 0
 327          * (tss_rsp0) stack.
 328          */
 329 #if !defined(__xpv)
 330         if (vector == T_DBLFLT)
 331                 dp->sgd_ist = 1;
 332         else
 333 #endif
 334                 dp->sgd_ist = 0;
 335 
 336         dp->sgd_type = type;
 337         dp->sgd_dpl = dpl;
 338         dp->sgd_p = 1;
 339 }
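
/*
 * Typical use (a sketch; see init_idt_common() below for the real calls):
 * install an interrupt gate, passing the vector so the double fault
 * handler is routed onto its IST stack:
 *
 *      set_gatesegd(&idt0[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT,
 *          TRP_KPL, T_DBLFLT);
 */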

#elif defined(__i386)

/*ARGSUSED*/
void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl, uint_t unused)
{
        dp->sgd_looffset = (uintptr_t)func;
        dp->sgd_hioffset = (uintptr_t)func >> 16;

        dp->sgd_selector = (uint16_t)sel;
        dp->sgd_stkcpy = 0;  /* always zero bytes */
        dp->sgd_type = type;
        dp->sgd_dpl = dpl;
        dp->sgd_p = 1;
}

#endif  /* __i386 */

/*
 * Updates a single user descriptor in the GDT of the current cpu.
 * Caller is responsible for preventing cpu migration.
 */
void
gdt_update_usegd(uint_t sidx, user_desc_t *udp)
{
#if defined(__xpv)

        uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;

        if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
                panic("gdt_update_usegd: HYPERVISOR_update_descriptor");

#else   /* __xpv */

        CPU->cpu_gdt[sidx] = *udp;

#endif  /* __xpv */
}
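
/*
 * Caller sketch (assumes kpreempt_disable() is how the caller prevents
 * migration; udesc is a hypothetical descriptor prepared with set_usegd()):
 *
 *      kpreempt_disable();
 *      gdt_update_usegd(GDT_LWPFS, &udesc);
 *      kpreempt_enable();
 */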

/*
 * Writes the single descriptor pointed to by udp into a process's
 * LDT entry pointed to by ldp.
 */
int
ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
{
#if defined(__xpv)

        uint64_t dpa;

        dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
            ((uintptr_t)ldp & PAGEOFFSET);

        /*
         * The hypervisor is a little more restrictive about what it
         * supports in the LDT.
         */
        if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
                return (EINVAL);

#else   /* __xpv */

        *ldp = *udp;

#endif  /* __xpv */
        return (0);
}

#if defined(__xpv)

/*
 * Converts hw format gate descriptor into pseudo-IDT format for the
 * hypervisor. Returns true if a valid entry was written.
 */
int
xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
{
        trap_info_t *ti = ti_arg;       /* XXPV Aargh - segments.h comment */

        /*
         * skip holes in the IDT
         */
        if (GATESEG_GETOFFSET(sgd) == 0)
                return (0);

        ASSERT(sgd->sgd_type == SDT_SYSIGT);
        ti->vector = vec;
        TI_SET_DPL(ti, sgd->sgd_dpl);

        /*
         * Is this an interrupt gate?
         */
        if (sgd->sgd_type == SDT_SYSIGT) {
                /* LINTED */
                TI_SET_IF(ti, 1);
        }
        ti->cs = sgd->sgd_selector;
#if defined(__amd64)
        ti->cs |= SEL_KPL;   /* force into ring 3. see KCS_SEL  */
#endif
        ti->address = GATESEG_GETOFFSET(sgd);
        return (1);
}

/*
 * Convert a single hw format gate descriptor and write it into our
 * virtual IDT.
 */
void
xen_idt_write(gate_desc_t *sgd, uint_t vec)
{
        trap_info_t trapinfo[2];

        bzero(trapinfo, sizeof (trapinfo));
        if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
                return;
        if (xen_set_trap_table(trapinfo) != 0)
                panic("xen_idt_write: xen_set_trap_table() failed");
}

#endif  /* __xpv */


#if defined(__amd64)

/*
 * Build kernel GDT.
 */

static void
init_gdt_common(user_desc_t *gdt)
{
        int i;

        /*
         * 64-bit kernel code segment.
         */
        set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
            SDP_PAGES, SDP_OP32);

        /*
         * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
         * mode, but we set it here to 0xFFFF so that we can use the SYSRET
         * instruction to return from system calls back to 32-bit applications.
         * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
         * descriptors. We therefore must ensure that the kernel uses something,
         * though it will be ignored by hardware, that is compatible with 32-bit
         * apps. For the same reason we must set the default op size of this
         * descriptor to 32-bit operands.
         */
        set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
            SEL_KPL, SDP_PAGES, SDP_OP32);
        gdt[GDT_KDATA].usd_def32 = 1;

        /*
         * 64-bit user code segment.
         */
        set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
            SDP_PAGES, SDP_OP32);

        /*
         * 32-bit user code segment.
         */
        set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
            SEL_UPL, SDP_PAGES, SDP_OP32);

        /*
         * See gdt_ucode32() and gdt_ucode_native().
         */
        ucs_on = ucs_off = gdt[GDT_UCODE];
        ucs_off.usd_p = 0;      /* forces #np fault */

        ucs32_on = ucs32_off = gdt[GDT_U32CODE];
        ucs32_off.usd_p = 0;    /* forces #np fault */
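
        /*
         * Sketch of how these copies are consumed (gdt_ucode32() and
         * gdt_ucode_native() are not part of this excerpt): to make 64-bit
         * code fetches fault for a 32-bit process, the "off" copy with
         * usd_p clear is swapped into the GDT:
         *
         *      gdt_update_usegd(GDT_U32CODE, &ucs32_on);
         *      gdt_update_usegd(GDT_UCODE, &ucs_off);
         */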

        /*
         * 32 and 64 bit data segments can actually share the same descriptor.
         * In long mode only the present bit is checked; all other fields are
         * loaded but ignored. In compatibility mode all fields are interpreted
         * as in legacy mode, so they must be set correctly for a 32-bit data
         * segment.
         */
        set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
            SDP_PAGES, SDP_OP32);

#if !defined(__xpv)

        /*
         * The 64-bit kernel has no default LDT. By default, the LDT descriptor
         * in the GDT is 0.
         */

        /*
         * Kernel TSS
         */
        set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
            sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif  /* !__xpv */

        /*
         * Initialize fs and gs descriptors for 32 bit processes.
         * Only attributes and limits are initialized; the effective
         * base address is programmed via fsbase/gsbase.
         */
        set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
            SEL_UPL, SDP_PAGES, SDP_OP32);
        set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
            SEL_UPL, SDP_PAGES, SDP_OP32);

        /*
         * Initialize the descriptors set aside for brand usage.
         * Only attributes and limits are initialized.
         */
        for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
                set_usegd(&gdt[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
                    SEL_UPL, SDP_PAGES, SDP_OP32);

        /*
         * Initialize convenient zero base user descriptors for clearing
         * lwp private %fs and %gs descriptors in GDT. See setregs() for
         * an example.
         */
        set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
            SDP_BYTES, SDP_OP32);
        set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
            SDP_PAGES, SDP_OP32);
}

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
        uint64_t gdtpa;
        ulong_t ma[1];          /* XXPV should be a memory_t */
        ulong_t addr;

#if !defined(__lint)
        /*
         * Our gdt is never larger than a single page.
         */
        ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
        gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(gdt0, PAGESIZE);

        init_gdt_common(gdt0);

        /*
         * XXX Since we never invoke kmdb until after the kernel takes
         * over the descriptor tables why not have it use the kernel's
         * selectors?
         */
        if (boothowto & RB_DEBUG) {
                set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
                    SEL_KPL, SDP_PAGES, SDP_OP32);
                set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
                    SEL_KPL, SDP_PAGES, SDP_OP32);
        }

        /*
         * Clear write permission for page containing the gdt and install it.
         */
        gdtpa = pfn_to_pa(va_to_pfn(gdt0));
        ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
        kbm_read_only((uintptr_t)gdt0, gdtpa);
        xen_set_gdt(ma, NGDT);

        /*
         * Reload the segment registers to use the new GDT.
         * On 64-bit, fixup KCS_SEL to be in ring 3.
         * See KCS_SEL in segments.h.
         */
        load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);

        /*
         *  setup %gs for kernel
         */
        xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);

        /*
         * XX64 We should never dereference off "other gsbase" or
         * "fsbase".  So, we should arrange to point FSBASE and
         * KGSBASE somewhere truly awful e.g. point it at the last
         * valid address below the hole so that any attempts to index
         * off them cause an exception.
         *
         * For now, point it at 8G -- at least it should be unmapped
         * until some 64-bit processes run.
         */
        addr = 0x200000000ul;
        xen_set_segment_base(SEGBASE_FS, addr);
        xen_set_segment_base(SEGBASE_GS_USER, addr);
        xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);

        return (gdt0);
}

#else   /* __xpv */

static user_desc_t *
init_gdt(void)
{
        desctbr_t       r_bgdt, r_gdt;
        user_desc_t     *bgdt;

#if !defined(__lint)
        /*
         * Our gdt is never larger than a single page.
         */
        ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
        gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(gdt0, PAGESIZE);

        init_gdt_common(gdt0);

        /*
         * Copy in from boot's gdt to our gdt.
         * Entry 0 is the null descriptor by definition.
         */
        rd_gdtr(&r_bgdt);
        bgdt = (user_desc_t *)r_bgdt.dtr_base;
        if (bgdt == NULL)
                panic("null boot gdt");

        gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
        gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
        gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
        gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
        gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];

        /*
         * Install our new GDT
         */
        r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
        r_gdt.dtr_base = (uintptr_t)gdt0;
        wr_gdtr(&r_gdt);

        /*
         * Reload the segment registers to use the new GDT
         */
        load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

        /*
         *  setup %gs for kernel
         */
        wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);

        /*
         * XX64 We should never dereference off "other gsbase" or
         * "fsbase".  So, we should arrange to point FSBASE and
         * KGSBASE somewhere truly awful e.g. point it at the last
         * valid address below the hole so that any attempts to index
         * off them cause an exception.
         *
         * For now, point it at 8G -- at least it should be unmapped
         * until some 64-bit processes run.
         */
        wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
        wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
        return (gdt0);
}

#endif  /* __xpv */

#elif defined(__i386)

static void
init_gdt_common(user_desc_t *gdt)
{
        int i;

        /*
         * Text and data for both kernel and user span entire 32 bit
         * address space.
         */

        /*
         * kernel code segment.
         */
        set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
            SDP_OP32);

        /*
         * kernel data segment.
         */
        set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
            SDP_OP32);

        /*
         * user code segment.
         */
        set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
            SDP_OP32);

        /*
         * user data segment.
         */
        set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
            SDP_OP32);

#if !defined(__xpv)

        /*
         * TSS for T_DBLFLT (double fault) handler
         */
        set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0,
            sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL);

        /*
         * TSS for kernel
         */
        set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
            sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif  /* !__xpv */

        /*
         * %gs selector for kernel
         */
        set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) - 1, SDT_MEMRWA,
            SEL_KPL, SDP_BYTES, SDP_OP32);

        /*
         * Initialize lwp private descriptors.
         * Only attributes and limits are initialized; the effective
         * base address is programmed via fsbase/gsbase.
         */
        set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
            SDP_PAGES, SDP_OP32);
        set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
            SDP_PAGES, SDP_OP32);

        /*
         * Initialize the descriptors set aside for brand usage.
         * Only attributes and limits are initialized.
         */
        for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
                set_usegd(&gdt[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
                    SDP_PAGES, SDP_OP32);

        /*
         * Initialize convenient zero base user descriptor for clearing
         * lwp private %fs and %gs descriptors in GDT. See setregs() for
         * an example.
         */
        set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
            SDP_BYTES, SDP_OP32);
}

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
        uint64_t gdtpa;
        ulong_t ma[1];          /* XXPV should be a memory_t */

#if !defined(__lint)
        /*
         * Our gdt is never larger than a single page.
         */
        ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
        gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(gdt0, PAGESIZE);

        init_gdt_common(gdt0);
        gdtpa = pfn_to_pa(va_to_pfn(gdt0));

        /*
         * XXX Since we never invoke kmdb until after the kernel takes
         * over the descriptor tables why not have it use the kernel's
         * selectors?
         */
        if (boothowto & RB_DEBUG) {
                set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
                    SDP_PAGES, SDP_OP32);
                set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
                    SDP_PAGES, SDP_OP32);
        }

        /*
         * Clear write permission for page containing the gdt and install it.
         */
        ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
        kbm_read_only((uintptr_t)gdt0, gdtpa);
        xen_set_gdt(ma, NGDT);

        /*
         * Reload the segment registers to use the new GDT
         */
        load_segment_registers(
            KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

        return (gdt0);
}

#else   /* __xpv */

static user_desc_t *
init_gdt(void)
{
        desctbr_t       r_bgdt, r_gdt;
        user_desc_t     *bgdt;

#if !defined(__lint)
        /*
         * Our gdt is never larger than a single page.
         */
        ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
        /*
         * XXX this allocation belongs in our caller, not here.
         */
        gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(gdt0, PAGESIZE);

        init_gdt_common(gdt0);

        /*
         * Copy in from boot's gdt to our gdt entries.
         * Entry 0 is the null descriptor by definition.
         */
        rd_gdtr(&r_bgdt);
        bgdt = (user_desc_t *)r_bgdt.dtr_base;
        if (bgdt == NULL)
                panic("null boot gdt");

        gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
        gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
        gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
        gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];

        /*
         * Install our new GDT
         */
        r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
        r_gdt.dtr_base = (uintptr_t)gdt0;
        wr_gdtr(&r_gdt);

        /*
         * Reload the segment registers to use the new GDT
         */
        load_segment_registers(
            KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

        return (gdt0);
}

#endif  /* __xpv */
#endif  /* __i386 */

/*
 * Build kernel IDT.
 *
 * Note that for amd64 we pretty much require every gate to be an interrupt
 * gate which blocks interrupts atomically on entry; that's because of our
 * dependency on using 'swapgs' every time we come into the kernel to find
 * the cpu structure. If we get interrupted just before doing that, %cs could
 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
 * %gsbase is really still pointing at something in userland. Bad things will
 * ensue. We use interrupt gates on i386 as well, even though they are not
 * required there for some traps.
 *
 * Perhaps they should have invented a trap gate that does an atomic swapgs?
 */
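
/*
 * A sketch of the check described above, roughly as the assembly trap
 * prolog expresses it (illustrative, not the actual entry code): swapgs
 * runs only when the saved %cs shows we arrived from user mode.
 *
 *      cmpw    $KCS_SEL, REGOFF_CS(%rsp)
 *      je      1f
 *      swapgs
 *      1:
 */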
static void
init_idt_common(gate_desc_t *idt)
{
        set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
            0);
        set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
            0);
        set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
            TRP_KPL, 0);
        set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_NOEXTFLT], &ndptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);

        /*
         * double fault handler.
         *
         * Note that on the hypervisor a guest does not receive #df faults.
         * Instead a failsafe event is injected into the guest if its selectors
         * and/or stack is in a broken state. See xen_failsafe_callback.
         */
#if !defined(__xpv)
#if defined(__amd64)

        set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            T_DBLFLT);

#elif defined(__i386)

        /*
         * task gate required.
         */
        set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL,
            0);

#endif  /* __i386 */
#endif  /* !__xpv */

        /*
         * T_EXTOVRFLT coprocessor-segment-overrun not supported.
         */

        set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
        set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
        set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
        set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
            TRP_KPL, 0);
        set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
        set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);

        /*
         * install "int80" handler at, well, 0x80.
         */
        set_gatesegd(&idt[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, TRP_UPL,
            0);

        /*
         * install fast trap handler at 210.
         */
        set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
            0);

        /*
         * System call handler.
         */
#if defined(__amd64)
        set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
            TRP_UPL, 0);

#elif defined(__i386)
        set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
            TRP_UPL, 0);
#endif  /* __i386 */

        /*
         * Install the DTrace interrupt handler for the pid provider.
         */
        set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
            SDT_SYSIGT, TRP_UPL, 0);

        /*
         * Prepare interposing descriptors for the branded "int80"
         * and syscall handlers and cache copies of the default
         * descriptors.
         */
        brand_tbl[0].ih_inum = T_INT80;
        brand_tbl[0].ih_default_desc = idt[T_INT80];
        set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
            SDT_SYSIGT, TRP_UPL, 0);

        brand_tbl[1].ih_inum = T_SYSCALLINT;
        brand_tbl[1].ih_default_desc = idt[T_SYSCALLINT];

#if defined(__amd64)
        set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
            KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#elif defined(__i386)
        set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
            KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#endif  /* __i386 */

        brand_tbl[2].ih_inum = 0;
}

#if defined(__xpv)

static void
init_idt(gate_desc_t *idt)
{
        init_idt_common(idt);
}

#else   /* __xpv */

static void
init_idt(gate_desc_t *idt)
{
        char    ivctname[80];
        void    (*ivctptr)(void);
        int     i;

        /*
         * Initialize entire table with 'reserved' trap and then overwrite
         * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
         * since it can only be generated on a 386 processor. 15 is also
         * unsupported and reserved.
         */
        for (i = 0; i < NIDT; i++)
                set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
                    0);

        /*
         * 20-31 reserved
         */
        for (i = 20; i < 32; i++)
                set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
                    0);

        /*
         * interrupts 32 - 255
         */
        for (i = 32; i < 256; i++) {
                (void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
                ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
                if (ivctptr == NULL)
                        panic("kobj_getsymvalue(%s) failed", ivctname);

                set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
        }

        /*
         * Now install the common ones. Note that it will overlay some
         * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
         */
        init_idt_common(idt);
}

#endif  /* __xpv */


/*
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
 * to reference the LDT will therefore cause a #gp. System calls made via the
 * obsolete lcall mechanism are emulated by the #gp fault handler.
 */
static void
init_ldt(void)
{
#if defined(__xpv)
        xen_set_ldt(NULL, 0);
#else
        wr_ldtr(0);
#endif
}

#if !defined(__xpv)
#if defined(__amd64)

static void
init_tss(void)
{
        /*
         * tss_rsp0 is dynamically filled in by resume() on each context
         * switch. All exceptions but #DF will run on the thread stack.
         * Set up the double fault stack here.
         */
        ktss0->tss_ist1 =
            (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];

        /*
         * Set I/O bit map offset equal to size of TSS segment limit
         * for no I/O permission map. This will force all user I/O
         * instructions to generate #gp fault.
         */
        ktss0->tss_bitmapbase = sizeof (*ktss0);

        /*
         * Point %tr to descriptor for ktss0 in gdt.
         */
        wr_tsr(KTSS_SEL);
}

#elif defined(__i386)

static void
init_tss(void)
{
        /*
         * ktss0->tss_esp is dynamically filled in by resume() on each
         * context switch.
         */
        ktss0->tss_ss0       = KDS_SEL;
        ktss0->tss_eip       = (uint32_t)_start;
        ktss0->tss_ds        = ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
        ktss0->tss_cs        = KCS_SEL;
        ktss0->tss_fs        = KFS_SEL;
        ktss0->tss_gs        = KGS_SEL;
        ktss0->tss_ldt       = ULDT_SEL;

        /*
         * Initialize double fault tss.
         */
        dftss0->tss_esp0 =
            (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
        dftss0->tss_ss0      = KDS_SEL;

        /*
         * tss_cr3 will get initialized in hat_kern_setup() once our page
         * tables have been set up.
         */
        dftss0->tss_eip      = (uint32_t)syserrtrap;
        dftss0->tss_esp      =
            (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
        dftss0->tss_cs       = KCS_SEL;
        dftss0->tss_ds       = KDS_SEL;
        dftss0->tss_es       = KDS_SEL;
        dftss0->tss_ss       = KDS_SEL;
        dftss0->tss_fs       = KFS_SEL;
        dftss0->tss_gs       = KGS_SEL;

        /*
         * Set I/O bit map offset equal to size of TSS segment limit
         * for no I/O permission map. This will force all user I/O
         * instructions to generate #gp fault.
         */
        ktss0->tss_bitmapbase = sizeof (*ktss0);

        /*
         * Point %tr to descriptor for ktss0 in gdt.
         */
        wr_tsr(KTSS_SEL);
}

#endif  /* __i386 */
#endif  /* !__xpv */

#if defined(__xpv)

void
init_desctbls(void)
{
        uint_t vec;
        user_desc_t *gdt;

        /*
         * Setup and install our GDT.
         */
        gdt = init_gdt();

        /*
         * Store static pa of gdt to speed up pa_to_ma() translations
         * on lwp context switches.
         */
        ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
        CPU->cpu_gdt = gdt;
        CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));

        /*
         * Setup and install our IDT.
         */
#if !defined(__lint)
        ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
        idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(idt0, PAGESIZE);
        init_idt(idt0);
        for (vec = 0; vec < NIDT; vec++)
                xen_idt_write(&idt0[vec], vec);

        CPU->cpu_idt = idt0;

        /*
         * set default kernel stack
         */
        xen_stack_switch(KDS_SEL,
            (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);

        xen_init_callbacks();

        init_ldt();
}

#else   /* __xpv */

void
init_desctbls(void)
{
        user_desc_t *gdt;
        desctbr_t idtr;

        /*
         * Allocate IDT and TSS structures on unique pages for better
         * performance in virtual machines.
         */
#if !defined(__lint)
        ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
        idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(idt0, PAGESIZE);
#if !defined(__lint)
        ASSERT(sizeof (*ktss0) <= PAGESIZE);
#endif
        ktss0 = (struct tss *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
            PAGESIZE, PAGESIZE);
        bzero(ktss0, PAGESIZE);

#if defined(__i386)
#if !defined(__lint)
        ASSERT(sizeof (*dftss0) <= PAGESIZE);
#endif
        dftss0 = (struct tss *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
            PAGESIZE, PAGESIZE);
        bzero(dftss0, PAGESIZE);
#endif

        /*
         * Setup and install our GDT.
         */
        gdt = init_gdt();
        ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
        CPU->cpu_gdt = gdt;

        /*
         * Setup and install our IDT.
         */
        init_idt(idt0);

        idtr.dtr_base = (uintptr_t)idt0;
        idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
        wr_idtr(&idtr);
        CPU->cpu_idt = idt0;

#if defined(__i386)
        /*
         * We maintain a description of idt0 in convenient IDTR format
         * for #pf's on some older pentium processors. See pentium_pftrap().
         */
        idt0_default_r = idtr;
#endif  /* __i386 */

        init_tss();
        CPU->cpu_tss = ktss0;
        init_ldt();
}

#endif  /* __xpv */

/*
 * In the early kernel, we need to set up a simple GDT to run on.
 *
 * XXPV Can dboot use this too?  See dboot_gdt.s
 */
void
init_boot_gdt(user_desc_t *bgdt)
{
#if defined(__amd64)
        set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
            SDP_PAGES, SDP_OP32);
        set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
            SDP_PAGES, SDP_OP32);
#elif defined(__i386)
        set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
            SDP_PAGES, SDP_OP32);
        set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
            SDP_PAGES, SDP_OP32);
#endif  /* __i386 */
}

/*
 * Enable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the branded entry points.
 */
void
brand_interpositioning_enable(void)
{
        gate_desc_t     *idt = CPU->cpu_idt;
        int             i;

        ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

        for (i = 0; brand_tbl[i].ih_inum; i++) {
                idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
#if defined(__xpv)
                xen_idt_write(&idt[brand_tbl[i].ih_inum],
                    brand_tbl[i].ih_inum);
#endif
        }

#if defined(__amd64)
#if defined(__xpv)

        /*
         * Currently the hypervisor only supports 64-bit syscalls via the
         * syscall instruction. The 32-bit syscalls are handled by the
         * interrupt gates installed above.
         */
        xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
            CALLBACKF_mask_events);

#else

        if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
                wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
                wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
        }

#endif
#endif  /* __amd64 */

        if (is_x86_feature(x86_featureset, X86FSET_SEP))
                wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
}

/*
 * Disable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the standard entry points, which bypass the interpositioning hooks.
 */
void
brand_interpositioning_disable(void)
{
        gate_desc_t     *idt = CPU->cpu_idt;
        int             i;

        ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

        for (i = 0; brand_tbl[i].ih_inum; i++) {
                idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
#if defined(__xpv)
                xen_idt_write(&idt[brand_tbl[i].ih_inum],
                    brand_tbl[i].ih_inum);
#endif
        }

#if defined(__amd64)
#if defined(__xpv)

        /*
         * See comment above in brand_interpositioning_enable.
         */
        xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
            CALLBACKF_mask_events);

#else

        if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
                wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
                wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
        }

#endif
#endif  /* __amd64 */

        if (is_x86_feature(x86_featureset, X86FSET_SEP))
                wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
}