/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      from: @(#)machdep.c     7.4 (Berkeley) 6/3/91
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/tss.h>
#include <sys/segments.h>
#include <sys/trap.h>
#include <sys/cpuvar.h>
#include <sys/bootconf.h>
#include <sys/x86_archext.h>
#include <sys/controlregs.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/kobj.h>
#include <sys/cmn_err.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/mach_mmu.h>
#include <sys/systm.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#include <vm/as.h>
#endif

#include <sys/promif.h>
#include <sys/bootinfo.h>
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>

/*
 * cpu0 and default tables and structures.
 */
user_desc_t     *gdt0;
#if !defined(__xpv)
desctbr_t       gdt0_default_r;
#endif

gate_desc_t     *idt0;          /* interrupt descriptor table */
#if defined(__i386)
desctbr_t       idt0_default_r;         /* describes idt0 in IDTR format */
#endif

struct tss      *ktss0;                 /* kernel task state structure */

#if defined(__i386)
struct tss      *dftss0;                /* #DF double-fault exception */
#endif  /* __i386 */

user_desc_t     zero_udesc;             /* base zero user desc native procs */
user_desc_t     null_udesc;             /* null user descriptor */
system_desc_t   null_sdesc;             /* null system descriptor */

#if defined(__amd64)
user_desc_t     zero_u32desc;           /* 32-bit compatibility procs */
#endif  /* __amd64 */

#if defined(__amd64)
user_desc_t     ucs_on;
user_desc_t     ucs_off;
user_desc_t     ucs32_on;
user_desc_t     ucs32_off;
#endif  /* __amd64 */

#pragma align   16(dblfault_stack0)
char            dblfault_stack0[DEFAULTSTKSZ];

extern void     fast_null(void);
extern hrtime_t get_hrtime(void);
extern hrtime_t gethrvtime(void);
extern hrtime_t get_hrestime(void);
extern uint64_t getlgrp(void);

void (*(fasttable[]))(void) = {
        fast_null,                      /* T_FNULL routine */
        fast_null,                      /* T_FGETFP routine (initially null) */
        fast_null,                      /* T_FSETFP routine (initially null) */
        (void (*)())get_hrtime,         /* T_GETHRTIME */
        (void (*)())gethrvtime,         /* T_GETHRVTIME */
        (void (*)())get_hrestime,       /* T_GETHRESTIME */
        (void (*)())getlgrp             /* T_GETLGRP */
};
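
/*
 * Illustrative sketch (not part of this file): userland reaches one of
 * these routines via a software interrupt through the T_FASTTRAP gate
 * (vector 210, installed in init_idt_common() below), with the
 * fasttable[] index in %eax, roughly:
 *
 *      movl    $T_GETHRTIME, %eax
 *      int     $T_FASTTRAP
 */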

/*
 * Structure containing pre-computed descriptors to allow us to temporarily
 * interpose on a standard handler.
 */
struct interposing_handler {
        int ih_inum;
        gate_desc_t ih_interp_desc;
        gate_desc_t ih_default_desc;
};

/*
 * brand_tbl[] holds the handlers the brand infrastructure interposes on;
 * the final entry, with ih_inum == 0, serves as a NULL signpost that
 * terminates the table.
 */
static struct interposing_handler brand_tbl[2];

/*
 * software prototypes for default local descriptor table
 */

/*
 * Routines for loading segment descriptors in a format the hardware
 * can understand.
 */

#if defined(__amd64)

/*
 * In long mode we have the new L (long mode) attribute bit for code
 * segments. Only the conforming bit in the type field is used, along
 * with the descriptor privilege level (DPL) and present bits. The
 * default operand size must be zero in long mode. In 32-bit
 * compatibility mode all fields are interpreted as in legacy mode.
 * For data segments in long mode, only the present bit is loaded.
 */
void
set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
    uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
{
        ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);

        /*
         * 64-bit long mode.
         */
        if (lmode == SDP_LONG)
                dp->usd_def32 = 0;           /* 32-bit operands only */
        else
                /*
                 * 32-bit compatibility mode.
                 */
                dp->usd_def32 = defopsz;     /* 0 = 16, 1 = 32-bit ops */

        dp->usd_long = lmode;        /* 64-bit mode */
        dp->usd_type = type;
        dp->usd_dpl = dpl;
        dp->usd_p = 1;
        dp->usd_gran = gran;         /* 0 = bytes, 1 = pages */

        dp->usd_lobase = (uintptr_t)base;
        dp->usd_midbase = (uintptr_t)base >> 16;
        dp->usd_hibase = (uintptr_t)base >> (16 + 8);
        dp->usd_lolimit = size;
        dp->usd_hilimit = (uintptr_t)size >> 16;
}
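
/*
 * Illustrative use (mirrors init_gdt_common() below): the 64-bit kernel
 * code segment is built with a zero base and limit, both of which are
 * ignored by hardware in long mode:
 *
 *      set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA,
 *          SEL_KPL, SDP_PAGES, SDP_OP32);
 */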

#elif defined(__i386)

/*
 * Install user segment descriptor for code and data.
 */
void
set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl, uint_t gran, uint_t defopsz)
{
        dp->usd_lolimit = size;
        dp->usd_hilimit = (uintptr_t)size >> 16;

        dp->usd_lobase = (uintptr_t)base;
        dp->usd_midbase = (uintptr_t)base >> 16;
        dp->usd_hibase = (uintptr_t)base >> (16 + 8);

        dp->usd_type = type;
        dp->usd_dpl = dpl;
        dp->usd_p = 1;
        dp->usd_def32 = defopsz;     /* 0 = 16, 1 = 32 bit operands */
        dp->usd_gran = gran;         /* 0 = bytes, 1 = pages */
}

#endif  /* __i386 */

/*
 * Install system segment descriptor for LDT and TSS segments.
 */

#if defined(__amd64)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
        dp->ssd_lolimit = size;
        dp->ssd_hilimit = (uintptr_t)size >> 16;

        dp->ssd_lobase = (uintptr_t)base;
        dp->ssd_midbase = (uintptr_t)base >> 16;
        dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
        dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);

        dp->ssd_type = type;
        dp->ssd_zero1 = 0;   /* must be zero */
        dp->ssd_zero2 = 0;
        dp->ssd_dpl = dpl;
        dp->ssd_p = 1;
        dp->ssd_gran = 0;    /* force byte units */
}
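
/*
 * Illustrative use (mirrors init_gdt_common() below): the kernel TSS
 * descriptor is built in byte units with a limit of one less than the
 * structure size:
 *
 *      set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
 *          sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
 */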

void *
get_ssd_base(system_desc_t *dp)
{
        uintptr_t       base;

        base = (uintptr_t)dp->ssd_lobase |
            (uintptr_t)dp->ssd_midbase << 16 |
            (uintptr_t)dp->ssd_hibase << (16 + 8) |
            (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
        return ((void *)base);
}

#elif defined(__i386)

void
set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
    uint_t dpl)
{
        dp->ssd_lolimit = size;
        dp->ssd_hilimit = (uintptr_t)size >> 16;

        dp->ssd_lobase = (uintptr_t)base;
        dp->ssd_midbase = (uintptr_t)base >> 16;
        dp->ssd_hibase = (uintptr_t)base >> (16 + 8);

        dp->ssd_type = type;
        dp->ssd_zero = 0;    /* must be zero */
        dp->ssd_dpl = dpl;
        dp->ssd_p = 1;
        dp->ssd_gran = 0;    /* force byte units */
}

void *
get_ssd_base(system_desc_t *dp)
{
        uintptr_t       base;

        base = (uintptr_t)dp->ssd_lobase |
            (uintptr_t)dp->ssd_midbase << 16 |
            (uintptr_t)dp->ssd_hibase << (16 + 8);
        return ((void *)base);
}

#endif  /* __i386 */

/*
 * Install gate segment descriptor for interrupt, trap, call and task gates.
 */

#if defined(__amd64)

/*ARGSUSED*/
void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl, uint_t vector)
{
        dp->sgd_looffset = (uintptr_t)func;
        dp->sgd_hioffset = (uintptr_t)func >> 16;
        dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);

        dp->sgd_selector = (uint16_t)sel;

        /*
         * For 64 bit native we use the IST stack mechanism
         * for double faults. All other traps use the CPL = 0
         * (tss_rsp0) stack.
         */
#if !defined(__xpv)
        if (vector == T_DBLFLT)
                dp->sgd_ist = 1;
        else
#endif
                dp->sgd_ist = 0;

        dp->sgd_type = type;
        dp->sgd_dpl = dpl;
        dp->sgd_p = 1;
}
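
/*
 * Illustrative use (mirrors init_idt_common() below): the page fault
 * handler is installed as a kernel-only interrupt gate:
 *
 *      set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT,
 *          TRP_KPL, 0);
 */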

#elif defined(__i386)

/*ARGSUSED*/
void
set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
    uint_t type, uint_t dpl, uint_t unused)
{
        dp->sgd_looffset = (uintptr_t)func;
        dp->sgd_hioffset = (uintptr_t)func >> 16;

        dp->sgd_selector = (uint16_t)sel;
        dp->sgd_stkcpy = 0;  /* always zero bytes */
        dp->sgd_type = type;
        dp->sgd_dpl = dpl;
        dp->sgd_p = 1;
}

#endif  /* __i386 */

/*
 * Updates a single user descriptor in the GDT of the current CPU.
 * Caller is responsible for preventing cpu migration.
 */

void
gdt_update_usegd(uint_t sidx, user_desc_t *udp)
{
#if defined(__xpv)

        uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;

        if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
                panic("gdt_update_usegd: HYPERVISOR_update_descriptor");

#else   /* __xpv */

        CPU->cpu_gdt[sidx] = *udp;

#endif  /* __xpv */
}
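
/*
 * Hypothetical caller sketch (not from this file): rebuild and install
 * the current CPU's GDT_LWPFS slot while pinned to the CPU, e.g.:
 *
 *      kpreempt_disable();
 *      gdt_update_usegd(GDT_LWPFS, &udesc);
 *      kpreempt_enable();
 */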

/*
 * Writes the single descriptor pointed to by udp into the process's
 * LDT entry pointed to by ldp.
 */
int
ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
{
#if defined(__xpv)

        uint64_t dpa;

        dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
            ((uintptr_t)ldp & PAGEOFFSET);

        /*
         * The hypervisor is a little more restrictive about what it
         * supports in the LDT.
         */
        if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
                return (EINVAL);

#else   /* __xpv */

        *ldp = *udp;

#endif  /* __xpv */
        return (0);
}

#if defined(__xpv)

/*
 * Converts a hardware-format gate descriptor into the pseudo-IDT
 * (trap_info_t) format expected by the hypervisor.
 * Returns 1 if a valid entry was written, 0 if the IDT slot is a hole.
 */
int
xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
{
        trap_info_t *ti = ti_arg;       /* XXPV Aargh - segments.h comment */

        /*
         * skip holes in the IDT
         */
        if (GATESEG_GETOFFSET(sgd) == 0)
                return (0);

        ASSERT(sgd->sgd_type == SDT_SYSIGT);
        ti->vector = vec;
        TI_SET_DPL(ti, sgd->sgd_dpl);

        /*
         * Is this an interrupt gate?
         */
        if (sgd->sgd_type == SDT_SYSIGT) {
                /* LINTED */
                TI_SET_IF(ti, 1);
        }
        ti->cs = sgd->sgd_selector;
#if defined(__amd64)
        ti->cs |= SEL_KPL;   /* force into ring 3. see KCS_SEL  */
#endif
        ti->address = GATESEG_GETOFFSET(sgd);
        return (1);
}

/*
 * Convert a single hw format gate descriptor and write it into our virtual IDT.
 */
void
xen_idt_write(gate_desc_t *sgd, uint_t vec)
{
        trap_info_t trapinfo[2];

        bzero(trapinfo, sizeof (trapinfo));
        if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
                return;
        if (xen_set_trap_table(trapinfo) != 0)
                panic("xen_idt_write: xen_set_trap_table() failed");
}

#endif  /* __xpv */

#if defined(__amd64)

/*
 * Build kernel GDT.
 */

static void
init_gdt_common(user_desc_t *gdt)
{
        int i;

        /*
         * 64-bit kernel code segment.
         */
        set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
            SDP_PAGES, SDP_OP32);

        /*
         * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
         * mode, but we set it here to 0xFFFF so that we can use the SYSRET
         * instruction to return from system calls back to 32-bit applications.
         * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
         * descriptors. We therefore must ensure that the kernel uses something,
         * though it will be ignored by hardware, that is compatible with 32-bit
         * apps. For the same reason we must set the default op size of this
         * descriptor to 32-bit operands.
         */
        set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
            SEL_KPL, SDP_PAGES, SDP_OP32);
        gdt[GDT_KDATA].usd_def32 = 1;

        /*
         * 64-bit user code segment.
         */
        set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
            SDP_PAGES, SDP_OP32);

        /*
         * 32-bit user code segment.
         */
        set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
            SEL_UPL, SDP_PAGES, SDP_OP32);

        /*
         * See gdt_ucode32() and gdt_ucode_native().
         */
        ucs_on = ucs_off = gdt[GDT_UCODE];
        ucs_off.usd_p = 0;      /* forces #np fault */

        ucs32_on = ucs32_off = gdt[GDT_U32CODE];
        ucs32_off.usd_p = 0;    /* forces #np fault */

        /*
         * 32 and 64 bit data segments can actually share the same descriptor.
         * In long mode only the present bit is checked but all other fields
         * are loaded. But in compatibility mode all fields are interpreted
         * as in legacy mode so they must be set correctly for a 32-bit data
         * segment.
         */
        set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
            SDP_PAGES, SDP_OP32);

#if !defined(__xpv)

        /*
         * The 64-bit kernel has no default LDT. By default, the LDT descriptor
         * in the GDT is 0.
         */

        /*
         * Kernel TSS
         */
        set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
            sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif  /* !__xpv */

        /*
         * Initialize fs and gs descriptors for 32-bit processes.
         * Only attributes and limits are initialized; the effective
         * base address is programmed via fsbase/gsbase.
         */
        set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
            SEL_UPL, SDP_PAGES, SDP_OP32);
        set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
            SEL_UPL, SDP_PAGES, SDP_OP32);

        /*
         * Initialize the descriptors set aside for brand usage.
         * Only attributes and limits are initialized.
         */
        for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
                set_usegd(&gdt[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
                    SEL_UPL, SDP_PAGES, SDP_OP32);

        /*
         * Initialize convenient zero base user descriptors for clearing
         * lwp private %fs and %gs descriptors in GDT. See setregs() for
         * an example.
         */
        set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
            SDP_BYTES, SDP_OP32);
        set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
            SDP_PAGES, SDP_OP32);
}

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
        uint64_t gdtpa;
        ulong_t ma[1];          /* XXPV should be a memory_t */
        ulong_t addr;

#if !defined(__lint)
        /*
         * Our gdt is never larger than a single page.
         */
        ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
        gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(gdt0, PAGESIZE);

        init_gdt_common(gdt0);

        /*
         * XXX Since we never invoke kmdb until after the kernel takes
         * over the descriptor tables why not have it use the kernel's
         * selectors?
         */
        if (boothowto & RB_DEBUG) {
                set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
                    SEL_KPL, SDP_PAGES, SDP_OP32);
                set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
                    SEL_KPL, SDP_PAGES, SDP_OP32);
        }

        /*
         * Clear write permission for page containing the gdt and install it.
         */
        gdtpa = pfn_to_pa(va_to_pfn(gdt0));
        ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
        kbm_read_only((uintptr_t)gdt0, gdtpa);
        xen_set_gdt(ma, NGDT);

        /*
         * Reload the segment registers to use the new GDT.
         * On 64-bit, fixup KCS_SEL to be in ring 3.
         * See KCS_SEL in segments.h.
         */
        load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);

        /*
         *  setup %gs for kernel
         */
        xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);

        /*
         * XX64 We should never dereference off "other gsbase" or
         * "fsbase".  So, we should arrange to point FSBASE and
         * KGSBASE somewhere truly awful e.g. point it at the last
         * valid address below the hole so that any attempts to index
         * off them cause an exception.
         *
         * For now, point it at 8G -- at least it should be unmapped
         * until some 64-bit processes run.
         */
        addr = 0x200000000ul;
        xen_set_segment_base(SEGBASE_FS, addr);
        xen_set_segment_base(SEGBASE_GS_USER, addr);
        xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);

        return (gdt0);
}

#else   /* __xpv */

static user_desc_t *
init_gdt(void)
{
        desctbr_t       r_bgdt, r_gdt;
        user_desc_t     *bgdt;

#if !defined(__lint)
        /*
         * Our gdt is never larger than a single page.
         */
        ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
        gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(gdt0, PAGESIZE);

        init_gdt_common(gdt0);

        /*
         * Copy in from boot's gdt to our gdt.
         * Entry 0 is the null descriptor by definition.
         */
        rd_gdtr(&r_bgdt);
        bgdt = (user_desc_t *)r_bgdt.dtr_base;
        if (bgdt == NULL)
                panic("null boot gdt");

        gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
        gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
        gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
        gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
        gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];

        /*
         * Install our new GDT
         */
        r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
        r_gdt.dtr_base = (uintptr_t)gdt0;
        wr_gdtr(&r_gdt);

        /*
         * Reload the segment registers to use the new GDT
         */
        load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

        /*
         *  setup %gs for kernel
         */
        wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);

        /*
         * XX64 We should never dereference off "other gsbase" or
         * "fsbase".  So, we should arrange to point FSBASE and
         * KGSBASE somewhere truly awful e.g. point it at the last
         * valid address below the hole so that any attempts to index
         * off them cause an exception.
         *
         * For now, point it at 8G -- at least it should be unmapped
         * until some 64-bit processes run.
         */
        wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
        wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
        return (gdt0);
}

#endif  /* __xpv */

#elif defined(__i386)

static void
init_gdt_common(user_desc_t *gdt)
{
        int i;

        /*
         * Text and data for both kernel and user span the entire 32-bit
         * address space.
         */

        /*
         * kernel code segment.
         */
        set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
            SDP_OP32);

        /*
         * kernel data segment.
         */
        set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
            SDP_OP32);

        /*
         * user code segment.
         */
        set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
            SDP_OP32);

        /*
         * user data segment.
         */
        set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
            SDP_OP32);

#if !defined(__xpv)

        /*
         * TSS for T_DBLFLT (double fault) handler
         */
        set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0,
            sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL);

        /*
         * TSS for kernel
         */
        set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
            sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);

#endif  /* !__xpv */

        /*
         * %gs selector for kernel
         */
        set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) - 1, SDT_MEMRWA,
            SEL_KPL, SDP_BYTES, SDP_OP32);

        /*
         * Initialize lwp private descriptors.
         * Only attributes and limits are initialized; the effective
         * base address is programmed via fsbase/gsbase.
         */
        set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
            SDP_PAGES, SDP_OP32);
        set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
            SDP_PAGES, SDP_OP32);

        /*
         * Initialize the descriptors set aside for brand usage.
         * Only attributes and limits are initialized.
         */
        for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
                set_usegd(&gdt[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
                    SDP_PAGES, SDP_OP32);
        /*
         * Initialize convenient zero base user descriptor for clearing
         * lwp private %fs and %gs descriptors in GDT. See setregs() for
         * an example.
         */
        set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
            SDP_BYTES, SDP_OP32);
}

#if defined(__xpv)

static user_desc_t *
init_gdt(void)
{
        uint64_t gdtpa;
        ulong_t ma[1];          /* XXPV should be a memory_t */

#if !defined(__lint)
        /*
         * Our gdt is never larger than a single page.
         */
        ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
        gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(gdt0, PAGESIZE);

        init_gdt_common(gdt0);
        gdtpa = pfn_to_pa(va_to_pfn(gdt0));

        /*
         * XXX Since we never invoke kmdb until after the kernel takes
         * over the descriptor tables why not have it use the kernel's
         * selectors?
         */
        if (boothowto & RB_DEBUG) {
                set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
                    SDP_PAGES, SDP_OP32);
                set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
                    SDP_PAGES, SDP_OP32);
        }

        /*
         * Clear write permission for page containing the gdt and install it.
         */
        ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
        kbm_read_only((uintptr_t)gdt0, gdtpa);
        xen_set_gdt(ma, NGDT);

        /*
         * Reload the segment registers to use the new GDT
         */
        load_segment_registers(
            KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

        return (gdt0);
}

#else   /* __xpv */

static user_desc_t *
init_gdt(void)
{
        desctbr_t       r_bgdt, r_gdt;
        user_desc_t     *bgdt;

#if !defined(__lint)
        /*
         * Our gdt is never larger than a single page.
         */
        ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
#endif
        /*
         * XXX this allocation belongs in our caller, not here.
         */
        gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(gdt0, PAGESIZE);

        init_gdt_common(gdt0);

        /*
         * Copy in from boot's gdt to our gdt entries.
         * Entry 0 is null descriptor by definition.
         */
        rd_gdtr(&r_bgdt);
        bgdt = (user_desc_t *)r_bgdt.dtr_base;
        if (bgdt == NULL)
                panic("null boot gdt");

        gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
        gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
        gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
        gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];

        /*
         * Install our new GDT
         */
        r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
        r_gdt.dtr_base = (uintptr_t)gdt0;
        wr_gdtr(&r_gdt);

        /*
         * Reload the segment registers to use the new GDT
         */
        load_segment_registers(
            KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);

        return (gdt0);
}

#endif  /* __xpv */
#endif  /* __i386 */

/*
 * Build kernel IDT.
 *
 * Note that for amd64 we pretty much require every gate to be an interrupt
 * gate which blocks interrupts atomically on entry; that's because of our
 * dependency on using 'swapgs' every time we come into the kernel to find
 * the cpu structure. If we get interrupted just before doing that, %cs could
 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
 * %gsbase is really still pointing at something in userland. Bad things will
 * ensue. We use interrupt gates on i386 as well, even though this is not
 * required for some traps.
 *
 * Perhaps they should have invented a trap gate that does an atomic swapgs?
 */
static void
init_idt_common(gate_desc_t *idt)
{
        set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
            0);
        set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
            0);
        set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
            TRP_KPL, 0);
        set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_NOEXTFLT], &ndptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);

        /*
         * double fault handler.
         *
         * Note that on the hypervisor a guest does not receive #df faults.
         * Instead a failsafe event is injected into the guest if its selectors
         * and/or stack is in a broken state. See xen_failsafe_callback.
         */
#if !defined(__xpv)
#if defined(__amd64)

        set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            T_DBLFLT);

#elif defined(__i386)

        /*
         * task gate required.
         */
        set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL,
            0);

#endif  /* __i386 */
#endif  /* !__xpv */

        /*
         * T_EXTOVRFLT coprocessor-segment-overrun not supported.
         */

        set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
        set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
        set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
        set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
            0);
        set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
            TRP_KPL, 0);
        set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
        set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);

        /*
         * install fast trap handler at 210.
         */
        set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
            0);

        /*
         * System call handler.
         */
#if defined(__amd64)
        set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
            TRP_UPL, 0);

#elif defined(__i386)
        set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
            TRP_UPL, 0);
#endif  /* __i386 */

        /*
         * Install the DTrace interrupt handler for the pid provider.
         */
        set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
            SDT_SYSIGT, TRP_UPL, 0);

        /*
         * Prepare interposing descriptor for the syscall handler
         * and cache copy of the default descriptor.
         */
        brand_tbl[0].ih_inum = T_SYSCALLINT;
        brand_tbl[0].ih_default_desc = idt[T_SYSCALLINT];

#if defined(__amd64)
        set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_syscall_int,
            KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#elif defined(__i386)
        set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_call,
            KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#endif  /* __i386 */

        brand_tbl[1].ih_inum = 0;
}

#if defined(__xpv)

static void
init_idt(gate_desc_t *idt)
{
        init_idt_common(idt);
}

#else   /* __xpv */

static void
init_idt(gate_desc_t *idt)
{
        char    ivctname[80];
        void    (*ivctptr)(void);
        int     i;

        /*
         * Initialize the entire table with the 'reserved' trap and then
         * overwrite specific entries. T_EXTOVRFLT (9) is unsupported and
         * reserved since it can only be generated on a 386 processor.
         * Vector 15 is likewise unsupported and reserved.
         */
        for (i = 0; i < NIDT; i++)
                set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
                    0);

        /*
         * 20-31 reserved
         */
        for (i = 20; i < 32; i++)
                set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
                    0);

        /*
         * interrupts 32 - 255
         */
        for (i = 32; i < 256; i++) {
                (void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
                ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
                if (ivctptr == NULL)
                        panic("kobj_getsymvalue(%s) failed", ivctname);

                set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
        }

        /*
         * Now install the common ones. Note that this overwrites some of
         * the entries installed above, such as T_SYSCALLINT and T_FASTTRAP.
         */
        init_idt_common(idt);
}

#endif  /* __xpv */

/*
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
 * to reference the LDT will therefore cause a #gp. System calls made via the
 * obsolete lcall mechanism are emulated by the #gp fault handler.
 */
static void
init_ldt(void)
{
#if defined(__xpv)
        xen_set_ldt(NULL, 0);
#else
        wr_ldtr(0);
#endif
}

#if !defined(__xpv)
#if defined(__amd64)

static void
init_tss(void)
{
        /*
         * tss_rsp0 is dynamically filled in by resume() on each context switch.
         * All exceptions but #DF will run on the thread stack.
         * Set up the double fault stack here.
         */
        ktss0->tss_ist1 =
            (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];

        /*
         * Set I/O bit map offset equal to size of TSS segment limit
         * for no I/O permission map. This will force all user I/O
         * instructions to generate #gp fault.
         */
        ktss0->tss_bitmapbase = sizeof (*ktss0);

        /*
         * Point %tr to descriptor for ktss0 in gdt.
         */
        wr_tsr(KTSS_SEL);
}

#elif defined(__i386)

static void
init_tss(void)
{
        /*
         * ktss0->tss_esp is dynamically filled in by resume() on each
         * context switch.
         */
        ktss0->tss_ss0       = KDS_SEL;
        ktss0->tss_eip       = (uint32_t)_start;
        ktss0->tss_ds        = ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
        ktss0->tss_cs        = KCS_SEL;
        ktss0->tss_fs        = KFS_SEL;
        ktss0->tss_gs        = KGS_SEL;
        ktss0->tss_ldt       = ULDT_SEL;

        /*
         * Initialize double fault tss.
         */
        dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
        dftss0->tss_ss0      = KDS_SEL;

        /*
         * tss_cr3 will get initialized in hat_kern_setup() once our page
         * tables have been set up.
         */
        dftss0->tss_eip      = (uint32_t)syserrtrap;
        dftss0->tss_esp = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
        dftss0->tss_cs       = KCS_SEL;
        dftss0->tss_ds       = KDS_SEL;
        dftss0->tss_es       = KDS_SEL;
        dftss0->tss_ss       = KDS_SEL;
        dftss0->tss_fs       = KFS_SEL;
        dftss0->tss_gs       = KGS_SEL;

        /*
         * Set I/O bit map offset equal to size of TSS segment limit
         * for no I/O permission map. This will force all user I/O
         * instructions to generate #gp fault.
         */
        ktss0->tss_bitmapbase = sizeof (*ktss0);

        /*
         * Point %tr to descriptor for ktss0 in gdt.
         */
        wr_tsr(KTSS_SEL);
}

#endif  /* __i386 */
#endif  /* !__xpv */

#if defined(__xpv)

void
init_desctbls(void)
{
        uint_t vec;
        user_desc_t *gdt;

        /*
         * Setup and install our GDT.
         */
        gdt = init_gdt();

        /*
         * Store static pa of gdt to speed up pa_to_ma() translations
         * on lwp context switches.
         */
        ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
        CPU->cpu_gdt = gdt;
        CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));

        /*
         * Setup and install our IDT.
         */
#if !defined(__lint)
        ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
        idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(idt0, PAGESIZE);
        init_idt(idt0);
        for (vec = 0; vec < NIDT; vec++)
                xen_idt_write(&idt0[vec], vec);

        CPU->cpu_idt = idt0;

        /*
         * set default kernel stack
         */
        xen_stack_switch(KDS_SEL,
            (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);

        xen_init_callbacks();

        init_ldt();
}

#else   /* __xpv */

void
init_desctbls(void)
{
        user_desc_t *gdt;
        desctbr_t idtr;

        /*
         * Allocate IDT and TSS structures on unique pages for better
         * performance in virtual machines.
         */
#if !defined(__lint)
        ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
        idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
            PAGESIZE, PAGESIZE);
        bzero(idt0, PAGESIZE);
#if !defined(__lint)
        ASSERT(sizeof (*ktss0) <= PAGESIZE);
#endif
        ktss0 = (struct tss *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
            PAGESIZE, PAGESIZE);
        bzero(ktss0, PAGESIZE);

#if defined(__i386)
#if !defined(__lint)
        ASSERT(sizeof (*dftss0) <= PAGESIZE);
#endif
        dftss0 = (struct tss *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
            PAGESIZE, PAGESIZE);
        bzero(dftss0, PAGESIZE);
#endif

        /*
         * Setup and install our GDT.
         */
        gdt = init_gdt();
        ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
        CPU->cpu_gdt = gdt;

        /*
         * Setup and install our IDT.
         */
        init_idt(idt0);

        idtr.dtr_base = (uintptr_t)idt0;
        idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
        wr_idtr(&idtr);
        CPU->cpu_idt = idt0;

#if defined(__i386)
        /*
         * We maintain a description of idt0 in convenient IDTR format
         * for #pf's on some older Pentium processors. See pentium_pftrap().
         */
        idt0_default_r = idtr;
#endif  /* __i386 */

        init_tss();
        CPU->cpu_tss = ktss0;
        init_ldt();
}

#endif  /* __xpv */

/*
 * In the early kernel, we need to set up a simple GDT to run on.
 *
 * XXPV Can dboot use this too?  See dboot_gdt.s
 */
void
init_boot_gdt(user_desc_t *bgdt)
{
#if defined(__amd64)
        set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
            SDP_PAGES, SDP_OP32);
        set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
            SDP_PAGES, SDP_OP32);
#elif defined(__i386)
        set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
            SDP_PAGES, SDP_OP32);
        set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
            SDP_PAGES, SDP_OP32);
#endif  /* __i386 */
}

/*
 * Enable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the branded entry points.
 */
void
brand_interpositioning_enable(void)
{
        gate_desc_t     *idt = CPU->cpu_idt;
        int             i;

        ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

        for (i = 0; brand_tbl[i].ih_inum; i++) {
                idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
#if defined(__xpv)
                xen_idt_write(&idt[brand_tbl[i].ih_inum],
                    brand_tbl[i].ih_inum);
#endif
        }

#if defined(__amd64)
#if defined(__xpv)

        /*
         * Currently the hypervisor only supports 64-bit syscalls via
         * syscall instruction. The 32-bit syscalls are handled by
         * interrupt gate above.
         */
        xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
            CALLBACKF_mask_events);

#else

        if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
                wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
                wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
        }

#endif
#endif  /* __amd64 */

        if (is_x86_feature(x86_featureset, X86FSET_SEP))
                wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
}

/*
 * Disable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the standard entry points, which bypass the interpositioning hooks.
 */
void
brand_interpositioning_disable(void)
{
        gate_desc_t     *idt = CPU->cpu_idt;
        int i;

        ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

        for (i = 0; brand_tbl[i].ih_inum; i++) {
                idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
#if defined(__xpv)
                xen_idt_write(&idt[brand_tbl[i].ih_inum],
                    brand_tbl[i].ih_inum);
#endif
        }

#if defined(__amd64)
#if defined(__xpv)

        /*
         * See comment above in brand_interpositioning_enable.
         */
        xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
            CALLBACKF_mask_events);

#else

        if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
                wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
                wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
        }

#endif
#endif  /* __amd64 */

        if (is_x86_feature(x86_featureset, X86FSET_SEP))
                wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
}