1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * Copyright (c) 1992 Terrence R. Lambert. 28 * Copyright (c) 1990 The Regents of the University of California. 29 * All rights reserved. 30 * 31 * This code is derived from software contributed to Berkeley by 32 * William Jolitz. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. All advertising materials mentioning features or use of this software 43 * must display the following acknowledgement: 44 * This product includes software developed by the University of 45 * California, Berkeley and its contributors. 46 * 4. 
Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 63 */ 64 65 #include <sys/types.h> 66 #include <sys/sysmacros.h> 67 #include <sys/tss.h> 68 #include <sys/segments.h> 69 #include <sys/trap.h> 70 #include <sys/cpuvar.h> 71 #include <sys/bootconf.h> 72 #include <sys/x86_archext.h> 73 #include <sys/controlregs.h> 74 #include <sys/archsystm.h> 75 #include <sys/machsystm.h> 76 #include <sys/kobj.h> 77 #include <sys/cmn_err.h> 78 #include <sys/reboot.h> 79 #include <sys/kdi.h> 80 #include <sys/mach_mmu.h> 81 #include <sys/systm.h> 82 83 #ifdef __xpv 84 #include <sys/hypervisor.h> 85 #include <vm/as.h> 86 #endif 87 88 #include <sys/promif.h> 89 #include <sys/bootinfo.h> 90 #include <vm/kboot_mmu.h> 91 #include <vm/hat_pte.h> 92 93 /* 94 * cpu0 and default tables and structures. 
95 */ 96 user_desc_t *gdt0; 97 #if !defined(__xpv) 98 desctbr_t gdt0_default_r; 99 #endif 100 101 gate_desc_t *idt0; /* interrupt descriptor table */ 102 #if defined(__i386) 103 desctbr_t idt0_default_r; /* describes idt0 in IDTR format */ 104 #endif 105 106 struct tss *ktss0; /* kernel task state structure */ 107 108 #if defined(__i386) 109 struct tss *dftss0; /* #DF double-fault exception */ 110 #endif /* __i386 */ 111 112 user_desc_t zero_udesc; /* base zero user desc native procs */ 113 user_desc_t null_udesc; /* null user descriptor */ 114 system_desc_t null_sdesc; /* null system descriptor */ 115 116 #if defined(__amd64) 117 user_desc_t zero_u32desc; /* 32-bit compatibility procs */ 118 #endif /* __amd64 */ 119 120 #if defined(__amd64) 121 user_desc_t ucs_on; 122 user_desc_t ucs_off; 123 user_desc_t ucs32_on; 124 user_desc_t ucs32_off; 125 #endif /* __amd64 */ 126 127 #pragma align 16(dblfault_stack0) 128 char dblfault_stack0[DEFAULTSTKSZ]; 129 130 extern void fast_null(void); 131 extern hrtime_t get_hrtime(void); 132 extern hrtime_t gethrvtime(void); 133 extern hrtime_t get_hrestime(void); 134 extern uint64_t getlgrp(void); 135 136 void (*(fasttable[]))(void) = { 137 fast_null, /* T_FNULL routine */ 138 fast_null, /* T_FGETFP routine (initially null) */ 139 fast_null, /* T_FSETFP routine (initially null) */ 140 (void (*)())get_hrtime, /* T_GETHRTIME */ 141 (void (*)())gethrvtime, /* T_GETHRVTIME */ 142 (void (*)())get_hrestime, /* T_GETHRESTIME */ 143 (void (*)())getlgrp /* T_GETLGRP */ 144 }; 145 146 /* 147 * Structure containing pre-computed descriptors to allow us to temporarily 148 * interpose on a standard handler. 149 */ 150 struct interposing_handler { 151 int ih_inum; 152 gate_desc_t ih_interp_desc; 153 gate_desc_t ih_default_desc; 154 }; 155 156 /* 157 * The brand infrastructure interposes on two handlers, and we use one as a 158 * NULL signpost. 
159 */ 160 static struct interposing_handler brand_tbl[3]; 161 162 /* 163 * software prototypes for default local descriptor table 164 */ 165 166 /* 167 * Routines for loading segment descriptors in format the hardware 168 * can understand. 169 */ 170 171 #if defined(__amd64) 172 173 /* 174 * In long mode we have the new L or long mode attribute bit 175 * for code segments. Only the conforming bit in type is used along 176 * with descriptor priority and present bits. Default operand size must 177 * be zero when in long mode. In 32-bit compatibility mode all fields 178 * are treated as in legacy mode. For data segments while in long mode 179 * only the present bit is loaded. 180 */ 181 void 182 set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size, 183 uint_t type, uint_t dpl, uint_t gran, uint_t defopsz) 184 { 185 ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG); 186 187 /* 188 * 64-bit long mode. 189 */ 190 if (lmode == SDP_LONG) 191 dp->usd_def32 = 0; /* 32-bit operands only */ 192 else 193 /* 194 * 32-bit compatibility mode. 195 */ 196 dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32-bit ops */ 197 198 dp->usd_long = lmode; /* 64-bit mode */ 199 dp->usd_type = type; 200 dp->usd_dpl = dpl; 201 dp->usd_p = 1; 202 dp->usd_gran = gran; /* 0 = bytes, 1 = pages */ 203 204 dp->usd_lobase = (uintptr_t)base; 205 dp->usd_midbase = (uintptr_t)base >> 16; 206 dp->usd_hibase = (uintptr_t)base >> (16 + 8); 207 dp->usd_lolimit = size; 208 dp->usd_hilimit = (uintptr_t)size >> 16; 209 } 210 211 #elif defined(__i386) 212 213 /* 214 * Install user segment descriptor for code and data. 
215 */ 216 void 217 set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type, 218 uint_t dpl, uint_t gran, uint_t defopsz) 219 { 220 dp->usd_lolimit = size; 221 dp->usd_hilimit = (uintptr_t)size >> 16; 222 223 dp->usd_lobase = (uintptr_t)base; 224 dp->usd_midbase = (uintptr_t)base >> 16; 225 dp->usd_hibase = (uintptr_t)base >> (16 + 8); 226 227 dp->usd_type = type; 228 dp->usd_dpl = dpl; 229 dp->usd_p = 1; 230 dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32 bit operands */ 231 dp->usd_gran = gran; /* 0 = bytes, 1 = pages */ 232 } 233 234 #endif /* __i386 */ 235 236 /* 237 * Install system segment descriptor for LDT and TSS segments. 238 */ 239 240 #if defined(__amd64) 241 242 void 243 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type, 244 uint_t dpl) 245 { 246 dp->ssd_lolimit = size; 247 dp->ssd_hilimit = (uintptr_t)size >> 16; 248 249 dp->ssd_lobase = (uintptr_t)base; 250 dp->ssd_midbase = (uintptr_t)base >> 16; 251 dp->ssd_hibase = (uintptr_t)base >> (16 + 8); 252 dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8); 253 254 dp->ssd_type = type; 255 dp->ssd_zero1 = 0; /* must be zero */ 256 dp->ssd_zero2 = 0; 257 dp->ssd_dpl = dpl; 258 dp->ssd_p = 1; 259 dp->ssd_gran = 0; /* force byte units */ 260 } 261 262 void * 263 get_ssd_base(system_desc_t *dp) 264 { 265 uintptr_t base; 266 267 base = (uintptr_t)dp->ssd_lobase | 268 (uintptr_t)dp->ssd_midbase << 16 | 269 (uintptr_t)dp->ssd_hibase << (16 + 8) | 270 (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8); 271 return ((void *)base); 272 } 273 274 #elif defined(__i386) 275 276 void 277 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type, 278 uint_t dpl) 279 { 280 dp->ssd_lolimit = size; 281 dp->ssd_hilimit = (uintptr_t)size >> 16; 282 283 dp->ssd_lobase = (uintptr_t)base; 284 dp->ssd_midbase = (uintptr_t)base >> 16; 285 dp->ssd_hibase = (uintptr_t)base >> (16 + 8); 286 287 dp->ssd_type = type; 288 dp->ssd_zero = 0; /* must be zero */ 289 dp->ssd_dpl = dpl; 290 dp->ssd_p = 1; 291 
dp->ssd_gran = 0; /* force byte units */ 292 } 293 294 void * 295 get_ssd_base(system_desc_t *dp) 296 { 297 uintptr_t base; 298 299 base = (uintptr_t)dp->ssd_lobase | 300 (uintptr_t)dp->ssd_midbase << 16 | 301 (uintptr_t)dp->ssd_hibase << (16 + 8); 302 return ((void *)base); 303 } 304 305 #endif /* __i386 */ 306 307 /* 308 * Install gate segment descriptor for interrupt, trap, call and task gates. 309 */ 310 311 #if defined(__amd64) 312 313 /*ARGSUSED*/ 314 void 315 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel, 316 uint_t type, uint_t dpl, uint_t vector) 317 { 318 dp->sgd_looffset = (uintptr_t)func; 319 dp->sgd_hioffset = (uintptr_t)func >> 16; 320 dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16); 321 322 dp->sgd_selector = (uint16_t)sel; 323 324 /* 325 * For 64 bit native we use the IST stack mechanism 326 * for double faults. All other traps use the CPL = 0 327 * (tss_rsp0) stack. 328 */ 329 #if !defined(__xpv) 330 if (vector == T_DBLFLT) 331 dp->sgd_ist = 1; 332 else 333 #endif 334 dp->sgd_ist = 0; 335 336 dp->sgd_type = type; 337 dp->sgd_dpl = dpl; 338 dp->sgd_p = 1; 339 } 340 341 #elif defined(__i386) 342 343 /*ARGSUSED*/ 344 void 345 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel, 346 uint_t type, uint_t dpl, uint_t unused) 347 { 348 dp->sgd_looffset = (uintptr_t)func; 349 dp->sgd_hioffset = (uintptr_t)func >> 16; 350 351 dp->sgd_selector = (uint16_t)sel; 352 dp->sgd_stkcpy = 0; /* always zero bytes */ 353 dp->sgd_type = type; 354 dp->sgd_dpl = dpl; 355 dp->sgd_p = 1; 356 } 357 358 #endif /* __i386 */ 359 360 /* 361 * Updates a single user descriptor in the the GDT of the current cpu. 362 * Caller is responsible for preventing cpu migration. 
363 */ 364 365 void 366 gdt_update_usegd(uint_t sidx, user_desc_t *udp) 367 { 368 #if defined(__xpv) 369 370 uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx; 371 372 if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp)) 373 panic("gdt_update_usegd: HYPERVISOR_update_descriptor"); 374 375 #else /* __xpv */ 376 377 CPU->cpu_gdt[sidx] = *udp; 378 379 #endif /* __xpv */ 380 } 381 382 /* 383 * Writes single descriptor pointed to by udp into a processes 384 * LDT entry pointed to by ldp. 385 */ 386 int 387 ldt_update_segd(user_desc_t *ldp, user_desc_t *udp) 388 { 389 #if defined(__xpv) 390 391 uint64_t dpa; 392 393 dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) | 394 ((uintptr_t)ldp & PAGEOFFSET); 395 396 /* 397 * The hypervisor is a little more restrictive about what it 398 * supports in the LDT. 399 */ 400 if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0) 401 return (EINVAL); 402 403 #else /* __xpv */ 404 405 *ldp = *udp; 406 407 #endif /* __xpv */ 408 return (0); 409 } 410 411 #if defined(__xpv) 412 413 /* 414 * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor. 415 * Returns true if a valid entry was written. 416 */ 417 int 418 xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg) 419 { 420 trap_info_t *ti = ti_arg; /* XXPV Aargh - segments.h comment */ 421 422 /* 423 * skip holes in the IDT 424 */ 425 if (GATESEG_GETOFFSET(sgd) == 0) 426 return (0); 427 428 ASSERT(sgd->sgd_type == SDT_SYSIGT); 429 ti->vector = vec; 430 TI_SET_DPL(ti, sgd->sgd_dpl); 431 432 /* 433 * Is this an interrupt gate? 434 */ 435 if (sgd->sgd_type == SDT_SYSIGT) { 436 /* LINTED */ 437 TI_SET_IF(ti, 1); 438 } 439 ti->cs = sgd->sgd_selector; 440 #if defined(__amd64) 441 ti->cs |= SEL_KPL; /* force into ring 3. see KCS_SEL */ 442 #endif 443 ti->address = GATESEG_GETOFFSET(sgd); 444 return (1); 445 } 446 447 /* 448 * Convert a single hw format gate descriptor and write it into our virtual IDT. 
449 */ 450 void 451 xen_idt_write(gate_desc_t *sgd, uint_t vec) 452 { 453 trap_info_t trapinfo[2]; 454 455 bzero(trapinfo, sizeof (trapinfo)); 456 if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0) 457 return; 458 if (xen_set_trap_table(trapinfo) != 0) 459 panic("xen_idt_write: xen_set_trap_table() failed"); 460 } 461 462 #endif /* __xpv */ 463 464 #if defined(__amd64) 465 466 /* 467 * Build kernel GDT. 468 */ 469 470 static void 471 init_gdt_common(user_desc_t *gdt) 472 { 473 int i; 474 475 /* 476 * 64-bit kernel code segment. 477 */ 478 set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL, 479 SDP_PAGES, SDP_OP32); 480 481 /* 482 * 64-bit kernel data segment. The limit attribute is ignored in 64-bit 483 * mode, but we set it here to 0xFFFF so that we can use the SYSRET 484 * instruction to return from system calls back to 32-bit applications. 485 * SYSRET doesn't update the base, limit, or attributes of %ss or %ds 486 * descriptors. We therefore must ensure that the kernel uses something, 487 * though it will be ignored by hardware, that is compatible with 32-bit 488 * apps. For the same reason we must set the default op size of this 489 * descriptor to 32-bit operands. 490 */ 491 set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA, 492 SEL_KPL, SDP_PAGES, SDP_OP32); 493 gdt[GDT_KDATA].usd_def32 = 1; 494 495 /* 496 * 64-bit user code segment. 497 */ 498 set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL, 499 SDP_PAGES, SDP_OP32); 500 501 /* 502 * 32-bit user code segment. 503 */ 504 set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA, 505 SEL_UPL, SDP_PAGES, SDP_OP32); 506 507 /* 508 * See gdt_ucode32() and gdt_ucode_native(). 509 */ 510 ucs_on = ucs_off = gdt[GDT_UCODE]; 511 ucs_off.usd_p = 0; /* forces #np fault */ 512 513 ucs32_on = ucs32_off = gdt[GDT_U32CODE]; 514 ucs32_off.usd_p = 0; /* forces #np fault */ 515 516 /* 517 * 32 and 64 bit data segments can actually share the same descriptor. 
518 * In long mode only the present bit is checked but all other fields 519 * are loaded. But in compatibility mode all fields are interpreted 520 * as in legacy mode so they must be set correctly for a 32-bit data 521 * segment. 522 */ 523 set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL, 524 SDP_PAGES, SDP_OP32); 525 526 #if !defined(__xpv) 527 528 /* 529 * The 64-bit kernel has no default LDT. By default, the LDT descriptor 530 * in the GDT is 0. 531 */ 532 533 /* 534 * Kernel TSS 535 */ 536 set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0, 537 sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL); 538 539 #endif /* !__xpv */ 540 541 /* 542 * Initialize fs and gs descriptors for 32 bit processes. 543 * Only attributes and limits are initialized, the effective 544 * base address is programmed via fsbase/gsbase. 545 */ 546 set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA, 547 SEL_UPL, SDP_PAGES, SDP_OP32); 548 set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA, 549 SEL_UPL, SDP_PAGES, SDP_OP32); 550 551 /* 552 * Initialize the descriptors set aside for brand usage. 553 * Only attributes and limits are initialized. 554 */ 555 for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++) 556 set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA, 557 SEL_UPL, SDP_PAGES, SDP_OP32); 558 559 /* 560 * Initialize convenient zero base user descriptors for clearing 561 * lwp private %fs and %gs descriptors in GDT. See setregs() for 562 * an example. 563 */ 564 set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL, 565 SDP_BYTES, SDP_OP32); 566 set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL, 567 SDP_PAGES, SDP_OP32); 568 } 569 570 #if defined(__xpv) 571 572 static user_desc_t * 573 init_gdt(void) 574 { 575 uint64_t gdtpa; 576 ulong_t ma[1]; /* XXPV should be a memory_t */ 577 ulong_t addr; 578 579 #if !defined(__lint) 580 /* 581 * Our gdt is never larger than a single page. 
582 */ 583 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE); 584 #endif 585 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA, 586 PAGESIZE, PAGESIZE); 587 bzero(gdt0, PAGESIZE); 588 589 init_gdt_common(gdt0); 590 591 /* 592 * XXX Since we never invoke kmdb until after the kernel takes 593 * over the descriptor tables why not have it use the kernel's 594 * selectors? 595 */ 596 if (boothowto & RB_DEBUG) { 597 set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, 598 SEL_KPL, SDP_PAGES, SDP_OP32); 599 set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, 600 SEL_KPL, SDP_PAGES, SDP_OP32); 601 } 602 603 /* 604 * Clear write permission for page containing the gdt and install it. 605 */ 606 gdtpa = pfn_to_pa(va_to_pfn(gdt0)); 607 ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT); 608 kbm_read_only((uintptr_t)gdt0, gdtpa); 609 xen_set_gdt(ma, NGDT); 610 611 /* 612 * Reload the segment registers to use the new GDT. 613 * On 64-bit, fixup KCS_SEL to be in ring 3. 614 * See KCS_SEL in segments.h. 615 */ 616 load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL); 617 618 /* 619 * setup %gs for kernel 620 */ 621 xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]); 622 623 /* 624 * XX64 We should never dereference off "other gsbase" or 625 * "fsbase". So, we should arrange to point FSBASE and 626 * KGSBASE somewhere truly awful e.g. point it at the last 627 * valid address below the hole so that any attempts to index 628 * off them cause an exception. 629 * 630 * For now, point it at 8G -- at least it should be unmapped 631 * until some 64-bit processes run. 
632 */ 633 addr = 0x200000000ul; 634 xen_set_segment_base(SEGBASE_FS, addr); 635 xen_set_segment_base(SEGBASE_GS_USER, addr); 636 xen_set_segment_base(SEGBASE_GS_USER_SEL, 0); 637 638 return (gdt0); 639 } 640 641 #else /* __xpv */ 642 643 static user_desc_t * 644 init_gdt(void) 645 { 646 desctbr_t r_bgdt, r_gdt; 647 user_desc_t *bgdt; 648 649 #if !defined(__lint) 650 /* 651 * Our gdt is never larger than a single page. 652 */ 653 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE); 654 #endif 655 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA, 656 PAGESIZE, PAGESIZE); 657 bzero(gdt0, PAGESIZE); 658 659 init_gdt_common(gdt0); 660 661 /* 662 * Copy in from boot's gdt to our gdt. 663 * Entry 0 is the null descriptor by definition. 664 */ 665 rd_gdtr(&r_bgdt); 666 bgdt = (user_desc_t *)r_bgdt.dtr_base; 667 if (bgdt == NULL) 668 panic("null boot gdt"); 669 670 gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA]; 671 gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE]; 672 gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE]; 673 gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA]; 674 gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE]; 675 676 /* 677 * Install our new GDT 678 */ 679 r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1; 680 r_gdt.dtr_base = (uintptr_t)gdt0; 681 wr_gdtr(&r_gdt); 682 683 /* 684 * Reload the segment registers to use the new GDT 685 */ 686 load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL); 687 688 /* 689 * setup %gs for kernel 690 */ 691 wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]); 692 693 /* 694 * XX64 We should never dereference off "other gsbase" or 695 * "fsbase". So, we should arrange to point FSBASE and 696 * KGSBASE somewhere truly awful e.g. point it at the last 697 * valid address below the hole so that any attempts to index 698 * off them cause an exception. 699 * 700 * For now, point it at 8G -- at least it should be unmapped 701 * until some 64-bit processes run. 
702 */ 703 wrmsr(MSR_AMD_FSBASE, 0x200000000ul); 704 wrmsr(MSR_AMD_KGSBASE, 0x200000000ul); 705 return (gdt0); 706 } 707 708 #endif /* __xpv */ 709 710 #elif defined(__i386) 711 712 static void 713 init_gdt_common(user_desc_t *gdt) 714 { 715 int i; 716 717 /* 718 * Text and data for both kernel and user span entire 32 bit 719 * address space. 720 */ 721 722 /* 723 * kernel code segment. 724 */ 725 set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES, 726 SDP_OP32); 727 728 /* 729 * kernel data segment. 730 */ 731 set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES, 732 SDP_OP32); 733 734 /* 735 * user code segment. 736 */ 737 set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES, 738 SDP_OP32); 739 740 /* 741 * user data segment. 742 */ 743 set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES, 744 SDP_OP32); 745 746 #if !defined(__xpv) 747 748 /* 749 * TSS for T_DBLFLT (double fault) handler 750 */ 751 set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0, 752 sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL); 753 754 /* 755 * TSS for kernel 756 */ 757 set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0, 758 sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL); 759 760 #endif /* !__xpv */ 761 762 /* 763 * %gs selector for kernel 764 */ 765 set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) -1, SDT_MEMRWA, 766 SEL_KPL, SDP_BYTES, SDP_OP32); 767 768 /* 769 * Initialize lwp private descriptors. 770 * Only attributes and limits are initialized, the effective 771 * base address is programmed via fsbase/gsbase. 772 */ 773 set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL, 774 SDP_PAGES, SDP_OP32); 775 set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL, 776 SDP_PAGES, SDP_OP32); 777 778 /* 779 * Initialize the descriptors set aside for brand usage. 780 * Only attributes and limits are initialized. 
781 */ 782 for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++) 783 set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL, 784 SDP_PAGES, SDP_OP32); 785 /* 786 * Initialize convenient zero base user descriptor for clearing 787 * lwp private %fs and %gs descriptors in GDT. See setregs() for 788 * an example. 789 */ 790 set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL, 791 SDP_BYTES, SDP_OP32); 792 } 793 794 #if defined(__xpv) 795 796 static user_desc_t * 797 init_gdt(void) 798 { 799 uint64_t gdtpa; 800 ulong_t ma[1]; /* XXPV should be a memory_t */ 801 802 #if !defined(__lint) 803 /* 804 * Our gdt is never larger than a single page. 805 */ 806 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE); 807 #endif 808 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA, 809 PAGESIZE, PAGESIZE); 810 bzero(gdt0, PAGESIZE); 811 812 init_gdt_common(gdt0); 813 gdtpa = pfn_to_pa(va_to_pfn(gdt0)); 814 815 /* 816 * XXX Since we never invoke kmdb until after the kernel takes 817 * over the descriptor tables why not have it use the kernel's 818 * selectors? 819 */ 820 if (boothowto & RB_DEBUG) { 821 set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL, 822 SDP_PAGES, SDP_OP32); 823 set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL, 824 SDP_PAGES, SDP_OP32); 825 } 826 827 /* 828 * Clear write permission for page containing the gdt and install it. 829 */ 830 ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT); 831 kbm_read_only((uintptr_t)gdt0, gdtpa); 832 xen_set_gdt(ma, NGDT); 833 834 /* 835 * Reload the segment registers to use the new GDT 836 */ 837 load_segment_registers( 838 KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL); 839 840 return (gdt0); 841 } 842 843 #else /* __xpv */ 844 845 static user_desc_t * 846 init_gdt(void) 847 { 848 desctbr_t r_bgdt, r_gdt; 849 user_desc_t *bgdt; 850 851 #if !defined(__lint) 852 /* 853 * Our gdt is never larger than a single page. 
854 */ 855 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE); 856 #endif 857 /* 858 * XXX this allocation belongs in our caller, not here. 859 */ 860 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA, 861 PAGESIZE, PAGESIZE); 862 bzero(gdt0, PAGESIZE); 863 864 init_gdt_common(gdt0); 865 866 /* 867 * Copy in from boot's gdt to our gdt entries. 868 * Entry 0 is null descriptor by definition. 869 */ 870 rd_gdtr(&r_bgdt); 871 bgdt = (user_desc_t *)r_bgdt.dtr_base; 872 if (bgdt == NULL) 873 panic("null boot gdt"); 874 875 gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA]; 876 gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE]; 877 gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE]; 878 gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA]; 879 880 /* 881 * Install our new GDT 882 */ 883 r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1; 884 r_gdt.dtr_base = (uintptr_t)gdt0; 885 wr_gdtr(&r_gdt); 886 887 /* 888 * Reload the segment registers to use the new GDT 889 */ 890 load_segment_registers( 891 KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL); 892 893 return (gdt0); 894 } 895 896 #endif /* __xpv */ 897 #endif /* __i386 */ 898 899 /* 900 * Build kernel IDT. 901 * 902 * Note that for amd64 we pretty much require every gate to be an interrupt 903 * gate which blocks interrupts atomically on entry; that's because of our 904 * dependency on using 'swapgs' every time we come into the kernel to find 905 * the cpu structure. If we get interrupted just before doing that, %cs could 906 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but 907 * %gsbase is really still pointing at something in userland. Bad things will 908 * ensue. We also use interrupt gates for i386 as well even though this is not 909 * required for some traps. 910 * 911 * Perhaps they should have invented a trap gate that does an atomic swapgs? 
912 */ 913 static void 914 init_idt_common(gate_desc_t *idt) 915 { 916 set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 917 0); 918 set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 919 0); 920 set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL, 921 0); 922 set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL, 923 0); 924 set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL, 925 0); 926 set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT, 927 TRP_KPL, 0); 928 set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 929 0); 930 set_gatesegd(&idt[T_NOEXTFLT], &ndptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 931 0); 932 933 /* 934 * double fault handler. 935 * 936 * Note that on the hypervisor a guest does not receive #df faults. 937 * Instead a failsafe event is injected into the guest if its selectors 938 * and/or stack is in a broken state. See xen_failsafe_callback. 939 */ 940 #if !defined(__xpv) 941 #if defined(__amd64) 942 943 set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 944 T_DBLFLT); 945 946 #elif defined(__i386) 947 948 /* 949 * task gate required. 950 */ 951 set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL, 952 0); 953 954 #endif /* __i386 */ 955 #endif /* !__xpv */ 956 957 /* 958 * T_EXTOVRFLT coprocessor-segment-overrun not supported. 
959 */ 960 961 set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 962 0); 963 set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 964 0); 965 set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0); 966 set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0); 967 set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0); 968 set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 969 0); 970 set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT, 971 TRP_KPL, 0); 972 set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0); 973 set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0); 974 975 /* 976 * install "int80" handler at, well, 0x80. 977 */ 978 set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, TRP_UPL, 979 0); 980 981 /* 982 * install fast trap handler at 210. 983 */ 984 set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL, 985 0); 986 987 /* 988 * System call handler. 989 */ 990 #if defined(__amd64) 991 set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT, 992 TRP_UPL, 0); 993 994 #elif defined(__i386) 995 set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT, 996 TRP_UPL, 0); 997 #endif /* __i386 */ 998 999 /* 1000 * Install the DTrace interrupt handler for the pid provider. 1001 */ 1002 set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL, 1003 SDT_SYSIGT, TRP_UPL, 0); 1004 1005 /* 1006 * Prepare interposing descriptors for the branded "int80" 1007 * and syscall handlers and cache copies of the default 1008 * descriptors. 
 */
	brand_tbl[0].ih_inum = T_INT80;
	brand_tbl[0].ih_default_desc = idt0[T_INT80];
	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
	    SDT_SYSIGT, TRP_UPL, 0);

	brand_tbl[1].ih_inum = T_SYSCALLINT;
	brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT];

#if defined(__amd64)
	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#elif defined(__i386)
	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
	    KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#endif	/* __i386 */

	/*
	 * A zero ih_inum terminates the table; the scan loops in
	 * brand_interpositioning_{enable,disable}() stop on it.
	 */
	brand_tbl[2].ih_inum = 0;
}

#if defined(__xpv)

/*
 * Under the hypervisor only the common trap/syscall gates are installed;
 * the hardware interrupt vectors are not populated here.
 */
static void
init_idt(gate_desc_t *idt)
{
	init_idt_common(idt);
}

#else	/* __xpv */

/*
 * Build the full bare-metal IDT in three passes: fill every slot with a
 * default gate, wire up the hardware interrupt vectors 32-255, then
 * overlay the common trap/syscall gates.
 */
static void
init_idt(gate_desc_t *idt)
{
	char ivctname[80];
	void (*ivctptr)(void);
	int i;

	/*
	 * Initialize entire table with 'reserved' trap and then overwrite
	 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
	 * since it can only be generated on a 386 processor. 15 is also
	 * unsupported and reserved.
	 */
	for (i = 0; i < NIDT; i++)
		set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
		    0);

	/*
	 * 20-31 reserved
	 */
	for (i = 20; i < 32; i++)
		set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
		    0);

	/*
	 * interrupts 32 - 255
	 *
	 * Each vector's entry point is a symbol named "ivct<N>"; resolve it
	 * through the kernel runtime linker.  A missing symbol is fatal:
	 * the IDT would otherwise contain a hole.
	 */
	for (i = 32; i < 256; i++) {
		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
		if (ivctptr == NULL)
			panic("kobj_getsymvalue(%s) failed", ivctname);

		set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
	}

	/*
	 * Now install the common ones. Note that it will overlay some
	 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
	 */
	init_idt_common(idt);
}

#endif	/* __xpv */

/*
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
 * to reference the LDT will therefore cause a #gp. System calls made via the
 * obsolete lcall mechanism are emulated by the #gp fault handler.
 */
static void
init_ldt(void)
{
#if defined(__xpv)
	xen_set_ldt(NULL, 0);
#else
	wr_ldtr(0);
#endif
}

#if !defined(__xpv)
#if defined(__amd64)

/*
 * Configure cpu0's 64-bit TSS (ktss0) and load the task register.
 */
static void
init_tss(void)
{
	/*
	 * tss_rsp0 is dynamically filled in by resume() on each context switch.
	 * All exceptions but #DF will run on the thread stack.
	 * Set up the double fault stack here.
	 */
	ktss0->tss_ist1 =
	    (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0->tss_bitmapbase = sizeof (*ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}

#elif defined(__i386)

/*
 * Configure cpu0's 32-bit TSS (ktss0), the separate double-fault
 * task state (dftss0), and load the task register.
 */
static void
init_tss(void)
{
	/*
	 * ktss0->tss_esp dynamically filled in by resume() on each
	 * context switch.
	 */
	ktss0->tss_ss0 = KDS_SEL;
	ktss0->tss_eip = (uint32_t)_start;
	ktss0->tss_ds = ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
	ktss0->tss_cs = KCS_SEL;
	ktss0->tss_fs = KFS_SEL;
	ktss0->tss_gs = KGS_SEL;
	ktss0->tss_ldt = ULDT_SEL;

	/*
	 * Initialize double fault tss.  #DF switches to this task so it
	 * runs on its own stack (dblfault_stack0) with entry syserrtrap.
	 */
	dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0->tss_ss0 = KDS_SEL;

	/*
	 * tss_cr3 will get initialized in hat_kern_setup() once our page
	 * tables have been setup.
	 */
	dftss0->tss_eip = (uint32_t)syserrtrap;
	dftss0->tss_esp = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0->tss_cs = KCS_SEL;
	dftss0->tss_ds = KDS_SEL;
	dftss0->tss_es = KDS_SEL;
	dftss0->tss_ss = KDS_SEL;
	dftss0->tss_fs = KFS_SEL;
	dftss0->tss_gs = KGS_SEL;

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0->tss_bitmapbase = sizeof (*ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}

#endif	/* __i386 */
#endif	/* !__xpv */

#if defined(__xpv)

/*
 * Hypervisor flavor: allocate and install cpu0's GDT and IDT, register
 * the IDT entries and kernel stack with Xen, and clear the LDT.
 */
void
init_desctbls(void)
{
	uint_t vec;
	user_desc_t *gdt;

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();

	/*
	 * Store static pa of gdt to speed up pa_to_ma() translations
	 * on lwp context switches.
	 */
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_gdt = gdt;
	CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));

	/*
	 * Setup and install our IDT.  Every gate must be handed to the
	 * hypervisor individually via xen_idt_write().
	 */
#if !defined(__lint)
	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(idt0, PAGESIZE);
	init_idt(idt0);
	for (vec = 0; vec < NIDT; vec++)
		xen_idt_write(&idt0[vec], vec);

	CPU->cpu_idt = idt0;

	/*
	 * set default kernel stack
	 */
	xen_stack_switch(KDS_SEL,
	    (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);

	xen_init_callbacks();

	init_ldt();
}

#else	/* __xpv */

/*
 * Bare-metal flavor: allocate cpu0's IDT and TSS structures, install
 * the GDT, load the IDT and task registers, and clear the LDT.
 */
void
init_desctbls(void)
{
	user_desc_t *gdt;
	desctbr_t idtr;

	/*
	 * Allocate IDT and TSS structures on unique pages for better
	 * performance in virtual machines.
	 */
#if !defined(__lint)
	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(idt0, PAGESIZE);
#if !defined(__lint)
	ASSERT(sizeof (*ktss0) <= PAGESIZE);
#endif
	ktss0 = (struct tss *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
	    PAGESIZE, PAGESIZE);
	bzero(ktss0, PAGESIZE);

#if defined(__i386)
#if !defined(__lint)
	ASSERT(sizeof (*dftss0) <= PAGESIZE);
#endif
	dftss0 = (struct tss *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
	    PAGESIZE, PAGESIZE);
	bzero(dftss0, PAGESIZE);
#endif

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_gdt = gdt;

	/*
	 * Setup and install our IDT.
	 */
	init_idt(idt0);

	idtr.dtr_base = (uintptr_t)idt0;
	idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
	wr_idtr(&idtr);
	CPU->cpu_idt = idt0;

#if defined(__i386)
	/*
	 * We maintain a description of idt0 in convenient IDTR format
	 * for #pf's on some older pentium processors. See pentium_pftrap().
	 */
	idt0_default_r = idtr;
#endif	/* __i386 */

	init_tss();
	CPU->cpu_tss = ktss0;
	init_ldt();
}

#endif	/* __xpv */

/*
 * In the early kernel, we need to set up a simple GDT to run on.
 *
 * XXPV	Can dboot use this too?  See dboot_gdt.s
 */
/*
 * Install a minimal boot-time GDT into bgdt: flat data and code segments
 * at kernel privilege (64-bit code on amd64, 32-bit code on i386).
 */
void
init_boot_gdt(user_desc_t *bgdt)
{
#if defined(__amd64)
	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#elif defined(__i386)
	set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#endif	/* __i386 */
}

/*
 * Enable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the branded entry points.
 */
void
brand_interpositioning_enable(void)
{
	gate_desc_t *idt = CPU->cpu_idt;
	int i;

	/*
	 * Caller must hold off preemption (or be at high pil) so we are
	 * not migrated while rewriting this CPU's IDT.
	 */
	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	/*
	 * Swap in the interposition gate for each entry in brand_tbl[]
	 * (terminated by a zero ih_inum).
	 */
	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
#if defined(__xpv)
		/* The hypervisor must be told about each rewritten gate. */
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__amd64)
#if defined(__xpv)

	/*
	 * Currently the hypervisor only supports 64-bit syscalls via
	 * syscall instruction. The 32-bit syscalls are handled by
	 * interrupt gate above.
	 */
	xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	/* Redirect the syscall/syscall32 MSR entry points if supported. */
	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
		wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
	}

#endif
#endif	/* __amd64 */

	/* Redirect the sysenter entry point if supported. */
	if (is_x86_feature(x86_featureset, X86FSET_SEP))
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
}

/*
 * Disable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the standard entry points, which bypass the interpositioning hooks.
 */
void
brand_interpositioning_disable(void)
{
	gate_desc_t *idt = CPU->cpu_idt;
	int i;

	/*
	 * Caller must hold off preemption (or be at high pil) so we are
	 * not migrated while rewriting this CPU's IDT.
	 */
	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	/*
	 * Restore the default gate saved in brand_tbl[] for each entry
	 * (terminated by a zero ih_inum).
	 */
	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
#if defined(__xpv)
		/* The hypervisor must be told about each rewritten gate. */
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__amd64)
#if defined(__xpv)

	/*
	 * See comment above in brand_interpositioning_enable.
	 */
	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	/* Restore the standard syscall/syscall32 MSR entry points. */
	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
		wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
	}

#endif
#endif	/* __amd64 */

	/* Restore the standard sysenter entry point. */
	if (is_x86_feature(x86_featureset, X86FSET_SEP))
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
}