Print this page
104 Bring back lx brand
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/intel/ia32/os/desctbls.c
+++ new/usr/src/uts/intel/ia32/os/desctbls.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * Copyright (c) 1992 Terrence R. Lambert.
28 28 * Copyright (c) 1990 The Regents of the University of California.
29 29 * All rights reserved.
30 30 *
31 31 * This code is derived from software contributed to Berkeley by
32 32 * William Jolitz.
33 33 *
34 34 * Redistribution and use in source and binary forms, with or without
35 35 * modification, are permitted provided that the following conditions
36 36 * are met:
37 37 * 1. Redistributions of source code must retain the above copyright
38 38 * notice, this list of conditions and the following disclaimer.
39 39 * 2. Redistributions in binary form must reproduce the above copyright
40 40 * notice, this list of conditions and the following disclaimer in the
41 41 * documentation and/or other materials provided with the distribution.
42 42 * 3. All advertising materials mentioning features or use of this software
43 43 * must display the following acknowledgement:
44 44 * This product includes software developed by the University of
45 45 * California, Berkeley and its contributors.
46 46 * 4. Neither the name of the University nor the names of its contributors
47 47 * may be used to endorse or promote products derived from this software
48 48 * without specific prior written permission.
49 49 *
50 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 60 * SUCH DAMAGE.
61 61 *
62 62 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
63 63 */
64 64
65 65 #include <sys/types.h>
66 66 #include <sys/sysmacros.h>
67 67 #include <sys/tss.h>
68 68 #include <sys/segments.h>
69 69 #include <sys/trap.h>
70 70 #include <sys/cpuvar.h>
71 71 #include <sys/bootconf.h>
72 72 #include <sys/x86_archext.h>
73 73 #include <sys/controlregs.h>
74 74 #include <sys/archsystm.h>
75 75 #include <sys/machsystm.h>
76 76 #include <sys/kobj.h>
77 77 #include <sys/cmn_err.h>
78 78 #include <sys/reboot.h>
79 79 #include <sys/kdi.h>
80 80 #include <sys/mach_mmu.h>
81 81 #include <sys/systm.h>
82 82
83 83 #ifdef __xpv
84 84 #include <sys/hypervisor.h>
85 85 #include <vm/as.h>
86 86 #endif
87 87
88 88 #include <sys/promif.h>
89 89 #include <sys/bootinfo.h>
90 90 #include <vm/kboot_mmu.h>
91 91 #include <vm/hat_pte.h>
92 92
93 93 /*
94 94 * cpu0 and default tables and structures.
95 95 */
96 96 user_desc_t *gdt0;
97 97 #if !defined(__xpv)
98 98 desctbr_t gdt0_default_r;
99 99 #endif
100 100
101 101 gate_desc_t *idt0; /* interrupt descriptor table */
102 102 #if defined(__i386)
103 103 desctbr_t idt0_default_r; /* describes idt0 in IDTR format */
104 104 #endif
105 105
106 106 struct tss *ktss0; /* kernel task state structure */
107 107
108 108 #if defined(__i386)
109 109 struct tss *dftss0; /* #DF double-fault exception */
110 110 #endif /* __i386 */
111 111
112 112 user_desc_t zero_udesc; /* base zero user desc native procs */
113 113 user_desc_t null_udesc; /* null user descriptor */
114 114 system_desc_t null_sdesc; /* null system descriptor */
115 115
116 116 #if defined(__amd64)
117 117 user_desc_t zero_u32desc; /* 32-bit compatibility procs */
118 118 #endif /* __amd64 */
119 119
120 120 #if defined(__amd64)
121 121 user_desc_t ucs_on;
122 122 user_desc_t ucs_off;
123 123 user_desc_t ucs32_on;
124 124 user_desc_t ucs32_off;
125 125 #endif /* __amd64 */
126 126
127 127 #pragma align 16(dblfault_stack0)
128 128 char dblfault_stack0[DEFAULTSTKSZ];
129 129
130 130 extern void fast_null(void);
131 131 extern hrtime_t get_hrtime(void);
132 132 extern hrtime_t gethrvtime(void);
133 133 extern hrtime_t get_hrestime(void);
134 134 extern uint64_t getlgrp(void);
135 135
136 136 void (*(fasttable[]))(void) = {
137 137 fast_null, /* T_FNULL routine */
138 138 fast_null, /* T_FGETFP routine (initially null) */
139 139 fast_null, /* T_FSETFP routine (initially null) */
140 140 (void (*)())get_hrtime, /* T_GETHRTIME */
141 141 (void (*)())gethrvtime, /* T_GETHRVTIME */
142 142 (void (*)())get_hrestime, /* T_GETHRESTIME */
143 143 (void (*)())getlgrp /* T_GETLGRP */
144 144 };
145 145
146 146 /*
147 147 * Structure containing pre-computed descriptors to allow us to temporarily
148 148 * interpose on a standard handler.
149 149 */
↓ open down ↓ |
149 lines elided |
↑ open up ↑ |
150 150 struct interposing_handler {
151 151 int ih_inum;
152 152 gate_desc_t ih_interp_desc;
153 153 gate_desc_t ih_default_desc;
154 154 };
155 155
156 156 /*
157 157 * The brand infrastructure interposes on two handlers, and we use one as a
158 158 * NULL signpost.
159 159 */
160 -static struct interposing_handler brand_tbl[2];
160 +static struct interposing_handler brand_tbl[3];
161 161
162 162 /*
163 163 * software prototypes for default local descriptor table
164 164 */
165 165
166 166 /*
167 167 * Routines for loading segment descriptors in format the hardware
168 168 * can understand.
169 169 */
170 170
171 171 #if defined(__amd64)
172 172
173 173 /*
174 174 * In long mode we have the new L or long mode attribute bit
175 175 * for code segments. Only the conforming bit in type is used along
176 176 * with descriptor priority and present bits. Default operand size must
177 177 * be zero when in long mode. In 32-bit compatibility mode all fields
178 178 * are treated as in legacy mode. For data segments while in long mode
179 179 * only the present bit is loaded.
180 180 */
181 181 void
182 182 set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
183 183 uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
184 184 {
185 185 ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
186 186
187 187 /*
188 188 * 64-bit long mode.
189 189 */
190 190 if (lmode == SDP_LONG)
191 191 dp->usd_def32 = 0; /* 32-bit operands only */
192 192 else
193 193 /*
194 194 * 32-bit compatibility mode.
195 195 */
196 196 dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32-bit ops */
197 197
198 198 dp->usd_long = lmode; /* 64-bit mode */
199 199 dp->usd_type = type;
200 200 dp->usd_dpl = dpl;
201 201 dp->usd_p = 1;
202 202 dp->usd_gran = gran; /* 0 = bytes, 1 = pages */
203 203
204 204 dp->usd_lobase = (uintptr_t)base;
205 205 dp->usd_midbase = (uintptr_t)base >> 16;
206 206 dp->usd_hibase = (uintptr_t)base >> (16 + 8);
207 207 dp->usd_lolimit = size;
208 208 dp->usd_hilimit = (uintptr_t)size >> 16;
209 209 }
210 210
211 211 #elif defined(__i386)
212 212
213 213 /*
214 214 * Install user segment descriptor for code and data.
215 215 */
216 216 void
217 217 set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
218 218 uint_t dpl, uint_t gran, uint_t defopsz)
219 219 {
220 220 dp->usd_lolimit = size;
221 221 dp->usd_hilimit = (uintptr_t)size >> 16;
222 222
223 223 dp->usd_lobase = (uintptr_t)base;
224 224 dp->usd_midbase = (uintptr_t)base >> 16;
225 225 dp->usd_hibase = (uintptr_t)base >> (16 + 8);
226 226
227 227 dp->usd_type = type;
228 228 dp->usd_dpl = dpl;
229 229 dp->usd_p = 1;
230 230 dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32 bit operands */
231 231 dp->usd_gran = gran; /* 0 = bytes, 1 = pages */
232 232 }
233 233
234 234 #endif /* __i386 */
235 235
236 236 /*
237 237 * Install system segment descriptor for LDT and TSS segments.
238 238 */
239 239
240 240 #if defined(__amd64)
241 241
242 242 void
243 243 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
244 244 uint_t dpl)
245 245 {
246 246 dp->ssd_lolimit = size;
247 247 dp->ssd_hilimit = (uintptr_t)size >> 16;
248 248
249 249 dp->ssd_lobase = (uintptr_t)base;
250 250 dp->ssd_midbase = (uintptr_t)base >> 16;
251 251 dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
252 252 dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);
253 253
254 254 dp->ssd_type = type;
255 255 dp->ssd_zero1 = 0; /* must be zero */
256 256 dp->ssd_zero2 = 0;
257 257 dp->ssd_dpl = dpl;
258 258 dp->ssd_p = 1;
259 259 dp->ssd_gran = 0; /* force byte units */
260 260 }
261 261
262 262 void *
263 263 get_ssd_base(system_desc_t *dp)
264 264 {
265 265 uintptr_t base;
266 266
267 267 base = (uintptr_t)dp->ssd_lobase |
268 268 (uintptr_t)dp->ssd_midbase << 16 |
269 269 (uintptr_t)dp->ssd_hibase << (16 + 8) |
270 270 (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
271 271 return ((void *)base);
272 272 }
273 273
274 274 #elif defined(__i386)
275 275
276 276 void
277 277 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
278 278 uint_t dpl)
279 279 {
280 280 dp->ssd_lolimit = size;
281 281 dp->ssd_hilimit = (uintptr_t)size >> 16;
282 282
283 283 dp->ssd_lobase = (uintptr_t)base;
284 284 dp->ssd_midbase = (uintptr_t)base >> 16;
285 285 dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
286 286
287 287 dp->ssd_type = type;
288 288 dp->ssd_zero = 0; /* must be zero */
289 289 dp->ssd_dpl = dpl;
290 290 dp->ssd_p = 1;
291 291 dp->ssd_gran = 0; /* force byte units */
292 292 }
293 293
294 294 void *
295 295 get_ssd_base(system_desc_t *dp)
296 296 {
297 297 uintptr_t base;
298 298
299 299 base = (uintptr_t)dp->ssd_lobase |
300 300 (uintptr_t)dp->ssd_midbase << 16 |
301 301 (uintptr_t)dp->ssd_hibase << (16 + 8);
302 302 return ((void *)base);
303 303 }
304 304
305 305 #endif /* __i386 */
306 306
307 307 /*
308 308 * Install gate segment descriptor for interrupt, trap, call and task gates.
309 309 */
310 310
311 311 #if defined(__amd64)
312 312
313 313 /*ARGSUSED*/
314 314 void
315 315 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
316 316 uint_t type, uint_t dpl, uint_t vector)
317 317 {
318 318 dp->sgd_looffset = (uintptr_t)func;
319 319 dp->sgd_hioffset = (uintptr_t)func >> 16;
320 320 dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
321 321
322 322 dp->sgd_selector = (uint16_t)sel;
323 323
324 324 /*
325 325 * For 64 bit native we use the IST stack mechanism
326 326 * for double faults. All other traps use the CPL = 0
327 327 * (tss_rsp0) stack.
328 328 */
329 329 #if !defined(__xpv)
330 330 if (vector == T_DBLFLT)
331 331 dp->sgd_ist = 1;
332 332 else
333 333 #endif
334 334 dp->sgd_ist = 0;
335 335
336 336 dp->sgd_type = type;
337 337 dp->sgd_dpl = dpl;
338 338 dp->sgd_p = 1;
339 339 }
340 340
341 341 #elif defined(__i386)
342 342
343 343 /*ARGSUSED*/
344 344 void
345 345 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
346 346 uint_t type, uint_t dpl, uint_t unused)
347 347 {
348 348 dp->sgd_looffset = (uintptr_t)func;
349 349 dp->sgd_hioffset = (uintptr_t)func >> 16;
350 350
351 351 dp->sgd_selector = (uint16_t)sel;
352 352 dp->sgd_stkcpy = 0; /* always zero bytes */
353 353 dp->sgd_type = type;
354 354 dp->sgd_dpl = dpl;
355 355 dp->sgd_p = 1;
356 356 }
357 357
358 358 #endif /* __i386 */
359 359
360 360 /*
 361  361  * Updates a single user descriptor in the GDT of the current cpu.
362 362 * Caller is responsible for preventing cpu migration.
363 363 */
364 364
365 365 void
366 366 gdt_update_usegd(uint_t sidx, user_desc_t *udp)
367 367 {
368 368 #if defined(__xpv)
369 369
370 370 uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
371 371
372 372 if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
373 373 panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
374 374
375 375 #else /* __xpv */
376 376
377 377 CPU->cpu_gdt[sidx] = *udp;
378 378
379 379 #endif /* __xpv */
380 380 }
381 381
382 382 /*
 383  383  * Writes a single descriptor pointed to by udp into a process's
384 384 * LDT entry pointed to by ldp.
385 385 */
386 386 int
387 387 ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
388 388 {
389 389 #if defined(__xpv)
390 390
391 391 uint64_t dpa;
392 392
393 393 dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
394 394 ((uintptr_t)ldp & PAGEOFFSET);
395 395
396 396 /*
397 397 * The hypervisor is a little more restrictive about what it
398 398 * supports in the LDT.
399 399 */
400 400 if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
401 401 return (EINVAL);
402 402
403 403 #else /* __xpv */
404 404
405 405 *ldp = *udp;
406 406
407 407 #endif /* __xpv */
408 408 return (0);
409 409 }
410 410
411 411 #if defined(__xpv)
412 412
413 413 /*
414 414 * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor.
415 415 * Returns true if a valid entry was written.
416 416 */
417 417 int
418 418 xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
419 419 {
420 420 trap_info_t *ti = ti_arg; /* XXPV Aargh - segments.h comment */
421 421
422 422 /*
423 423 * skip holes in the IDT
424 424 */
425 425 if (GATESEG_GETOFFSET(sgd) == 0)
426 426 return (0);
427 427
428 428 ASSERT(sgd->sgd_type == SDT_SYSIGT);
429 429 ti->vector = vec;
430 430 TI_SET_DPL(ti, sgd->sgd_dpl);
431 431
432 432 /*
433 433 * Is this an interrupt gate?
434 434 */
435 435 if (sgd->sgd_type == SDT_SYSIGT) {
436 436 /* LINTED */
437 437 TI_SET_IF(ti, 1);
438 438 }
439 439 ti->cs = sgd->sgd_selector;
440 440 #if defined(__amd64)
441 441 ti->cs |= SEL_KPL; /* force into ring 3. see KCS_SEL */
442 442 #endif
443 443 ti->address = GATESEG_GETOFFSET(sgd);
444 444 return (1);
445 445 }
446 446
447 447 /*
448 448 * Convert a single hw format gate descriptor and write it into our virtual IDT.
449 449 */
450 450 void
451 451 xen_idt_write(gate_desc_t *sgd, uint_t vec)
452 452 {
453 453 trap_info_t trapinfo[2];
454 454
455 455 bzero(trapinfo, sizeof (trapinfo));
456 456 if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
457 457 return;
458 458 if (xen_set_trap_table(trapinfo) != 0)
459 459 panic("xen_idt_write: xen_set_trap_table() failed");
460 460 }
461 461
462 462 #endif /* __xpv */
463 463
464 464 #if defined(__amd64)
465 465
466 466 /*
467 467 * Build kernel GDT.
468 468 */
469 469
470 470 static void
471 471 init_gdt_common(user_desc_t *gdt)
472 472 {
473 473 int i;
474 474
475 475 /*
476 476 * 64-bit kernel code segment.
477 477 */
478 478 set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
479 479 SDP_PAGES, SDP_OP32);
480 480
481 481 /*
482 482 * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
483 483 * mode, but we set it here to 0xFFFF so that we can use the SYSRET
484 484 * instruction to return from system calls back to 32-bit applications.
485 485 * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
486 486 * descriptors. We therefore must ensure that the kernel uses something,
487 487 * though it will be ignored by hardware, that is compatible with 32-bit
488 488 * apps. For the same reason we must set the default op size of this
489 489 * descriptor to 32-bit operands.
490 490 */
491 491 set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
492 492 SEL_KPL, SDP_PAGES, SDP_OP32);
493 493 gdt[GDT_KDATA].usd_def32 = 1;
494 494
495 495 /*
496 496 * 64-bit user code segment.
497 497 */
498 498 set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
499 499 SDP_PAGES, SDP_OP32);
500 500
501 501 /*
502 502 * 32-bit user code segment.
503 503 */
504 504 set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
505 505 SEL_UPL, SDP_PAGES, SDP_OP32);
506 506
507 507 /*
508 508 * See gdt_ucode32() and gdt_ucode_native().
509 509 */
510 510 ucs_on = ucs_off = gdt[GDT_UCODE];
511 511 ucs_off.usd_p = 0; /* forces #np fault */
512 512
513 513 ucs32_on = ucs32_off = gdt[GDT_U32CODE];
514 514 ucs32_off.usd_p = 0; /* forces #np fault */
515 515
516 516 /*
517 517 * 32 and 64 bit data segments can actually share the same descriptor.
518 518 * In long mode only the present bit is checked but all other fields
519 519 * are loaded. But in compatibility mode all fields are interpreted
520 520 * as in legacy mode so they must be set correctly for a 32-bit data
521 521 * segment.
522 522 */
523 523 set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
524 524 SDP_PAGES, SDP_OP32);
525 525
526 526 #if !defined(__xpv)
527 527
528 528 /*
529 529 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
530 530 * in the GDT is 0.
531 531 */
532 532
533 533 /*
534 534 * Kernel TSS
535 535 */
536 536 set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
537 537 sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
538 538
539 539 #endif /* !__xpv */
540 540
541 541 /*
542 542 * Initialize fs and gs descriptors for 32 bit processes.
543 543 * Only attributes and limits are initialized, the effective
544 544 * base address is programmed via fsbase/gsbase.
545 545 */
546 546 set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
547 547 SEL_UPL, SDP_PAGES, SDP_OP32);
548 548 set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
549 549 SEL_UPL, SDP_PAGES, SDP_OP32);
550 550
551 551 /*
552 552 * Initialize the descriptors set aside for brand usage.
553 553 * Only attributes and limits are initialized.
554 554 */
555 555 for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
556 556 set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
557 557 SEL_UPL, SDP_PAGES, SDP_OP32);
558 558
559 559 /*
560 560 * Initialize convenient zero base user descriptors for clearing
561 561 * lwp private %fs and %gs descriptors in GDT. See setregs() for
562 562 * an example.
563 563 */
564 564 set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
565 565 SDP_BYTES, SDP_OP32);
566 566 set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
567 567 SDP_PAGES, SDP_OP32);
568 568 }
569 569
570 570 #if defined(__xpv)
571 571
572 572 static user_desc_t *
573 573 init_gdt(void)
574 574 {
575 575 uint64_t gdtpa;
576 576 ulong_t ma[1]; /* XXPV should be a memory_t */
577 577 ulong_t addr;
578 578
579 579 #if !defined(__lint)
580 580 /*
581 581 * Our gdt is never larger than a single page.
582 582 */
583 583 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
584 584 #endif
585 585 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
586 586 PAGESIZE, PAGESIZE);
587 587 bzero(gdt0, PAGESIZE);
588 588
589 589 init_gdt_common(gdt0);
590 590
591 591 /*
592 592 * XXX Since we never invoke kmdb until after the kernel takes
593 593 * over the descriptor tables why not have it use the kernel's
594 594 * selectors?
595 595 */
596 596 if (boothowto & RB_DEBUG) {
597 597 set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
598 598 SEL_KPL, SDP_PAGES, SDP_OP32);
599 599 set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
600 600 SEL_KPL, SDP_PAGES, SDP_OP32);
601 601 }
602 602
603 603 /*
604 604 * Clear write permission for page containing the gdt and install it.
605 605 */
606 606 gdtpa = pfn_to_pa(va_to_pfn(gdt0));
607 607 ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
608 608 kbm_read_only((uintptr_t)gdt0, gdtpa);
609 609 xen_set_gdt(ma, NGDT);
610 610
611 611 /*
612 612 * Reload the segment registers to use the new GDT.
613 613 * On 64-bit, fixup KCS_SEL to be in ring 3.
614 614 * See KCS_SEL in segments.h.
615 615 */
616 616 load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);
617 617
618 618 /*
619 619 * setup %gs for kernel
620 620 */
621 621 xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);
622 622
623 623 /*
624 624 * XX64 We should never dereference off "other gsbase" or
625 625 * "fsbase". So, we should arrange to point FSBASE and
626 626 * KGSBASE somewhere truly awful e.g. point it at the last
627 627 * valid address below the hole so that any attempts to index
628 628 * off them cause an exception.
629 629 *
630 630 * For now, point it at 8G -- at least it should be unmapped
631 631 * until some 64-bit processes run.
632 632 */
633 633 addr = 0x200000000ul;
634 634 xen_set_segment_base(SEGBASE_FS, addr);
635 635 xen_set_segment_base(SEGBASE_GS_USER, addr);
636 636 xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);
637 637
638 638 return (gdt0);
639 639 }
640 640
641 641 #else /* __xpv */
642 642
643 643 static user_desc_t *
644 644 init_gdt(void)
645 645 {
646 646 desctbr_t r_bgdt, r_gdt;
647 647 user_desc_t *bgdt;
648 648
649 649 #if !defined(__lint)
650 650 /*
651 651 * Our gdt is never larger than a single page.
652 652 */
653 653 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
654 654 #endif
655 655 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
656 656 PAGESIZE, PAGESIZE);
657 657 bzero(gdt0, PAGESIZE);
658 658
659 659 init_gdt_common(gdt0);
660 660
661 661 /*
662 662 * Copy in from boot's gdt to our gdt.
663 663 * Entry 0 is the null descriptor by definition.
664 664 */
665 665 rd_gdtr(&r_bgdt);
666 666 bgdt = (user_desc_t *)r_bgdt.dtr_base;
667 667 if (bgdt == NULL)
668 668 panic("null boot gdt");
669 669
670 670 gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
671 671 gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
672 672 gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
673 673 gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
674 674 gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];
675 675
676 676 /*
677 677 * Install our new GDT
678 678 */
679 679 r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
680 680 r_gdt.dtr_base = (uintptr_t)gdt0;
681 681 wr_gdtr(&r_gdt);
682 682
683 683 /*
684 684 * Reload the segment registers to use the new GDT
685 685 */
686 686 load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
687 687
688 688 /*
689 689 * setup %gs for kernel
690 690 */
691 691 wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);
692 692
693 693 /*
694 694 * XX64 We should never dereference off "other gsbase" or
695 695 * "fsbase". So, we should arrange to point FSBASE and
696 696 * KGSBASE somewhere truly awful e.g. point it at the last
697 697 * valid address below the hole so that any attempts to index
698 698 * off them cause an exception.
699 699 *
700 700 * For now, point it at 8G -- at least it should be unmapped
701 701 * until some 64-bit processes run.
702 702 */
703 703 wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
704 704 wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
705 705 return (gdt0);
706 706 }
707 707
708 708 #endif /* __xpv */
709 709
710 710 #elif defined(__i386)
711 711
712 712 static void
713 713 init_gdt_common(user_desc_t *gdt)
714 714 {
715 715 int i;
716 716
717 717 /*
718 718 * Text and data for both kernel and user span entire 32 bit
719 719 * address space.
720 720 */
721 721
722 722 /*
723 723 * kernel code segment.
724 724 */
725 725 set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
726 726 SDP_OP32);
727 727
728 728 /*
729 729 * kernel data segment.
730 730 */
731 731 set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
732 732 SDP_OP32);
733 733
734 734 /*
735 735 * user code segment.
736 736 */
737 737 set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
738 738 SDP_OP32);
739 739
740 740 /*
741 741 * user data segment.
742 742 */
743 743 set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
744 744 SDP_OP32);
745 745
746 746 #if !defined(__xpv)
747 747
748 748 /*
749 749 * TSS for T_DBLFLT (double fault) handler
750 750 */
751 751 set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], dftss0,
752 752 sizeof (*dftss0) - 1, SDT_SYSTSS, SEL_KPL);
753 753
754 754 /*
755 755 * TSS for kernel
756 756 */
757 757 set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
758 758 sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
759 759
760 760 #endif /* !__xpv */
761 761
762 762 /*
763 763 * %gs selector for kernel
764 764 */
765 765 set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) -1, SDT_MEMRWA,
766 766 SEL_KPL, SDP_BYTES, SDP_OP32);
767 767
768 768 /*
769 769 * Initialize lwp private descriptors.
770 770 * Only attributes and limits are initialized, the effective
771 771 * base address is programmed via fsbase/gsbase.
772 772 */
773 773 set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
774 774 SDP_PAGES, SDP_OP32);
775 775 set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
776 776 SDP_PAGES, SDP_OP32);
777 777
778 778 /*
779 779 * Initialize the descriptors set aside for brand usage.
780 780 * Only attributes and limits are initialized.
781 781 */
782 782 for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
783 783 set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
784 784 SDP_PAGES, SDP_OP32);
785 785 /*
786 786 * Initialize convenient zero base user descriptor for clearing
787 787 * lwp private %fs and %gs descriptors in GDT. See setregs() for
788 788 * an example.
789 789 */
790 790 set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
791 791 SDP_BYTES, SDP_OP32);
792 792 }
793 793
794 794 #if defined(__xpv)
795 795
796 796 static user_desc_t *
797 797 init_gdt(void)
798 798 {
799 799 uint64_t gdtpa;
800 800 ulong_t ma[1]; /* XXPV should be a memory_t */
801 801
802 802 #if !defined(__lint)
803 803 /*
804 804 * Our gdt is never larger than a single page.
805 805 */
806 806 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
807 807 #endif
808 808 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
809 809 PAGESIZE, PAGESIZE);
810 810 bzero(gdt0, PAGESIZE);
811 811
812 812 init_gdt_common(gdt0);
813 813 gdtpa = pfn_to_pa(va_to_pfn(gdt0));
814 814
815 815 /*
816 816 * XXX Since we never invoke kmdb until after the kernel takes
817 817 * over the descriptor tables why not have it use the kernel's
818 818 * selectors?
819 819 */
820 820 if (boothowto & RB_DEBUG) {
821 821 set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
822 822 SDP_PAGES, SDP_OP32);
823 823 set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
824 824 SDP_PAGES, SDP_OP32);
825 825 }
826 826
827 827 /*
828 828 * Clear write permission for page containing the gdt and install it.
829 829 */
830 830 ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
831 831 kbm_read_only((uintptr_t)gdt0, gdtpa);
832 832 xen_set_gdt(ma, NGDT);
833 833
834 834 /*
835 835 * Reload the segment registers to use the new GDT
836 836 */
837 837 load_segment_registers(
838 838 KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
839 839
840 840 return (gdt0);
841 841 }
842 842
843 843 #else /* __xpv */
844 844
845 845 static user_desc_t *
846 846 init_gdt(void)
847 847 {
848 848 desctbr_t r_bgdt, r_gdt;
849 849 user_desc_t *bgdt;
850 850
851 851 #if !defined(__lint)
852 852 /*
853 853 * Our gdt is never larger than a single page.
854 854 */
855 855 ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
856 856 #endif
857 857 /*
858 858 * XXX this allocation belongs in our caller, not here.
859 859 */
860 860 gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
861 861 PAGESIZE, PAGESIZE);
862 862 bzero(gdt0, PAGESIZE);
863 863
864 864 init_gdt_common(gdt0);
865 865
866 866 /*
867 867 * Copy in from boot's gdt to our gdt entries.
868 868 * Entry 0 is null descriptor by definition.
869 869 */
870 870 rd_gdtr(&r_bgdt);
871 871 bgdt = (user_desc_t *)r_bgdt.dtr_base;
872 872 if (bgdt == NULL)
873 873 panic("null boot gdt");
874 874
875 875 gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
876 876 gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
877 877 gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
878 878 gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
879 879
880 880 /*
881 881 * Install our new GDT
882 882 */
883 883 r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
884 884 r_gdt.dtr_base = (uintptr_t)gdt0;
885 885 wr_gdtr(&r_gdt);
886 886
887 887 /*
888 888 * Reload the segment registers to use the new GDT
889 889 */
890 890 load_segment_registers(
891 891 KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
892 892
893 893 return (gdt0);
894 894 }
895 895
896 896 #endif /* __xpv */
897 897 #endif /* __i386 */
898 898
899 899 /*
900 900 * Build kernel IDT.
901 901 *
902 902 * Note that for amd64 we pretty much require every gate to be an interrupt
903 903 * gate which blocks interrupts atomically on entry; that's because of our
904 904 * dependency on using 'swapgs' every time we come into the kernel to find
905 905 * the cpu structure. If we get interrupted just before doing that, %cs could
906 906 * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
907 907 * %gsbase is really still pointing at something in userland. Bad things will
 908  908  * ensue. We use interrupt gates for i386 as well even though this is not
909 909 * required for some traps.
910 910 *
911 911 * Perhaps they should have invented a trap gate that does an atomic swapgs?
912 912 */
913 913 static void
914 914 init_idt_common(gate_desc_t *idt)
915 915 {
916 916 set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
917 917 0);
918 918 set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
919 919 0);
920 920 set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL,
921 921 0);
922 922 set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
923 923 0);
924 924 set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
925 925 0);
926 926 set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
927 927 TRP_KPL, 0);
928 928 set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
929 929 0);
930 930 set_gatesegd(&idt[T_NOEXTFLT], &ndptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
931 931 0);
932 932
933 933 /*
934 934 * double fault handler.
935 935 *
936 936 * Note that on the hypervisor a guest does not receive #df faults.
937 937 * Instead a failsafe event is injected into the guest if its selectors
938 938 * and/or stack is in a broken state. See xen_failsafe_callback.
939 939 */
940 940 #if !defined(__xpv)
941 941 #if defined(__amd64)
942 942
943 943 set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
944 944 T_DBLFLT);
945 945
946 946 #elif defined(__i386)
947 947
948 948 /*
949 949 * task gate required.
950 950 */
951 951 set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL,
952 952 0);
953 953
954 954 #endif /* __i386 */
955 955 #endif /* !__xpv */
956 956
957 957 /*
958 958 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
959 959 */
960 960
961 961 set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
962 962 0);
963 963 set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
964 964 0);
965 965 set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
↓ open down ↓ |
795 lines elided |
↑ open up ↑ |
966 966 set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
967 967 set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
968 968 set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
969 969 0);
970 970 set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
971 971 TRP_KPL, 0);
972 972 set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
973 973 set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
974 974
975 975 /*
976 + * install "int80" handler at, well, 0x80.
977 + */
978 + set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, TRP_UPL,
979 + 0);
980 +
981 + /*
976 982 * install fast trap handler at 210.
977 983 */
978 984 set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL,
979 985 0);
980 986
981 987 /*
982 988 * System call handler.
983 989 */
984 990 #if defined(__amd64)
985 991 set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
986 992 TRP_UPL, 0);
987 993
988 994 #elif defined(__i386)
989 995 set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
↓ open down ↓ |
4 lines elided |
↑ open up ↑ |
990 996 TRP_UPL, 0);
991 997 #endif /* __i386 */
992 998
993 999 /*
994 1000 * Install the DTrace interrupt handler for the pid provider.
995 1001 */
996 1002 set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
997 1003 SDT_SYSIGT, TRP_UPL, 0);
998 1004
999 1005 /*
1000 - * Prepare interposing descriptor for the syscall handler
1001 - * and cache copy of the default descriptor.
1006 + * Prepare interposing descriptors for the branded "int80"
1007 + * and syscall handlers and cache copies of the default
1008 + * descriptors.
1002 1009 */
1003 - brand_tbl[0].ih_inum = T_SYSCALLINT;
1004 - brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];
1010 + brand_tbl[0].ih_inum = T_INT80;
1011 + brand_tbl[0].ih_default_desc = idt0[T_INT80];
1012 + set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
1013 + SDT_SYSIGT, TRP_UPL, 0);
1005 1014
1015 + brand_tbl[1].ih_inum = T_SYSCALLINT;
1016 + brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT];
1017 +
1006 1018 #if defined(__amd64)
1007 - set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_syscall_int,
1019 + set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
1008 1020 KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1009 1021 #elif defined(__i386)
1010 - set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_call,
1022 + set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
1011 1023 KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
1012 1024 #endif /* __i386 */
1013 1025
1014 - brand_tbl[1].ih_inum = 0;
1026 + brand_tbl[2].ih_inum = 0;
1015 1027 }
1016 1028
1017 1029 #if defined(__xpv)
1018 1030
/*
 * Hypervisor (PV) variant: the ivct* interrupt stubs installed by the
 * bare-metal init_idt() below are not needed here — external events are
 * delivered through the callbacks registered in xen_init_callbacks() —
 * so only the common trap/fault gates are populated.
 */
static void
init_idt(gate_desc_t *idt)
{
	init_idt_common(idt);
}
1024 1036
1025 1037 #else /* __xpv */
1026 1038
1027 1039 static void
1028 1040 init_idt(gate_desc_t *idt)
1029 1041 {
1030 1042 char ivctname[80];
1031 1043 void (*ivctptr)(void);
1032 1044 int i;
1033 1045
1034 1046 /*
1035 1047 * Initialize entire table with 'reserved' trap and then overwrite
1036 1048 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
1037 1049 * since it can only be generated on a 386 processor. 15 is also
1038 1050 * unsupported and reserved.
1039 1051 */
1040 1052 for (i = 0; i < NIDT; i++)
1041 1053 set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1042 1054 0);
1043 1055
1044 1056 /*
1045 1057 * 20-31 reserved
1046 1058 */
1047 1059 for (i = 20; i < 32; i++)
1048 1060 set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
1049 1061 0);
1050 1062
1051 1063 /*
1052 1064 * interrupts 32 - 255
1053 1065 */
1054 1066 for (i = 32; i < 256; i++) {
1055 1067 (void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
1056 1068 ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
1057 1069 if (ivctptr == NULL)
1058 1070 panic("kobj_getsymvalue(%s) failed", ivctname);
1059 1071
1060 1072 set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
1061 1073 }
1062 1074
1063 1075 /*
1064 1076 * Now install the common ones. Note that it will overlay some
1065 1077 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
1066 1078 */
1067 1079 init_idt_common(idt);
1068 1080 }
1069 1081
1070 1082 #endif /* __xpv */
1071 1083
/*
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
 * to reference the LDT will therefore cause a #gp. System calls made via the
 * obsolete lcall mechanism are emulated by the #gp fault handler.
 */
static void
init_ldt(void)
{
#if defined(__xpv)
	/* A PV guest must go through the hypervisor to change its LDT. */
	xen_set_ldt(NULL, 0);
#else
	/* Load a null selector into the LDTR. */
	wr_ldtr(0);
#endif
}
1087 1099
1088 1100 #if !defined(__xpv)
1089 1101 #if defined(__amd64)
1090 1102
/*
 * Set up the 64-bit kernel TSS (ktss0): point IST1 at the dedicated
 * double-fault stack, disable the I/O permission bitmap, and load the
 * task register.
 */
static void
init_tss(void)
{
	/*
	 * tss_rsp0 is dynamically filled in by resume() on each context switch.
	 * All exceptions but #DF will run on the thread stack.
	 * Set up the double fault stack here.
	 */
	ktss0->tss_ist1 =
	    (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0->tss_bitmapbase = sizeof (*ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}
1114 1126
1115 1127 #elif defined(__i386)
1116 1128
/*
 * Set up the 32-bit kernel TSS (ktss0) plus the separate TSS (dftss0)
 * that backs the #DF task gate installed in init_idt_common(), then
 * load the task register.
 */
static void
init_tss(void)
{
	/*
	 * ktss0->tss_esp dynamically filled in by resume() on each
	 * context switch.
	 */
	ktss0->tss_ss0 = KDS_SEL;
	ktss0->tss_eip = (uint32_t)_start;
	ktss0->tss_ds = ktss0->tss_es = ktss0->tss_ss = KDS_SEL;
	ktss0->tss_cs = KCS_SEL;
	ktss0->tss_fs = KFS_SEL;
	ktss0->tss_gs = KGS_SEL;
	ktss0->tss_ldt = ULDT_SEL;

	/*
	 * Initialize double fault tss.  It runs on its own stack with
	 * all segment registers pointing at the kernel selectors.
	 */
	dftss0->tss_esp0 = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0->tss_ss0 = KDS_SEL;

	/*
	 * tss_cr3 will get initialized in hat_kern_setup() once our page
	 * tables have been setup.
	 */
	dftss0->tss_eip = (uint32_t)syserrtrap;
	dftss0->tss_esp = (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
	dftss0->tss_cs = KCS_SEL;
	dftss0->tss_ds = KDS_SEL;
	dftss0->tss_es = KDS_SEL;
	dftss0->tss_ss = KDS_SEL;
	dftss0->tss_fs = KFS_SEL;
	dftss0->tss_gs = KGS_SEL;

	/*
	 * Set I/O bit map offset equal to size of TSS segment limit
	 * for no I/O permission map. This will force all user I/O
	 * instructions to generate #gp fault.
	 */
	ktss0->tss_bitmapbase = sizeof (*ktss0);

	/*
	 * Point %tr to descriptor for ktss0 in gdt.
	 */
	wr_tsr(KTSS_SEL);
}
1163 1175
1164 1176 #endif /* __i386 */
1165 1177 #endif /* !__xpv */
1166 1178
1167 1179 #if defined(__xpv)
1168 1180
/*
 * Boot-CPU descriptor table setup for a paravirtualized (Xen) domain.
 * The GDT comes from init_gdt(); the IDT is built locally and then
 * registered with the hypervisor via xen_idt_write() (rather than
 * loaded with wr_idtr() as on bare metal).
 */
void
init_desctbls(void)
{
	uint_t vec;
	user_desc_t *gdt;

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();

	/*
	 * Store static pa of gdt to speed up pa_to_ma() translations
	 * on lwp context switches.
	 */
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_gdt = gdt;
	CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));

	/*
	 * Setup and install our IDT.  The table must fit in the single
	 * page allocated for it.
	 */
#if !defined(__lint)
	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(idt0, PAGESIZE);
	init_idt(idt0);
	/* Register every gate with the hypervisor, one vector at a time. */
	for (vec = 0; vec < NIDT; vec++)
		xen_idt_write(&idt0[vec], vec);

	CPU->cpu_idt = idt0;

	/*
	 * set default kernel stack
	 */
	xen_stack_switch(KDS_SEL,
	    (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);

	xen_init_callbacks();

	init_ldt();
}
1213 1225
1214 1226 #else /* __xpv */
1215 1227
/*
 * Bare-metal boot-CPU descriptor table setup: allocate the IDT and TSS
 * pages, build their contents, then load IDTR and TR and clear LDTR.
 */
void
init_desctbls(void)
{
	user_desc_t *gdt;
	desctbr_t idtr;

	/*
	 * Allocate IDT and TSS structures on unique pages for better
	 * performance in virtual machines.
	 */
#if !defined(__lint)
	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
#endif
	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
	    PAGESIZE, PAGESIZE);
	bzero(idt0, PAGESIZE);
#if !defined(__lint)
	ASSERT(sizeof (*ktss0) <= PAGESIZE);
#endif
	ktss0 = (struct tss *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
	    PAGESIZE, PAGESIZE);
	bzero(ktss0, PAGESIZE);

#if defined(__i386)
#if !defined(__lint)
	ASSERT(sizeof (*dftss0) <= PAGESIZE);
#endif
	/* 32-bit kernels need a second TSS for the #DF task gate. */
	dftss0 = (struct tss *)BOP_ALLOC(bootops, (caddr_t)DFTSS_VA,
	    PAGESIZE, PAGESIZE);
	bzero(dftss0, PAGESIZE);
#endif

	/*
	 * Setup and install our GDT.
	 */
	gdt = init_gdt();
	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
	CPU->cpu_gdt = gdt;

	/*
	 * Setup and install our IDT.
	 */
	init_idt(idt0);

	idtr.dtr_base = (uintptr_t)idt0;
	idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
	wr_idtr(&idtr);
	CPU->cpu_idt = idt0;

#if defined(__i386)
	/*
	 * We maintain a description of idt0 in convenient IDTR format
	 * for #pf's on some older pentium processors. See pentium_pftrap().
	 */
	idt0_default_r = idtr;
#endif	/* __i386 */

	init_tss();
	CPU->cpu_tss = ktss0;
	init_ldt();
}
1277 1289
1278 1290 #endif /* __xpv */
1279 1291
/*
 * In the early kernel, we need to set up a simple GDT to run on.
 *
 * XXPV	Can dboot use this too?  See dboot_gdt.s
 */
void
init_boot_gdt(user_desc_t *bgdt)
{
#if defined(__amd64)
	/* Flat ring-0 32-bit data and 64-bit code segments. */
	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#elif defined(__i386)
	/* Flat ring-0 32-bit data and code segments. */
	set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
	set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
	    SDP_PAGES, SDP_OP32);
#endif	/* __i386 */
}
1300 1312
1301 1313 /*
1302 1314 * Enable interpositioning on the system call path by rewriting the
1303 1315 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1304 1316 * the branded entry points.
1305 1317 */
1306 1318 void
1307 1319 brand_interpositioning_enable(void)
1308 1320 {
1309 1321 gate_desc_t *idt = CPU->cpu_idt;
1310 1322 int i;
1311 1323
1312 1324 ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1313 1325
1314 1326 for (i = 0; brand_tbl[i].ih_inum; i++) {
1315 1327 idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
1316 1328 #if defined(__xpv)
1317 1329 xen_idt_write(&idt[brand_tbl[i].ih_inum],
1318 1330 brand_tbl[i].ih_inum);
1319 1331 #endif
1320 1332 }
1321 1333
1322 1334 #if defined(__amd64)
1323 1335 #if defined(__xpv)
1324 1336
1325 1337 /*
1326 1338 * Currently the hypervisor only supports 64-bit syscalls via
1327 1339 * syscall instruction. The 32-bit syscalls are handled by
1328 1340 * interrupt gate above.
1329 1341 */
1330 1342 xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
1331 1343 CALLBACKF_mask_events);
1332 1344
1333 1345 #else
1334 1346
1335 1347 if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1336 1348 wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
1337 1349 wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
1338 1350 }
1339 1351
1340 1352 #endif
1341 1353 #endif /* __amd64 */
1342 1354
1343 1355 if (is_x86_feature(x86_featureset, X86FSET_SEP))
1344 1356 wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
1345 1357 }
1346 1358
/*
 * Disable interpositioning on the system call path by rewriting the
 * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
 * the standard entry points, which bypass the interpositioning hooks.
 */
void
brand_interpositioning_disable(void)
{
	gate_desc_t *idt = CPU->cpu_idt;
	int i;

	/*
	 * We rewrite this CPU's private IDT, so we must not be migrated
	 * to another CPU mid-update.
	 */
	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	/*
	 * Restore the cached default gate for each entry of brand_tbl[];
	 * the table is terminated by an ih_inum of zero.
	 */
	for (i = 0; brand_tbl[i].ih_inum; i++) {
		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
#if defined(__xpv)
		xen_idt_write(&idt[brand_tbl[i].ih_inum],
		    brand_tbl[i].ih_inum);
#endif
	}

#if defined(__amd64)
#if defined(__xpv)

	/*
	 * See comment above in brand_interpositioning_enable.
	 */
	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
	    CALLBACKF_mask_events);

#else

	/* Restore the default syscall/syscall32 MSR entry points. */
	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
		wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
	}

#endif
#endif /* __amd64 */

	/* And the default sysenter entry point, where supported. */
	if (is_x86_feature(x86_featureset, X86FSET_SEP))
		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
}
↓ open down ↓ |
365 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX