diff options
| -rw-r--r-- | Documentation/arch/x86/boot.rst | 198 | ||||
| -rw-r--r-- | arch/x86/include/asm/bug.h | 2 | ||||
| -rw-r--r-- | arch/x86/include/asm/irq_remapping.h | 7 | ||||
| -rw-r--r-- | arch/x86/include/asm/uv/bios.h | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/sgx/ioctl.c | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/fpu/xstate.c | 4 | ||||
| -rw-r--r-- | arch/x86/kernel/irq.c | 23 | ||||
| -rw-r--r-- | drivers/iommu/intel/irq_remapping.c | 8 | ||||
| -rw-r--r-- | include/linux/mm_types.h | 1 | ||||
| -rw-r--r-- | include/trace/events/tlb.h | 5 |
10 files changed, 141 insertions, 111 deletions
diff --git a/Documentation/arch/x86/boot.rst b/Documentation/arch/x86/boot.rst index 6d36ce86fd8e..dca3875a2435 100644 --- a/Documentation/arch/x86/boot.rst +++ b/Documentation/arch/x86/boot.rst @@ -95,26 +95,26 @@ Memory Layout The traditional memory map for the kernel loader, used for Image or zImage kernels, typically looks like:: - | | + | | 0A0000 +------------------------+ - | Reserved for BIOS | Do not use. Reserved for BIOS EBDA. + | Reserved for BIOS | Do not use. Reserved for BIOS EBDA. 09A000 +------------------------+ - | Command line | - | Stack/heap | For use by the kernel real-mode code. + | Command line | + | Stack/heap | For use by the kernel real-mode code. 098000 +------------------------+ - | Kernel setup | The kernel real-mode code. + | Kernel setup | The kernel real-mode code. 090200 +------------------------+ - | Kernel boot sector | The kernel legacy boot sector. + | Kernel boot sector | The kernel legacy boot sector. 090000 +------------------------+ - | Protected-mode kernel | The bulk of the kernel image. + | Protected-mode kernel | The bulk of the kernel image. 010000 +------------------------+ - | Boot loader | <- Boot sector entry point 0000:7C00 + | Boot loader | <- Boot sector entry point 0000:7C00 001000 +------------------------+ - | Reserved for MBR/BIOS | + | Reserved for MBR/BIOS | 000800 +------------------------+ - | Typically used by MBR | + | Typically used by MBR | 000600 +------------------------+ - | BIOS use only | + | BIOS use only | 000000 +------------------------+ When using bzImage, the protected-mode kernel was relocated to @@ -142,27 +142,27 @@ above the 0x9A000 point; too many BIOSes will break above that point. For a modern bzImage kernel with boot protocol version >= 2.02, a memory layout like the following is suggested:: - ~ ~ - | Protected-mode kernel | + ~ ~ + | Protected-mode kernel | 100000 +------------------------+ - | I/O memory hole | + | I/O memory hole | 0A0000 +------------------------+ - | Reserved for BIOS | Leave as much as possible unused - ~ ~ - | Command line | (Can also be below the X+10000 mark) + | Reserved for BIOS | Leave as much as possible unused + ~ ~ + | Command line | (Can also be below the X+10000 mark) X+10000 +------------------------+ - | Stack/heap | For use by the kernel real-mode code. + | Stack/heap | For use by the kernel real-mode code. X+08000 +------------------------+ - | Kernel setup | The kernel real-mode code. - | Kernel boot sector | The kernel legacy boot sector. + | Kernel setup | The kernel real-mode code. + | Kernel boot sector | The kernel legacy boot sector. X +------------------------+ - | Boot loader | <- Boot sector entry point 0000:7C00 + | Boot loader | <- Boot sector entry point 0000:7C00 001000 +------------------------+ - | Reserved for MBR/BIOS | + | Reserved for MBR/BIOS | 000800 +------------------------+ - | Typically used by MBR | + | Typically used by MBR | 000600 +------------------------+ - | BIOS use only | + | BIOS use only | 000000 +------------------------+ ... where the address X is as low as the design of the boot loader permits. @@ -433,7 +433,7 @@ Protocol: 2.00+ Assigned boot loader IDs: - == ======================================= + ==== ======================================= 0x0 LILO (0x00 reserved for pre-2.00 bootloader) 0x1 Loadlin @@ -456,7 +456,7 @@ Protocol: 2.00+ <http://sebastian-plotz.blogspot.de> 0x12 OVMF UEFI virtualization stack 0x13 barebox - == ======================================= + ==== ======================================= Please contact <hpa@zytor.com> if you need a bootloader ID value assigned. @@ -809,12 +809,12 @@ Protocol: 2.09+ as follow:: struct setup_data { - __u64 next; - __u32 type; - __u32 len; - __u8 data[]; + __u64 next; + __u32 type; + __u32 len; + __u8 data[]; } - + Where, the next is a 64-bit physical pointer to the next node of linked list, the next field of the last node is 0; the type is used to identify the contents of data; the len is the length of data @@ -835,10 +835,10 @@ Protocol: 2.09+ protocol 2.15:: struct setup_indirect { - __u32 type; - __u32 reserved; /* Reserved, must be set to zero. */ - __u64 len; - __u64 addr; + __u32 type; + __u32 reserved; /* Reserved, must be set to zero. */ + __u64 len; + __u64 addr; }; The type member is a SETUP_INDIRECT | SETUP_* type. However, it cannot be @@ -850,15 +850,15 @@ Protocol: 2.09+ In this case setup_data and setup_indirect will look like this:: struct setup_data { - .next = 0, /* or <addr_of_next_setup_data_struct> */ - .type = SETUP_INDIRECT, - .len = sizeof(setup_indirect), - .data[sizeof(setup_indirect)] = (struct setup_indirect) { - .type = SETUP_INDIRECT | SETUP_E820_EXT, - .reserved = 0, - .len = <len_of_SETUP_E820_EXT_data>, - .addr = <addr_of_SETUP_E820_EXT_data>, - }, + .next = 0, /* or <addr_of_next_setup_data_struct> */ + .type = SETUP_INDIRECT, + .len = sizeof(setup_indirect), + .data[sizeof(setup_indirect)] = (struct setup_indirect) { + .type = SETUP_INDIRECT | SETUP_E820_EXT, + .reserved = 0, + .len = <len_of_SETUP_E820_EXT_data>, + .addr = <addr_of_SETUP_E820_EXT_data>, + }, } .. note:: @@ -897,11 +897,11 @@ Offset/size: 0x260/4 The kernel runtime start address is determined by the following algorithm:: if (relocatable_kernel) { - if (load_address < pref_address) - load_address = pref_address; - runtime_start = align_up(load_address, kernel_alignment); + if (load_address < pref_address) + load_address = pref_address; + runtime_start = align_up(load_address, kernel_alignment); } else { - runtime_start = pref_address; + runtime_start = pref_address; } Hence the necessary memory window location and size can be estimated by @@ -975,22 +975,22 @@ after kernel_info_var_len_data label. Each chunk of variable size data has to be prefixed with header/magic and its size, e.g.:: kernel_info: - .ascii "LToP" /* Header, Linux top (structure). */ - .long kernel_info_var_len_data - kernel_info - .long kernel_info_end - kernel_info - .long 0x01234567 /* Some fixed size data for the bootloaders. */ + .ascii "LToP" /* Header, Linux top (structure). */ + .long kernel_info_var_len_data - kernel_info + .long kernel_info_end - kernel_info + .long 0x01234567 /* Some fixed size data for the bootloaders. */ kernel_info_var_len_data: example_struct: /* Some variable size data for the bootloaders. */ - .ascii "0123" /* Header/Magic. */ - .long example_struct_end - example_struct - .ascii "Struct" - .long 0x89012345 + .ascii "0123" /* Header/Magic. */ + .long example_struct_end - example_struct + .ascii "Struct" + .long 0x89012345 example_struct_end: example_strings: /* Some variable size data for the bootloaders. */ - .ascii "ABCD" /* Header/Magic. */ - .long example_strings_end - example_strings - .asciz "String_0" - .asciz "String_1" + .ascii "ABCD" /* Header/Magic. */ + .long example_strings_end - example_strings + .asciz "String_0" + .asciz "String_1" example_strings_end: kernel_info_end: @@ -1132,53 +1132,53 @@ Such a boot loader should enter the following fields in the header:: unsigned long base_ptr; /* base address for real-mode segment */ if (setup_sects == 0) - setup_sects = 4; + setup_sects = 4; if (protocol >= 0x0200) { - type_of_loader = <type code>; - if (loading_initrd) { - ramdisk_image = <initrd_address>; - ramdisk_size = <initrd_size>; - } - - if (protocol >= 0x0202 && loadflags & 0x01) - heap_end = 0xe000; - else - heap_end = 0x9800; - - if (protocol >= 0x0201) { - heap_end_ptr = heap_end - 0x200; - loadflags |= 0x80; /* CAN_USE_HEAP */ - } - - if (protocol >= 0x0202) { - cmd_line_ptr = base_ptr + heap_end; - strcpy(cmd_line_ptr, cmdline); - } else { - cmd_line_magic = 0xA33F; - cmd_line_offset = heap_end; - setup_move_size = heap_end + strlen(cmdline) + 1; - strcpy(base_ptr + cmd_line_offset, cmdline); - } + type_of_loader = <type code>; + if (loading_initrd) { + ramdisk_image = <initrd_address>; + ramdisk_size = <initrd_size>; + } + + if (protocol >= 0x0202 && loadflags & 0x01) + heap_end = 0xe000; + else + heap_end = 0x9800; + + if (protocol >= 0x0201) { + heap_end_ptr = heap_end - 0x200; + loadflags |= 0x80; /* CAN_USE_HEAP */ + } + + if (protocol >= 0x0202) { + cmd_line_ptr = base_ptr + heap_end; + strcpy(cmd_line_ptr, cmdline); + } else { + cmd_line_magic = 0xA33F; + cmd_line_offset = heap_end; + setup_move_size = heap_end + strlen(cmdline) + 1; + strcpy(base_ptr + cmd_line_offset, cmdline); + } } else { - /* Very old kernel */ + /* Very old kernel */ - heap_end = 0x9800; + heap_end = 0x9800; - cmd_line_magic = 0xA33F; - cmd_line_offset = heap_end; + cmd_line_magic = 0xA33F; + cmd_line_offset = heap_end; - /* A very old kernel MUST have its real-mode code loaded at 0x90000 */ - if (base_ptr != 0x90000) { - /* Copy the real-mode kernel */ - memcpy(0x90000, base_ptr, (setup_sects + 1) * 512); - base_ptr = 0x90000; /* Relocated */ - } + /* A very old kernel MUST have its real-mode code loaded at 0x90000 */ + if (base_ptr != 0x90000) { + /* Copy the real-mode kernel */ + memcpy(0x90000, base_ptr, (setup_sects + 1) * 512); + base_ptr = 0x90000; /* Relocated */ + } - strcpy(0x90000 + cmd_line_offset, cmdline); + strcpy(0x90000 + cmd_line_offset, cmdline); - /* It is recommended to clear memory up to the 32K mark */ - memset(0x90000 + (setup_sects + 1) * 512, 0, (64 - (setup_sects + 1)) * 512); + /* It is recommended to clear memory up to the 32K mark */ + memset(0x90000 + (setup_sects + 1) * 512, 0, (64 - (setup_sects + 1)) * 512); } diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index d561a8443c13..9b4e04690e1a 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -15,7 +15,7 @@ extern void __WARN_trap(struct bug_entry *bug, ...); /* * Despite that some emulators terminate on UD2, we use it for WARN(). */ -#define ASM_UD2 _ASM_BYTES(0x0f, 0x0b) +#define ASM_UD2 __ASM_FORM(ud2) #define INSN_UD2 0x0b0f #define LEN_UD2 2 diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 5a0d42464d44..4e55d1755846 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -87,4 +87,11 @@ static inline void panic_if_irq_remap(const char *msg) } #endif /* CONFIG_IRQ_REMAP */ + +#ifdef CONFIG_X86_POSTED_MSI +void intel_ack_posted_msi_irq(struct irq_data *irqd); +#else +#define intel_ack_posted_msi_irq NULL +#endif + #endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index 6989b824fd32..d0b62e255290 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -122,7 +122,7 @@ struct uv_systab { struct { u32 type:8; /* type of entry */ u32 offset:24; /* byte offset from struct start to entry */ - } entry[1]; /* additional entries follow */ + } entry[]; /* additional entries follow */ }; extern struct uv_systab *uv_systab; diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index 66f1efa16fbb..9322a9287dc7 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -242,7 +242,7 @@ static int __sgx_encl_add_page(struct sgx_encl *encl, /* * If the caller requires measurement of the page as a proof for the content, * use EEXTEND to add a measurement for 256 bytes of the page. Repeat this - * operation until the entire page is measured." + * operation until the entire page is measured. */ static int __sgx_encl_extend(struct sgx_encl *encl, struct sgx_epc_page *epc_page) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 48113c5193aa..76153dfb58c9 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1946,7 +1946,7 @@ static int dump_xsave_layout_desc(struct coredump_params *cprm) }; if (!dump_emit(cprm, &xc, sizeof(xc))) - return 0; + return -1; num_records++; } @@ -1984,7 +1984,7 @@ int elf_coredump_extra_notes_write(struct coredump_params *cprm) return 1; num_records = dump_xsave_layout_desc(cprm); - if (!num_records) + if (num_records < 0) return 1; /* Total size should be equal to the number of records */ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 86f4e574de02..b2fe6181960c 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -397,6 +397,7 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi) /* Posted Interrupt Descriptors for coalesced MSIs to be posted */ DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc); +static DEFINE_PER_CPU_CACHE_HOT(bool, posted_msi_handler_active); void intel_posted_msi_init(void) { @@ -414,6 +415,25 @@ void intel_posted_msi_init(void) this_cpu_write(posted_msi_pi_desc.ndst, destination); } +void intel_ack_posted_msi_irq(struct irq_data *irqd) +{ + irq_move_irq(irqd); + + /* + * Handle the rare case that irq_retrigger() raised the actual + * assigned vector on the target CPU, which means that it was not + * invoked via the posted MSI handler below. In that case APIC EOI + * is required as otherwise the ISR entry becomes stale and lower + * priority interrupts are never going to be delivered after that. + * + * If the posted handler invoked the device interrupt handler then + * the EOI would be premature because it would acknowledge the + * posted vector. + */ + if (unlikely(!__this_cpu_read(posted_msi_handler_active))) + apic_eoi(); +} + static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs) { unsigned long pir_copy[NR_PIR_WORDS]; @@ -446,6 +466,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification) pid = this_cpu_ptr(&posted_msi_pi_desc); + /* Mark the handler active for intel_ack_posted_msi_irq() */ + __this_cpu_write(posted_msi_handler_active, true); inc_irq_stat(posted_msi_notification_count); irq_enter(); @@ -474,6 +496,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification) apic_eoi(); irq_exit(); + __this_cpu_write(posted_msi_handler_active, false); set_irq_regs(old_regs); } #endif /* X86_POSTED_MSI */ diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 4f9b01dc91e8..8bcbfe3d9c72 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -1303,17 +1303,17 @@ static struct irq_chip intel_ir_chip = { * irq_enter(); * handle_edge_irq() * irq_chip_ack_parent() - * irq_move_irq(); // No EOI + * intel_ack_posted_msi_irq(); // No EOI * handle_irq_event() * driver_handler() * handle_edge_irq() * irq_chip_ack_parent() - * irq_move_irq(); // No EOI + * intel_ack_posted_msi_irq(); // No EOI * handle_irq_event() * driver_handler() * handle_edge_irq() * irq_chip_ack_parent() - * irq_move_irq(); // No EOI + * intel_ack_posted_msi_irq(); // No EOI * handle_irq_event() * driver_handler() * apic_eoi() @@ -1322,7 +1322,7 @@ static struct irq_chip intel_ir_chip = { */ static struct irq_chip intel_ir_chip_post_msi = { .name = "INTEL-IR-POST", - .irq_ack = irq_move_irq, + .irq_ack = intel_ack_posted_msi_irq, .irq_set_affinity = intel_ir_set_affinity, .irq_compose_msi_msg = intel_ir_compose_msi_msg, .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9f6de068295d..42af2292951d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1631,7 +1631,6 @@ enum tlb_flush_reason { TLB_LOCAL_MM_SHOOTDOWN, TLB_REMOTE_SEND_IPI, TLB_REMOTE_WRONG_CPU, - NR_TLB_FLUSH_REASONS, }; /** diff --git a/include/trace/events/tlb.h b/include/trace/events/tlb.h index b4d8e7dc38f8..fb8369511685 100644 --- a/include/trace/events/tlb.h +++ b/include/trace/events/tlb.h @@ -12,8 +12,9 @@ EM( TLB_FLUSH_ON_TASK_SWITCH, "flush on task switch" ) \ EM( TLB_REMOTE_SHOOTDOWN, "remote shootdown" ) \ EM( TLB_LOCAL_SHOOTDOWN, "local shootdown" ) \ - EM( TLB_LOCAL_MM_SHOOTDOWN, "local mm shootdown" ) \ - EMe( TLB_REMOTE_SEND_IPI, "remote ipi send" ) + EM( TLB_LOCAL_MM_SHOOTDOWN, "local MM shootdown" ) \ + EM( TLB_REMOTE_SEND_IPI, "remote IPI send" ) \ + EMe( TLB_REMOTE_WRONG_CPU, "remote wrong CPU" ) /* * First define the enums in TLB_FLUSH_REASON to be exported to userspace |
