-rw-r--r--  Documentation/arch/x86/boot.rst        | 198
-rw-r--r--  arch/x86/include/asm/bug.h             |   2
-rw-r--r--  arch/x86/include/asm/irq_remapping.h   |   7
-rw-r--r--  arch/x86/include/asm/uv/bios.h         |   2
-rw-r--r--  arch/x86/kernel/cpu/sgx/ioctl.c        |   2
-rw-r--r--  arch/x86/kernel/fpu/xstate.c           |   4
-rw-r--r--  arch/x86/kernel/irq.c                  |  23
-rw-r--r--  drivers/iommu/intel/irq_remapping.c    |   8
-rw-r--r--  include/linux/mm_types.h               |   1
-rw-r--r--  include/trace/events/tlb.h             |   5
10 files changed, 141 insertions(+), 111 deletions(-)
diff --git a/Documentation/arch/x86/boot.rst b/Documentation/arch/x86/boot.rst
index 6d36ce86fd8e..dca3875a2435 100644
--- a/Documentation/arch/x86/boot.rst
+++ b/Documentation/arch/x86/boot.rst
@@ -95,26 +95,26 @@ Memory Layout
The traditional memory map for the kernel loader, used for Image or
zImage kernels, typically looks like::
- | |
+ | |
0A0000 +------------------------+
- | Reserved for BIOS | Do not use. Reserved for BIOS EBDA.
+ | Reserved for BIOS | Do not use. Reserved for BIOS EBDA.
09A000 +------------------------+
- | Command line |
- | Stack/heap | For use by the kernel real-mode code.
+ | Command line |
+ | Stack/heap | For use by the kernel real-mode code.
098000 +------------------------+
- | Kernel setup | The kernel real-mode code.
+ | Kernel setup | The kernel real-mode code.
090200 +------------------------+
- | Kernel boot sector | The kernel legacy boot sector.
+ | Kernel boot sector | The kernel legacy boot sector.
090000 +------------------------+
- | Protected-mode kernel | The bulk of the kernel image.
+ | Protected-mode kernel | The bulk of the kernel image.
010000 +------------------------+
- | Boot loader | <- Boot sector entry point 0000:7C00
+ | Boot loader | <- Boot sector entry point 0000:7C00
001000 +------------------------+
- | Reserved for MBR/BIOS |
+ | Reserved for MBR/BIOS |
000800 +------------------------+
- | Typically used by MBR |
+ | Typically used by MBR |
000600 +------------------------+
- | BIOS use only |
+ | BIOS use only |
000000 +------------------------+
When using bzImage, the protected-mode kernel was relocated to
@@ -142,27 +142,27 @@ above the 0x9A000 point; too many BIOSes will break above that point.
For a modern bzImage kernel with boot protocol version >= 2.02, a
memory layout like the following is suggested::
- ~ ~
- | Protected-mode kernel |
+ ~ ~
+ | Protected-mode kernel |
100000 +------------------------+
- | I/O memory hole |
+ | I/O memory hole |
0A0000 +------------------------+
- | Reserved for BIOS | Leave as much as possible unused
- ~ ~
- | Command line | (Can also be below the X+10000 mark)
+ | Reserved for BIOS | Leave as much as possible unused
+ ~ ~
+ | Command line | (Can also be below the X+10000 mark)
X+10000 +------------------------+
- | Stack/heap | For use by the kernel real-mode code.
+ | Stack/heap | For use by the kernel real-mode code.
X+08000 +------------------------+
- | Kernel setup | The kernel real-mode code.
- | Kernel boot sector | The kernel legacy boot sector.
+ | Kernel setup | The kernel real-mode code.
+ | Kernel boot sector | The kernel legacy boot sector.
X +------------------------+
- | Boot loader | <- Boot sector entry point 0000:7C00
+ | Boot loader | <- Boot sector entry point 0000:7C00
001000 +------------------------+
- | Reserved for MBR/BIOS |
+ | Reserved for MBR/BIOS |
000800 +------------------------+
- | Typically used by MBR |
+ | Typically used by MBR |
000600 +------------------------+
- | BIOS use only |
+ | BIOS use only |
000000 +------------------------+
... where the address X is as low as the design of the boot loader permits.
@@ -433,7 +433,7 @@ Protocol: 2.00+
Assigned boot loader IDs:
- == =======================================
+ ==== =======================================
0x0 LILO
(0x00 reserved for pre-2.00 bootloader)
0x1 Loadlin
@@ -456,7 +456,7 @@ Protocol: 2.00+
<http://sebastian-plotz.blogspot.de>
0x12 OVMF UEFI virtualization stack
0x13 barebox
- == =======================================
+ ==== =======================================
Please contact <hpa@zytor.com> if you need a bootloader ID value assigned.
@@ -809,12 +809,12 @@ Protocol: 2.09+
as follow::
struct setup_data {
- __u64 next;
- __u32 type;
- __u32 len;
- __u8 data[];
+ __u64 next;
+ __u32 type;
+ __u32 len;
+ __u8 data[];
}
-
+
Where, the next is a 64-bit physical pointer to the next node of
linked list, the next field of the last node is 0; the type is used
to identify the contents of data; the len is the length of data
@@ -835,10 +835,10 @@ Protocol: 2.09+
protocol 2.15::
struct setup_indirect {
- __u32 type;
- __u32 reserved; /* Reserved, must be set to zero. */
- __u64 len;
- __u64 addr;
+ __u32 type;
+ __u32 reserved; /* Reserved, must be set to zero. */
+ __u64 len;
+ __u64 addr;
};
The type member is a SETUP_INDIRECT | SETUP_* type. However, it cannot be
@@ -850,15 +850,15 @@ Protocol: 2.09+
In this case setup_data and setup_indirect will look like this::
struct setup_data {
- .next = 0, /* or <addr_of_next_setup_data_struct> */
- .type = SETUP_INDIRECT,
- .len = sizeof(setup_indirect),
- .data[sizeof(setup_indirect)] = (struct setup_indirect) {
- .type = SETUP_INDIRECT | SETUP_E820_EXT,
- .reserved = 0,
- .len = <len_of_SETUP_E820_EXT_data>,
- .addr = <addr_of_SETUP_E820_EXT_data>,
- },
+ .next = 0, /* or <addr_of_next_setup_data_struct> */
+ .type = SETUP_INDIRECT,
+ .len = sizeof(setup_indirect),
+ .data[sizeof(setup_indirect)] = (struct setup_indirect) {
+ .type = SETUP_INDIRECT | SETUP_E820_EXT,
+ .reserved = 0,
+ .len = <len_of_SETUP_E820_EXT_data>,
+ .addr = <addr_of_SETUP_E820_EXT_data>,
+ },
}
.. note::
@@ -897,11 +897,11 @@ Offset/size: 0x260/4
The kernel runtime start address is determined by the following algorithm::
if (relocatable_kernel) {
- if (load_address < pref_address)
- load_address = pref_address;
- runtime_start = align_up(load_address, kernel_alignment);
+ if (load_address < pref_address)
+ load_address = pref_address;
+ runtime_start = align_up(load_address, kernel_alignment);
} else {
- runtime_start = pref_address;
+ runtime_start = pref_address;
}
Hence the necessary memory window location and size can be estimated by
@@ -975,22 +975,22 @@ after kernel_info_var_len_data label. Each chunk of variable size data has to
be prefixed with header/magic and its size, e.g.::
kernel_info:
- .ascii "LToP" /* Header, Linux top (structure). */
- .long kernel_info_var_len_data - kernel_info
- .long kernel_info_end - kernel_info
- .long 0x01234567 /* Some fixed size data for the bootloaders. */
+ .ascii "LToP" /* Header, Linux top (structure). */
+ .long kernel_info_var_len_data - kernel_info
+ .long kernel_info_end - kernel_info
+ .long 0x01234567 /* Some fixed size data for the bootloaders. */
kernel_info_var_len_data:
example_struct: /* Some variable size data for the bootloaders. */
- .ascii "0123" /* Header/Magic. */
- .long example_struct_end - example_struct
- .ascii "Struct"
- .long 0x89012345
+ .ascii "0123" /* Header/Magic. */
+ .long example_struct_end - example_struct
+ .ascii "Struct"
+ .long 0x89012345
example_struct_end:
example_strings: /* Some variable size data for the bootloaders. */
- .ascii "ABCD" /* Header/Magic. */
- .long example_strings_end - example_strings
- .asciz "String_0"
- .asciz "String_1"
+ .ascii "ABCD" /* Header/Magic. */
+ .long example_strings_end - example_strings
+ .asciz "String_0"
+ .asciz "String_1"
example_strings_end:
kernel_info_end:
@@ -1132,53 +1132,53 @@ Such a boot loader should enter the following fields in the header::
unsigned long base_ptr; /* base address for real-mode segment */
if (setup_sects == 0)
- setup_sects = 4;
+ setup_sects = 4;
if (protocol >= 0x0200) {
- type_of_loader = <type code>;
- if (loading_initrd) {
- ramdisk_image = <initrd_address>;
- ramdisk_size = <initrd_size>;
- }
-
- if (protocol >= 0x0202 && loadflags & 0x01)
- heap_end = 0xe000;
- else
- heap_end = 0x9800;
-
- if (protocol >= 0x0201) {
- heap_end_ptr = heap_end - 0x200;
- loadflags |= 0x80; /* CAN_USE_HEAP */
- }
-
- if (protocol >= 0x0202) {
- cmd_line_ptr = base_ptr + heap_end;
- strcpy(cmd_line_ptr, cmdline);
- } else {
- cmd_line_magic = 0xA33F;
- cmd_line_offset = heap_end;
- setup_move_size = heap_end + strlen(cmdline) + 1;
- strcpy(base_ptr + cmd_line_offset, cmdline);
- }
+ type_of_loader = <type code>;
+ if (loading_initrd) {
+ ramdisk_image = <initrd_address>;
+ ramdisk_size = <initrd_size>;
+ }
+
+ if (protocol >= 0x0202 && loadflags & 0x01)
+ heap_end = 0xe000;
+ else
+ heap_end = 0x9800;
+
+ if (protocol >= 0x0201) {
+ heap_end_ptr = heap_end - 0x200;
+ loadflags |= 0x80; /* CAN_USE_HEAP */
+ }
+
+ if (protocol >= 0x0202) {
+ cmd_line_ptr = base_ptr + heap_end;
+ strcpy(cmd_line_ptr, cmdline);
+ } else {
+ cmd_line_magic = 0xA33F;
+ cmd_line_offset = heap_end;
+ setup_move_size = heap_end + strlen(cmdline) + 1;
+ strcpy(base_ptr + cmd_line_offset, cmdline);
+ }
} else {
- /* Very old kernel */
+ /* Very old kernel */
- heap_end = 0x9800;
+ heap_end = 0x9800;
- cmd_line_magic = 0xA33F;
- cmd_line_offset = heap_end;
+ cmd_line_magic = 0xA33F;
+ cmd_line_offset = heap_end;
- /* A very old kernel MUST have its real-mode code loaded at 0x90000 */
- if (base_ptr != 0x90000) {
- /* Copy the real-mode kernel */
- memcpy(0x90000, base_ptr, (setup_sects + 1) * 512);
- base_ptr = 0x90000; /* Relocated */
- }
+ /* A very old kernel MUST have its real-mode code loaded at 0x90000 */
+ if (base_ptr != 0x90000) {
+ /* Copy the real-mode kernel */
+ memcpy(0x90000, base_ptr, (setup_sects + 1) * 512);
+ base_ptr = 0x90000; /* Relocated */
+ }
- strcpy(0x90000 + cmd_line_offset, cmdline);
+ strcpy(0x90000 + cmd_line_offset, cmdline);
- /* It is recommended to clear memory up to the 32K mark */
- memset(0x90000 + (setup_sects + 1) * 512, 0, (64 - (setup_sects + 1)) * 512);
+ /* It is recommended to clear memory up to the 32K mark */
+ memset(0x90000 + (setup_sects + 1) * 512, 0, (64 - (setup_sects + 1)) * 512);
}
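
For context on the setup_data/setup_indirect description in the boot.rst hunks above, a minimal, self-contained consumer sketch (not part of the patch) of walking the setup_data list and dereferencing an indirect entry could look like the following. The struct layouts mirror the documentation, SETUP_INDIRECT uses its usual asm/bootparam.h value, and map_phys() is a hypothetical stand-in for whatever physical-to-virtual mapping the consumer has available:

	#include <stdint.h>

	#define SETUP_INDIRECT	(1U << 31)	/* value as in asm/bootparam.h */

	struct setup_data {
		uint64_t next;		/* physical address of the next node; 0 ends the list */
		uint32_t type;
		uint32_t len;
		uint8_t  data[];
	};

	struct setup_indirect {
		uint32_t type;		/* SETUP_INDIRECT | SETUP_* */
		uint32_t reserved;	/* must be zero */
		uint64_t len;
		uint64_t addr;
	};

	void *map_phys(uint64_t phys);	/* hypothetical phys->virt mapping helper */

	static void walk_setup_data(uint64_t phys)
	{
		while (phys) {
			struct setup_data *sd = map_phys(phys);

			if (sd->type == SETUP_INDIRECT) {
				struct setup_indirect *si = (struct setup_indirect *)sd->data;
				/* payload lives at si->addr and is si->len bytes long */
			} else {
				/* payload is sd->data[0 .. sd->len) */
			}
			phys = sd->next;
		}
	}
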
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index d561a8443c13..9b4e04690e1a 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -15,7 +15,7 @@ extern void __WARN_trap(struct bug_entry *bug, ...);
/*
* Despite that some emulators terminate on UD2, we use it for WARN().
*/
-#define ASM_UD2 _ASM_BYTES(0x0f, 0x0b)
+#define ASM_UD2 __ASM_FORM(ud2)
#define INSN_UD2 0x0b0f
#define LEN_UD2 2
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5a0d42464d44..4e55d1755846 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -87,4 +87,11 @@ static inline void panic_if_irq_remap(const char *msg)
}
#endif /* CONFIG_IRQ_REMAP */
+
+#ifdef CONFIG_X86_POSTED_MSI
+void intel_ack_posted_msi_irq(struct irq_data *irqd);
+#else
+#define intel_ack_posted_msi_irq NULL
+#endif
+
#endif /* __X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 6989b824fd32..d0b62e255290 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -122,7 +122,7 @@ struct uv_systab {
struct {
u32 type:8; /* type of entry */
u32 offset:24; /* byte offset from struct start to entry */
- } entry[1]; /* additional entries follow */
+ } entry[]; /* additional entries follow */
};
extern struct uv_systab *uv_systab;
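
The entry[] conversion above keeps the access pattern unchanged: each entry records only a type and a byte offset back into the systab blob, so payload pointers are resolved relative to the structure start. A hedged sketch of that pattern, using the struct uv_systab layout shown in the hunk and assuming the list is terminated by an entry whose type is 0 (unused):

	static void walk_uv_systab(struct uv_systab *st)
	{
		int i;

		/* hypothetical consumer loop; assumes a type == 0 terminator entry */
		for (i = 0; st->entry[i].type != 0; i++) {
			/* offset is counted in bytes from the start of the structure */
			void *payload = (void *)st + st->entry[i].offset;

			/* dispatch on st->entry[i].type; payload layout is type-specific */
		}
	}
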
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 66f1efa16fbb..9322a9287dc7 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -242,7 +242,7 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
/*
* If the caller requires measurement of the page as a proof for the content,
* use EEXTEND to add a measurement for 256 bytes of the page. Repeat this
- * operation until the entire page is measured."
+ * operation until the entire page is measured.
*/
static int __sgx_encl_extend(struct sgx_encl *encl,
struct sgx_epc_page *epc_page)
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 48113c5193aa..76153dfb58c9 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1946,7 +1946,7 @@ static int dump_xsave_layout_desc(struct coredump_params *cprm)
};
if (!dump_emit(cprm, &xc, sizeof(xc)))
- return 0;
+ return -1;
num_records++;
}
@@ -1984,7 +1984,7 @@ int elf_coredump_extra_notes_write(struct coredump_params *cprm)
return 1;
num_records = dump_xsave_layout_desc(cprm);
- if (!num_records)
+ if (num_records < 0)
return 1;
/* Total size should be equal to the number of records */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 86f4e574de02..b2fe6181960c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -397,6 +397,7 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi)
/* Posted Interrupt Descriptors for coalesced MSIs to be posted */
DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);
+static DEFINE_PER_CPU_CACHE_HOT(bool, posted_msi_handler_active);
void intel_posted_msi_init(void)
{
@@ -414,6 +415,25 @@ void intel_posted_msi_init(void)
this_cpu_write(posted_msi_pi_desc.ndst, destination);
}
+void intel_ack_posted_msi_irq(struct irq_data *irqd)
+{
+ irq_move_irq(irqd);
+
+ /*
+ * Handle the rare case that irq_retrigger() raised the actual
+ * assigned vector on the target CPU, which means that it was not
+ * invoked via the posted MSI handler below. In that case APIC EOI
+ * is required as otherwise the ISR entry becomes stale and lower
+ * priority interrupts are never going to be delivered after that.
+ *
+ * If the posted handler invoked the device interrupt handler then
+ * the EOI would be premature because it would acknowledge the
+ * posted vector.
+ */
+ if (unlikely(!__this_cpu_read(posted_msi_handler_active)))
+ apic_eoi();
+}
+
static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs)
{
unsigned long pir_copy[NR_PIR_WORDS];
@@ -446,6 +466,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
pid = this_cpu_ptr(&posted_msi_pi_desc);
+ /* Mark the handler active for intel_ack_posted_msi_irq() */
+ __this_cpu_write(posted_msi_handler_active, true);
inc_irq_stat(posted_msi_notification_count);
irq_enter();
@@ -474,6 +496,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
apic_eoi();
irq_exit();
+ __this_cpu_write(posted_msi_handler_active, false);
set_irq_regs(old_regs);
}
#endif /* X86_POSTED_MSI */
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 4f9b01dc91e8..8bcbfe3d9c72 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -1303,17 +1303,17 @@ static struct irq_chip intel_ir_chip = {
* irq_enter();
* handle_edge_irq()
* irq_chip_ack_parent()
- * irq_move_irq(); // No EOI
+ * intel_ack_posted_msi_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
- * irq_move_irq(); // No EOI
+ * intel_ack_posted_msi_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
- * irq_move_irq(); // No EOI
+ * intel_ack_posted_msi_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* apic_eoi()
@@ -1322,7 +1322,7 @@ static struct irq_chip intel_ir_chip = {
*/
static struct irq_chip intel_ir_chip_post_msi = {
.name = "INTEL-IR-POST",
- .irq_ack = irq_move_irq,
+ .irq_ack = intel_ack_posted_msi_irq,
.irq_set_affinity = intel_ir_set_affinity,
.irq_compose_msi_msg = intel_ir_compose_msi_msg,
.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9f6de068295d..42af2292951d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1631,7 +1631,6 @@ enum tlb_flush_reason {
TLB_LOCAL_MM_SHOOTDOWN,
TLB_REMOTE_SEND_IPI,
TLB_REMOTE_WRONG_CPU,
- NR_TLB_FLUSH_REASONS,
};
/**
diff --git a/include/trace/events/tlb.h b/include/trace/events/tlb.h
index b4d8e7dc38f8..fb8369511685 100644
--- a/include/trace/events/tlb.h
+++ b/include/trace/events/tlb.h
@@ -12,8 +12,9 @@
EM( TLB_FLUSH_ON_TASK_SWITCH, "flush on task switch" ) \
EM( TLB_REMOTE_SHOOTDOWN, "remote shootdown" ) \
EM( TLB_LOCAL_SHOOTDOWN, "local shootdown" ) \
- EM( TLB_LOCAL_MM_SHOOTDOWN, "local mm shootdown" ) \
- EMe( TLB_REMOTE_SEND_IPI, "remote ipi send" )
+ EM( TLB_LOCAL_MM_SHOOTDOWN, "local MM shootdown" ) \
+ EM( TLB_REMOTE_SEND_IPI, "remote IPI send" ) \
+ EMe( TLB_REMOTE_WRONG_CPU, "remote wrong CPU" )
/*
* First define the enums in TLB_FLUSH_REASON to be exported to userspace
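
The EM()/EMe() list above feeds the usual two-pass trace-event expansion, which is why a reason added to the enum in mm_types.h must also be appended here. A rough sketch of that idiom (paraphrased, not copied from this file):

	/* pass one: export each enum value to userspace */
	#undef EM
	#undef EMe
	#define EM(a, b)	TRACE_DEFINE_ENUM(a);
	#define EMe(a, b)	TRACE_DEFINE_ENUM(a);

	TLB_FLUSH_REASON

	/* pass two: rebuild EM()/EMe() as entries of a symbolic-print table */
	#undef EM
	#undef EMe
	#define EM(a, b)	{ a, b },
	#define EMe(a, b)	{ a, b }

	/* later, inside the event definition:
	 *	__print_symbolic(__entry->reason, TLB_FLUSH_REASON)
	 */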