summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/vfio/pci/nvgrace-gpu/main.c26
1 files changed, 25 insertions, 1 deletions
diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
index 5f122929dd44..df360e12b299 100644
--- a/drivers/vfio/pci/nvgrace-gpu/main.c
+++ b/drivers/vfio/pci/nvgrace-gpu/main.c
@@ -59,6 +59,8 @@ struct nvgrace_gpu_pci_core_device {
/* Lock to control device memory kernel mapping */
struct mutex remap_lock;
bool has_mig_hw_bug;
+ /* GPU has just been reset */
+ bool reset_done;
};
static void nvgrace_gpu_init_fake_bar_emu_regs(struct vfio_device *core_vdev)
@@ -1067,12 +1069,34 @@ static const struct pci_device_id nvgrace_gpu_vfio_pci_table[] = {
MODULE_DEVICE_TABLE(pci, nvgrace_gpu_vfio_pci_table);
+/*
+ * The GPU reset is required to be serialized against the *first* mapping
+ * faults and read/writes accesses to prevent potential RAS events logging.
+ *
+ * First fault or access after a reset needs to poll device readiness,
+ * flag that a reset has occurred.
+ */
+static void nvgrace_gpu_vfio_pci_reset_done(struct pci_dev *pdev)
+{
+ struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev);
+ struct nvgrace_gpu_pci_core_device *nvdev =
+ container_of(core_device, struct nvgrace_gpu_pci_core_device,
+ core_device);
+
+ nvdev->reset_done = true;
+}
+
+static const struct pci_error_handlers nvgrace_gpu_vfio_pci_err_handlers = {
+ .reset_done = nvgrace_gpu_vfio_pci_reset_done,
+ .error_detected = vfio_pci_core_aer_err_detected,
+};
+
static struct pci_driver nvgrace_gpu_vfio_pci_driver = {
.name = KBUILD_MODNAME,
.id_table = nvgrace_gpu_vfio_pci_table,
.probe = nvgrace_gpu_probe,
.remove = nvgrace_gpu_remove,
- .err_handler = &vfio_pci_core_err_handlers,
+ .err_handler = &nvgrace_gpu_vfio_pci_err_handlers,
.driver_managed_dma = true,
};