From 7977b74dcaa6e7ed3d69ae016a05d2f36ffeeaa3 Mon Sep 17 00:00:00 2001
Message-Id: <7977b74dcaa6e7ed3d69ae016a05d2f36ffeeaa3.1343746747.git.minovotn@redhat.com>
In-Reply-To: <a05f97bf1253f4585f1a6c3f03925d7d24a064f4.1343746747.git.minovotn@redhat.com>
References: <a05f97bf1253f4585f1a6c3f03925d7d24a064f4.1343746747.git.minovotn@redhat.com>
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 27 Jul 2012 19:47:34 +0200
Subject: [PATCH 6/6] pci-assign: Update MSI-X config based on table writes

RH-Author: Alex Williamson <alex.williamson@redhat.com>
Message-id: <20120727194734.6928.69837.stgit@bling.home>
Patchwork-id: 40445
O-Subject: [RHEL6.4 qemu-kvm PATCH 5/5] pci-assign: Update MSI-X config based on table writes
Bugzilla: 784496
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
RH-Acked-by: Don Dutile <ddutile@redhat.com>
RH-Acked-by: Jason Baron <jbaron@redhat.com>

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=784496
Upstream commit a6b402c989cc83e38bdbccf570296eb8d64fe5af

When a guest enables MSI-X in PCI configuration space we walk
through the MSI-X vector table trying to guess what might get
used.  We have to guess because we don't do anything with
writes to the vector table, so we look for clues ahead of time
to pre-enable the vectors we think will be used.  This means
that instead of doing the sane thing and test the mask bit, we
test whether the data field contains a non-zero value.  It's
amazing this works as well as it does.

However, two key things missed by doing this is that we don't
catch vector changes after enabling (ex. setting smp_affinity
on an irq) and we don't support guests that don't touch the
vector table prior to enabling the MSI-X capability (ex.
freebsd).  This patch fixes both of these problems.

NB we're not actually masking vectors yet with this patch as
it's unclear whether we really have the ability to do this
without losing interrupts.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---

 hw/device-assignment.c |   95 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 84 insertions(+), 11 deletions(-)

Signed-off-by: Michal Novotny <minovotn@redhat.com>
---
 hw/device-assignment.c |   95 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 84 insertions(+), 11 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index fba1dff..b2540a8 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -1134,6 +1134,11 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
 #endif
 
 #ifdef KVM_CAP_DEVICE_MSIX
+static bool msix_masked(MSIXTableEntry *entry)
+{
+    return (entry->ctrl & cpu_to_le32(0x1)) != 0;
+}
+
 static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
 {
     AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
@@ -1145,17 +1150,19 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
 
     /* Get the usable entry number for allocating */
     for (i = 0; i < adev->msix_max; i++, entry++) {
-        /* Ignore unused entry even it's unmasked */
-        if (entry->data == 0) {
+        if (msix_masked(entry)) {
             continue;
         }
         entries_nr++;
     }
 
-    if (entries_nr == 0) {
-        fprintf(stderr, "MSI-X entry number is zero!\n");
-        return -EINVAL;
+    DEBUG("MSI-X entries: %d\n", entries_nr);
+
+    /* It's valid to enable MSI-X with all entries masked */
+    if (!entries_nr) {
+        return 0;
     }
+
     msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_busnr,
                                           (uint8_t)adev->h_devfn);
     msix_nr.entry_nr = entries_nr;
@@ -1178,7 +1185,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
     msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
     entry = adev->msix_table;
     for (i = 0; i < adev->msix_max; i++, entry++) {
-        if (entry->data == 0) {
+        if (msix_masked(entry)) {
             continue;
         }
 
@@ -1251,9 +1258,12 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
             perror("assigned_dev_update_msix_mmio");
             return;
         }
-        if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) {
-            perror("assigned_dev_enable_msix: assign irq");
-            return;
+
+        if (assigned_dev->irq_entries_nr) {
+            if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) {
+                perror("assigned_dev_enable_msix: assign irq");
+                return;
+            }
         }
         assigned_dev->irq_requested_type = assigned_irq_data.flags;
     }
@@ -1638,11 +1648,74 @@ static void msix_mmio_writel(void *opaque,
 {
     AssignedDevice *adev = opaque;
     unsigned int offset = addr & 0xfff;
+    PCIDevice *pdev = &adev->dev;
+    uint16_t ctrl;
+    MSIXTableEntry orig;
+    int i = offset >> 4;
+    uint8_t msix_cap;
+
+    if (i >= adev->msix_max) {
+        return; /* Drop write */
+    }
+
+    msix_cap = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
+    ctrl = pci_get_word(pdev->config + msix_cap + PCI_MSIX_FLAGS);
 
-    DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
-		    addr, val);
+    DEBUG("write to MSI-X table offset 0x%lx, val 0x%lx\n", addr, val);
+
+    if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
+        orig = adev->msix_table[i];
+    }
 
     memcpy((void *)((uint8_t *)adev->msix_table + offset), &val, 4);
+
+    if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
+        MSIXTableEntry *entry = &adev->msix_table[i];
+
+        if (!msix_masked(&orig) && msix_masked(entry)) {
+            /*
+             * Vector masked, disable it
+             *
+             * XXX It's not clear if we can or should actually attempt
+             * to mask or disable the interrupt.  KVM doesn't have
+             * support for pending bits and kvm_assign_set_msix_entry
+             * doesn't modify the device hardware mask.  Interrupts
+             * while masked are simply not injected to the guest, so
+             * are lost.  Can we get away with always injecting an
+             * interrupt on unmask?
+             */
+        } else if (msix_masked(&orig) && !msix_masked(entry)) {
+            /* Vector unmasked */
+            if (i >= adev->irq_entries_nr || !adev->entry[i].type) {
+                /* Previously unassigned vector, start from scratch */
+                assigned_dev_update_msix(pdev, msix_cap + PCI_MSIX_FLAGS);
+                return;
+            } else {
+                /* Update an existing, previously masked vector */
+                struct kvm_irq_routing_entry orig = adev->entry[i];
+                int ret;
+
+                adev->entry[i].u.msi.address_lo = entry->addr_lo;
+                adev->entry[i].u.msi.address_hi = entry->addr_hi;
+                adev->entry[i].u.msi.data = entry->data;
+
+                ret = kvm_update_routing_entry(kvm_context, &orig,
+                                               &adev->entry[i]);
+                if (ret) {
+                    fprintf(stderr,
+                            "Error updating irq routing entry (%d)\n", ret);
+                    return;
+                }
+
+                ret = kvm_commit_irq_routes(kvm_context);
+                if (ret) {
+                    fprintf(stderr,
+                            "Error committing irq routes (%d)\n", ret);
+                    return;
+                }
+            }
+        }
+    }
 }
 
 static void msix_mmio_writew(void *opaque,
-- 
1.7.10.4

