summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/lapic.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/lapic.c')
-rw-r--r--arch/x86/kvm/lapic.c199
1 files changed, 160 insertions, 39 deletions
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 391dda8d43b7..89db20f8cb70 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -70,6 +70,11 @@
#define APIC_BROADCAST 0xFF
#define X2APIC_BROADCAST 0xFFFFFFFFul
+static bool lapic_timer_advance_adjust_done = false;
+#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
+/* step-by-step approximation to mitigate fluctuation */
+#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
+
static inline int apic_test_vector(int vec, void *bitmap)
{
return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -321,8 +326,16 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
if (!lapic_in_kernel(vcpu))
return;
+ /*
+ * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation)
+ * which doesn't have EOI register; Some buggy OSes (e.g. Windows with
+ * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC
+ * version first and level-triggered interrupts never get EOIed in
+ * IOAPIC.
+ */
feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
- if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
+ if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
+ !ioapic_in_kernel(vcpu->kvm))
v |= APIC_LVR_DIRECTED_EOI;
kvm_lapic_set_reg(apic, APIC_LVR, v);
}
@@ -539,6 +552,59 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
irq->level, irq->trig_mode, dest_map);
}
+int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
+ unsigned long ipi_bitmap_high, u32 min,
+ unsigned long icr, int op_64_bit)
+{
+ int i;
+ struct kvm_apic_map *map;
+ struct kvm_vcpu *vcpu;
+ struct kvm_lapic_irq irq = {0};
+ int cluster_size = op_64_bit ? 64 : 32;
+ int count = 0;
+
+ irq.vector = icr & APIC_VECTOR_MASK;
+ irq.delivery_mode = icr & APIC_MODE_MASK;
+ irq.level = (icr & APIC_INT_ASSERT) != 0;
+ irq.trig_mode = icr & APIC_INT_LEVELTRIG;
+
+ if (icr & APIC_DEST_MASK)
+ return -KVM_EINVAL;
+ if (icr & APIC_SHORT_MASK)
+ return -KVM_EINVAL;
+
+ rcu_read_lock();
+ map = rcu_dereference(kvm->arch.apic_map);
+
+ if (min > map->max_apic_id)
+ goto out;
+ /* Bits above cluster_size are masked in the caller. */
+ for_each_set_bit(i, &ipi_bitmap_low,
+ min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+ if (map->phys_map[min + i]) {
+ vcpu = map->phys_map[min + i]->vcpu;
+ count += kvm_apic_set_irq(vcpu, &irq, NULL);
+ }
+ }
+
+ min += cluster_size;
+
+ if (min > map->max_apic_id)
+ goto out;
+
+ for_each_set_bit(i, &ipi_bitmap_high,
+ min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+ if (map->phys_map[min + i]) {
+ vcpu = map->phys_map[min + i]->vcpu;
+ count += kvm_apic_set_irq(vcpu, &irq, NULL);
+ }
+ }
+
+out:
+ rcu_read_unlock();
+ return count;
+}
+
static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{
@@ -894,14 +960,14 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
map = rcu_dereference(kvm->arch.apic_map);
ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
- if (ret)
+ if (ret) {
+ *r = 0;
for_each_set_bit(i, &bitmap, 16) {
if (!dst[i])
continue;
- if (*r < 0)
- *r = 0;
*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
}
+ }
rcu_read_unlock();
return ret;
@@ -1283,9 +1349,8 @@ EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
- return kvm_apic_hw_enabled(apic) &&
- addr >= apic->base_address &&
- addr < apic->base_address + LAPIC_MMIO_LENGTH;
+ return addr >= apic->base_address &&
+ addr < apic->base_address + LAPIC_MMIO_LENGTH;
}
static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
@@ -1297,6 +1362,15 @@ static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!apic_mmio_in_range(apic, address))
return -EOPNOTSUPP;
+ if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
+ if (!kvm_check_has_quirk(vcpu->kvm,
+ KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
+ return -EOPNOTSUPP;
+
+ memset(data, 0xff, len);
+ return 0;
+ }
+
kvm_lapic_reg_read(apic, offset, len, data);
return 0;
@@ -1371,7 +1445,7 @@ static void apic_timer_expired(struct kvm_lapic *apic)
* using swait_active() is safe.
*/
if (swait_active(q))
- swake_up(q);
+ swake_up_one(q);
if (apic_lvtt_tscdeadline(apic))
ktimer->expired_tscdeadline = ktimer->tscdeadline;
@@ -1403,7 +1477,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
void wait_lapic_expire(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- u64 guest_tsc, tsc_deadline;
+ u64 guest_tsc, tsc_deadline, ns;
if (!lapic_in_kernel(vcpu))
return;
@@ -1423,6 +1497,24 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
if (guest_tsc < tsc_deadline)
__delay(min(tsc_deadline - guest_tsc,
nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
+
+ if (!lapic_timer_advance_adjust_done) {
+ /* too early */
+ if (guest_tsc < tsc_deadline) {
+ ns = (tsc_deadline - guest_tsc) * 1000000ULL;
+ do_div(ns, vcpu->arch.virtual_tsc_khz);
+ lapic_timer_advance_ns -= min((unsigned int)ns,
+ lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+ } else {
+ /* too late */
+ ns = (guest_tsc - tsc_deadline) * 1000000ULL;
+ do_div(ns, vcpu->arch.virtual_tsc_khz);
+ lapic_timer_advance_ns += min((unsigned int)ns,
+ lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+ }
+ if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
+ lapic_timer_advance_adjust_done = true;
+ }
}
static void start_sw_tscdeadline(struct kvm_lapic *apic)
@@ -1455,23 +1547,6 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
local_irq_restore(flags);
}
-static void start_sw_period(struct kvm_lapic *apic)
-{
- if (!apic->lapic_timer.period)
- return;
-
- if (apic_lvtt_oneshot(apic) &&
- ktime_after(ktime_get(),
- apic->lapic_timer.target_expiration)) {
- apic_timer_expired(apic);
- return;
- }
-
- hrtimer_start(&apic->lapic_timer.timer,
- apic->lapic_timer.target_expiration,
- HRTIMER_MODE_ABS_PINNED);
-}
-
static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
{
ktime_t now, remaining;
@@ -1531,11 +1606,43 @@ static bool set_target_expiration(struct kvm_lapic *apic)
static void advance_periodic_target_expiration(struct kvm_lapic *apic)
{
- apic->lapic_timer.tscdeadline +=
- nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
+ ktime_t now = ktime_get();
+ u64 tscl = rdtsc();
+ ktime_t delta;
+
+ /*
+ * Synchronize both deadlines to the same time source or
+ * differences in the periods (caused by differences in the
+ * underlying clocks or numerical approximation errors) will
+ * cause the two to drift apart over time as the errors
+ * accumulate.
+ */
apic->lapic_timer.target_expiration =
ktime_add_ns(apic->lapic_timer.target_expiration,
apic->lapic_timer.period);
+ delta = ktime_sub(apic->lapic_timer.target_expiration, now);
+ apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
+ nsec_to_cycles(apic->vcpu, delta);
+}
+
+static void start_sw_period(struct kvm_lapic *apic)
+{
+ if (!apic->lapic_timer.period)
+ return;
+
+ if (ktime_after(ktime_get(),
+ apic->lapic_timer.target_expiration)) {
+ apic_timer_expired(apic);
+
+ if (apic_lvtt_oneshot(apic))
+ return;
+
+ advance_periodic_target_expiration(apic);
+ }
+
+ hrtimer_start(&apic->lapic_timer.timer,
+ apic->lapic_timer.target_expiration,
+ HRTIMER_MODE_ABS_PINNED);
}
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
@@ -1841,6 +1948,14 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!apic_mmio_in_range(apic, address))
return -EOPNOTSUPP;
+ if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
+ if (!kvm_check_has_quirk(vcpu->kvm,
+ KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
+ return -EOPNOTSUPP;
+
+ return 0;
+ }
+
/*
* APIC register must be aligned on 128-bits boundary.
* 32/64/128 bits registers must be accessed thru 32 bits.
@@ -1979,13 +2094,11 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
}
}
- if ((old_value ^ value) & X2APIC_ENABLE) {
- if (value & X2APIC_ENABLE) {
- kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
- kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
- } else
- kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
- }
+ if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
+ kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
+
+ if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
+ kvm_x86_ops->set_virtual_apic_mode(vcpu);
apic->base_address = apic->vcpu->arch.apic_base &
MSR_IA32_APICBASE_BASE;
@@ -2296,7 +2409,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
r = kvm_apic_state_fixup(vcpu, s, true);
if (r)
return r;
- memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
+ memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
recalculate_apic_map(vcpu->kvm);
kvm_apic_set_version(vcpu);
@@ -2531,17 +2644,25 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
return 0;
}
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
+int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
{
u64 addr = data & ~KVM_MSR_ENABLED;
+ struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
+ unsigned long new_len;
+
if (!IS_ALIGNED(addr, 4))
return 1;
vcpu->arch.pv_eoi.msr_val = data;
if (!pv_eoi_enabled(vcpu))
return 0;
- return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
- addr, sizeof(u8));
+
+ if (addr == ghc->gpa && len <= ghc->len)
+ new_len = ghc->len;
+ else
+ new_len = len;
+
+ return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
}
void kvm_apic_accept_events(struct kvm_vcpu *vcpu)