diff options
Diffstat (limited to 'drivers/hv')
-rw-r--r-- | drivers/hv/channel_mgmt.c | 4 | ||||
-rw-r--r-- | drivers/hv/hv.c | 30 | ||||
-rw-r--r-- | drivers/hv/hv_balloon.c | 3 | ||||
-rw-r--r-- | drivers/hv/hv_util.c | 3 | ||||
-rw-r--r-- | drivers/hv/ring_buffer.c | 65 | ||||
-rw-r--r-- | drivers/hv/vmbus_drv.c | 110 |
6 files changed, 184 insertions, 31 deletions
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index ecc2bd275a73..f3b551a50653 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -527,10 +527,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) struct hv_device *dev = newchannel->primary_channel->device_obj; - if (vmbus_add_channel_kobj(dev, newchannel)) { - atomic_dec(&vmbus_connection.offer_in_progress); + if (vmbus_add_channel_kobj(dev, newchannel)) goto err_free_chan; - } if (channel->sc_creation_callback != NULL) channel->sc_creation_callback(newchannel); diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 658dc765753b..312fe5ed7c40 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -64,7 +64,7 @@ int hv_init(void) return -ENOMEM; direct_mode_enabled = ms_hyperv.misc_features & - HV_X64_STIMER_DIRECT_MODE_AVAILABLE; + HV_STIMER_DIRECT_MODE_AVAILABLE; return 0; } @@ -127,14 +127,14 @@ static int hv_ce_set_next_event(unsigned long delta, current_tick = hyperv_cs->read(NULL); current_tick += delta; - hv_init_timer(HV_X64_MSR_STIMER0_COUNT, current_tick); + hv_init_timer(0, current_tick); return 0; } static int hv_ce_shutdown(struct clock_event_device *evt) { - hv_init_timer(HV_X64_MSR_STIMER0_COUNT, 0); - hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG, 0); + hv_init_timer(0, 0); + hv_init_timer_config(0, 0); if (direct_mode_enabled) hv_disable_stimer0_percpu_irq(stimer0_irq); @@ -164,7 +164,7 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt) timer_cfg.direct_mode = 0; timer_cfg.sintx = VMBUS_MESSAGE_SINT; } - hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); + hv_init_timer_config(0, timer_cfg.as_uint64); return 0; } @@ -298,18 +298,16 @@ int hv_synic_init(unsigned int cpu) hv_set_siefp(siefp.as_uint64); /* Setup the shared SINT. 
*/ - hv_get_synint_state(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, - shared_sint.as_uint64); + hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); shared_sint.vector = HYPERVISOR_CALLBACK_VECTOR; shared_sint.masked = false; - if (ms_hyperv.hints & HV_X64_DEPRECATING_AEOI_RECOMMENDED) + if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED) shared_sint.auto_eoi = false; else shared_sint.auto_eoi = true; - hv_set_synint_state(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, - shared_sint.as_uint64); + hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); /* Enable the global synic bit */ hv_get_synic_state(sctrl.as_uint64); @@ -322,7 +320,7 @@ int hv_synic_init(unsigned int cpu) /* * Register the per-cpu clockevent source. */ - if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE) + if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) clockevents_config_and_register(hv_cpu->clk_evt, HV_TIMER_FREQUENCY, HV_MIN_DELTA_TICKS, @@ -337,7 +335,7 @@ void hv_synic_clockevents_cleanup(void) { int cpu; - if (!(ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)) + if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE)) return; if (direct_mode_enabled) @@ -396,7 +394,7 @@ int hv_synic_cleanup(unsigned int cpu) return -EBUSY; /* Turn off clockevent device */ - if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE) { + if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) { struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context); @@ -405,15 +403,13 @@ int hv_synic_cleanup(unsigned int cpu) put_cpu_ptr(hv_cpu); } - hv_get_synint_state(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, - shared_sint.as_uint64); + hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); shared_sint.masked = 1; /* Need to correctly cleanup in the case of SMP!!! 
*/ /* Disable the interrupt */ - hv_set_synint_state(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, - shared_sint.as_uint64); + hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); hv_get_simp(simp.as_uint64); simp.simp_enabled = 0; diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index b3e9f13f8bc3..b1b788082793 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1765,6 +1765,9 @@ static struct hv_driver balloon_drv = { .id_table = id_table, .probe = balloon_probe, .remove = balloon_remove, + .driver = { + .probe_type = PROBE_PREFER_ASYNCHRONOUS, + }, }; static int __init init_balloon_drv(void) diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 14dce25c104f..423205077bf6 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -487,6 +487,9 @@ static struct hv_driver util_drv = { .id_table = id_table, .probe = util_probe, .remove = util_remove, + .driver = { + .probe_type = PROBE_PREFER_ASYNCHRONOUS, + }, }; static int hv_ptp_enable(struct ptp_clock_info *info, diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index be3c8b10b84a..3e90eb91db45 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -431,7 +431,24 @@ static u32 hv_pkt_iter_bytes_read(const struct hv_ring_buffer_info *rbi, } /* - * Update host ring buffer after iterating over packets. + * Update host ring buffer after iterating over packets. If the host has + * stopped queuing new entries because it found the ring buffer full, and + * sufficient space is being freed up, signal the host. But be careful to + * only signal the host when necessary, both for performance reasons and + * because Hyper-V protects itself by throttling guests that signal + * inappropriately. + * + * Determining when to signal is tricky. There are three key data inputs + * that must be handled in this order to avoid race conditions: + * + * 1. Update the read_index + * 2. Read the pending_send_sz + * 3. 
Read the current write_index + * + * The interrupt_mask is not used to determine when to signal. The + * interrupt_mask is used only on the guest->host ring buffer when + * sending requests to the host. The host does not use it on the host-> + * guest ring buffer to indicate whether it should be signaled. */ void hv_pkt_iter_close(struct vmbus_channel *channel) { @@ -447,22 +464,30 @@ void hv_pkt_iter_close(struct vmbus_channel *channel) start_read_index = rbi->ring_buffer->read_index; rbi->ring_buffer->read_index = rbi->priv_read_index; + /* + * Older versions of Hyper-V (before WS2012 and Win8) do not + * implement pending_send_sz and simply poll if the host->guest + * ring buffer is full. No signaling is needed or expected. + */ if (!rbi->ring_buffer->feature_bits.feat_pending_send_sz) return; /* * Issue a full memory barrier before making the signaling decision. - * Here is the reason for having this barrier: - * If the reading of the pend_sz (in this function) - * were to be reordered and read before we commit the new read - * index (in the calling function) we could - * have a problem. If the host were to set the pending_sz after we - * have sampled pending_sz and go to sleep before we commit the + * If reading pending_send_sz were to be reordered and happen + * before we commit the new read_index, a race could occur. If the + * host were to set the pending_send_sz after we have sampled + * pending_send_sz, and the ring buffer blocks before we commit the * read index, we could miss sending the interrupt. Issue a full * memory barrier to address this. */ virt_mb(); + /* + * If the pending_send_sz is zero, then the ring buffer is not + * blocked and there is no need to signal. This is by far the + * most common case, so exit quickly for best performance. 
+ */ pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz); if (!pending_sz) return; @@ -476,14 +501,32 @@ void hv_pkt_iter_close(struct vmbus_channel *channel) bytes_read = hv_pkt_iter_bytes_read(rbi, start_read_index); /* - * If there was space before we began iteration, - * then host was not blocked. + * We want to signal the host only if we're transitioning + * from a "not enough free space" state to an "enough free + * space" state. For example, it's possible that this function + * could run and free up enough space to signal the host, and then + * run again and free up additional space before the host has a + * chance to clear the pending_send_sz. The 2nd invocation would + * be a null transition from "enough free space" to "enough free + * space", which doesn't warrant a signal. + * + * Exactly filling the ring buffer is treated as "not enough + * space". The ring buffer always must have at least one byte + * empty so the empty and full conditions are distinguishable. + * hv_get_bytes_to_write() doesn't fully tell the truth in + * this regard. + * + * So first check if we were in the "enough free space" state + * before we began the iteration. If so, the host was not + * blocked, and there's no need to signal. */ - if (curr_write_sz - bytes_read > pending_sz) return; - /* If pending write will not fit, don't give false hope. */ + /* + * Similarly, if the new state is "not enough space", then + * there's no need to signal. 
+ */ if (curr_write_sz <= pending_sz) return; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index b10fe26c4891..05e37283d7c3 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -56,6 +56,8 @@ static struct completion probe_event; static int hyperv_cpuhp_online; +static void *hv_panic_page; + static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, void *args) { @@ -1018,6 +1020,75 @@ static void vmbus_isr(void) add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0); } +/* + * Boolean to control whether to report panic messages over Hyper-V. + * + * It can be set via /proc/sys/kernel/hyperv_record_panic_msg + */ +static int sysctl_record_panic_msg = 1; + +/* + * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg + * buffer and call into Hyper-V to transfer the data. + */ +static void hv_kmsg_dump(struct kmsg_dumper *dumper, + enum kmsg_dump_reason reason) +{ + size_t bytes_written; + phys_addr_t panic_pa; + + /* We are only interested in panics. */ + if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg)) + return; + + panic_pa = virt_to_phys(hv_panic_page); + + /* + * Write dump contents to the page. No need to synchronize; panic should + * be single-threaded. + */ + if (!kmsg_dump_get_buffer(dumper, true, hv_panic_page, + PAGE_SIZE, &bytes_written)) { + pr_err("Hyper-V: Unable to get kmsg data for panic\n"); + return; + } + + hyperv_report_panic_msg(panic_pa, bytes_written); +} + +static struct kmsg_dumper hv_kmsg_dumper = { + .dump = hv_kmsg_dump, +}; + +static struct ctl_table_header *hv_ctl_table_hdr; +static int zero; +static int one = 1; + +/* + * sysctl option to allow the user to control whether kmsg data should be + * reported to Hyper-V on panic. 
+ */ +static struct ctl_table hv_ctl_table[] = { + { + .procname = "hyperv_record_panic_msg", + .data = &sysctl_record_panic_msg, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one + }, + {} +}; + +static struct ctl_table hv_root_table[] = { + { + .procname = "kernel", + .mode = 0555, + .child = hv_ctl_table + }, + {} +}; /* * vmbus_bus_init -Main vmbus driver initialization routine. @@ -1065,6 +1136,32 @@ static int vmbus_bus_init(void) * Only register if the crash MSRs are available */ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + u64 hyperv_crash_ctl; + /* + * Sysctl registration is not fatal, since by default + * reporting is enabled. + */ + hv_ctl_table_hdr = register_sysctl_table(hv_root_table); + if (!hv_ctl_table_hdr) + pr_err("Hyper-V: sysctl table register error"); + + /* + * Register for panic kmsg callback only if the right + * capability is supported by the hypervisor. + */ + rdmsrl(HV_X64_MSR_CRASH_CTL, hyperv_crash_ctl); + if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) { + hv_panic_page = (void *)get_zeroed_page(GFP_KERNEL); + if (hv_panic_page) { + ret = kmsg_dump_register(&hv_kmsg_dumper); + if (ret) + pr_err("Hyper-V: kmsg dump register " + "error 0x%x\n", ret); + } else + pr_err("Hyper-V: panic message page memory " + "allocation failed"); + } + register_die_notifier(&hyperv_die_block); atomic_notifier_chain_register(&panic_notifier_list, &hyperv_panic_block); @@ -1081,6 +1178,11 @@ err_alloc: hv_remove_vmbus_irq(); bus_unregister(&hv_bus); + free_page((unsigned long)hv_panic_page); + if (!hv_ctl_table_hdr) { + unregister_sysctl_table(hv_ctl_table_hdr); + hv_ctl_table_hdr = NULL; + } return ret; } @@ -1785,10 +1887,18 @@ static void __exit vmbus_exit(void) vmbus_free_channels(); if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + kmsg_dump_unregister(&hv_kmsg_dumper); unregister_die_notifier(&hyperv_die_block); 
atomic_notifier_chain_unregister(&panic_notifier_list, &hyperv_panic_block); } + + free_page((unsigned long)hv_panic_page); + if (!hv_ctl_table_hdr) { + unregister_sysctl_table(hv_ctl_table_hdr); + hv_ctl_table_hdr = NULL; + } + bus_unregister(&hv_bus); cpuhp_remove_state(hyperv_cpuhp_online); |