summaryrefslogtreecommitdiffstats
path: root/drivers/net/hyperv
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/hyperv')
-rw-r--r--drivers/net/hyperv/hyperv_net.h74
-rw-r--r--drivers/net/hyperv/netvsc.c459
-rw-r--r--drivers/net/hyperv/netvsc_drv.c823
-rw-r--r--drivers/net/hyperv/rndis_filter.c162
4 files changed, 946 insertions, 572 deletions
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 12cc64bfcff8..ec546da86683 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -147,8 +147,9 @@ struct hv_netvsc_packet {
struct netvsc_device_info {
unsigned char mac_adr[ETH_ALEN];
int ring_size;
- u32 max_num_vrss_chns;
u32 num_chn;
+ u32 send_sections;
+ u32 recv_sections;
};
enum rndis_device_state {
@@ -183,13 +184,16 @@ struct rndis_device {
/* Interface */
struct rndis_message;
struct netvsc_device;
-int netvsc_device_add(struct hv_device *device,
- const struct netvsc_device_info *info);
+struct net_device_context;
+
+struct netvsc_device *netvsc_device_add(struct hv_device *device,
+ const struct netvsc_device_info *info);
+int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx);
void netvsc_device_remove(struct hv_device *device);
-int netvsc_send(struct hv_device *device,
+int netvsc_send(struct net_device_context *ndc,
struct hv_netvsc_packet *packet,
struct rndis_message *rndis_msg,
- struct hv_page_buffer **page_buffer,
+ struct hv_page_buffer *page_buffer,
struct sk_buff *skb);
void netvsc_linkstatus_callback(struct hv_device *device_obj,
struct rndis_message *resp);
@@ -200,22 +204,24 @@ int netvsc_recv_callback(struct net_device *net,
const struct ndis_pkt_8021q_info *vlan);
void netvsc_channel_cb(void *context);
int netvsc_poll(struct napi_struct *napi, int budget);
+bool rndis_filter_opened(const struct netvsc_device *nvdev);
int rndis_filter_open(struct netvsc_device *nvdev);
int rndis_filter_close(struct netvsc_device *nvdev);
-int rndis_filter_device_add(struct hv_device *dev,
- struct netvsc_device_info *info);
+struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
+ struct netvsc_device_info *info);
void rndis_filter_update(struct netvsc_device *nvdev);
void rndis_filter_device_remove(struct hv_device *dev,
struct netvsc_device *nvdev);
int rndis_filter_set_rss_param(struct rndis_device *rdev,
- const u8 *key, int num_queue);
+ const u8 *key);
int rndis_filter_receive(struct net_device *ndev,
struct netvsc_device *net_dev,
struct hv_device *dev,
struct vmbus_channel *channel,
void *data, u32 buflen);
-int rndis_filter_set_device_mac(struct net_device *ndev, char *mac);
+int rndis_filter_set_device_mac(struct netvsc_device *ndev,
+ const char *mac);
void netvsc_switch_datapath(struct net_device *nv_dev, bool vf);
@@ -630,12 +636,12 @@ struct nvsp_message {
#define NETVSC_SEND_BUFFER_SIZE (1024 * 1024 * 15) /* 15MB */
#define NETVSC_INVALID_INDEX -1
+#define NETVSC_SEND_SECTION_SIZE 6144
+#define NETVSC_RECV_SECTION_SIZE 1728
#define NETVSC_RECEIVE_BUFFER_ID 0xcafe
#define NETVSC_SEND_BUFFER_ID 0
-#define NETVSC_PACKET_SIZE 4096
-
#define VRSS_SEND_TAB_SIZE 16 /* must be power of 2 */
#define VRSS_CHANNEL_MAX 64
#define VRSS_CHANNEL_DEFAULT 8
@@ -654,13 +660,10 @@ struct recv_comp_data {
u32 status;
};
-/* Netvsc Receive Slots Max */
-#define NETVSC_RECVSLOT_MAX (NETVSC_RECEIVE_BUFFER_SIZE / ETH_DATA_LEN + 1)
-
struct multi_recv_comp {
- void *buf; /* queued receive completions */
- u32 first; /* first data entry */
- u32 next; /* next entry for writing */
+ struct recv_comp_data *slots;
+ u32 first; /* first data entry */
+ u32 next; /* next entry for writing */
};
struct netvsc_stats {
@@ -677,6 +680,17 @@ struct netvsc_ethtool_stats {
unsigned long tx_no_space;
unsigned long tx_too_big;
unsigned long tx_busy;
+ unsigned long tx_send_full;
+ unsigned long rx_comp_busy;
+};
+
+struct netvsc_vf_pcpu_stats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+ struct u64_stats_sync syncp;
+ u32 tx_dropped;
};
struct netvsc_reconfig {
@@ -706,24 +720,27 @@ struct net_device_context {
u32 tx_send_table[VRSS_SEND_TAB_SIZE];
/* Ethtool settings */
+ bool udp4_l4_hash;
+ bool udp6_l4_hash;
u8 duplex;
u32 speed;
struct netvsc_ethtool_stats eth_stats;
/* State to manage the associated VF interface. */
struct net_device __rcu *vf_netdev;
+ struct netvsc_vf_pcpu_stats __percpu *vf_stats;
+ struct delayed_work vf_takeover;
/* 1: allocated, serial number is valid. 0: not allocated */
u32 vf_alloc;
/* Serial number of the VF to team with */
u32 vf_serial;
-
- bool datapath; /* 0 - synthetic, 1 - VF nic */
};
/* Per channel data */
struct netvsc_channel {
struct vmbus_channel *channel;
+ struct netvsc_device *net_device;
const struct vmpacket_descriptor *desc;
struct napi_struct napi;
struct multi_send_data msd;
@@ -743,14 +760,13 @@ struct netvsc_device {
/* Receive buffer allocated by us but manages by NetVSP */
void *recv_buf;
- u32 recv_buf_size;
u32 recv_buf_gpadl_handle;
u32 recv_section_cnt;
- struct nvsp_1_receive_buffer_section *recv_section;
+ u32 recv_section_size;
+ u32 recv_completion_cnt;
/* Send buffer allocated by us */
void *send_buf;
- u32 send_buf_size;
u32 send_buf_gpadl_handle;
u32 send_section_cnt;
u32 send_section_size;
@@ -775,8 +791,6 @@ struct netvsc_device {
u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
u32 pkt_align; /* alignment bytes, e.g. 8 */
- atomic_t num_outstanding_recvs;
-
atomic_t open_cnt;
struct netvsc_channel chan_table[VRSS_CHANNEL_MAX];
@@ -784,18 +798,6 @@ struct netvsc_device {
struct rcu_head rcu;
};
-static inline struct netvsc_device *
-net_device_to_netvsc_device(struct net_device *ndev)
-{
- return ((struct net_device_context *)netdev_priv(ndev))->nvdev;
-}
-
-static inline struct netvsc_device *
-hv_device_to_netvsc_device(struct hv_device *device)
-{
- return net_device_to_netvsc_device(hv_get_drvdata(device));
-}
-
/* NdisInitialize message */
struct rndis_initialize_request {
u32 req_id;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index d18c3326a1f7..0062b802676f 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -29,6 +29,9 @@
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/vmalloc.h>
+#include <linux/rtnetlink.h>
+#include <linux/prefetch.h>
+
#include <asm/sync_bitops.h>
#include "hyperv_net.h"
@@ -41,7 +44,7 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
{
struct net_device_context *net_device_ctx = netdev_priv(ndev);
struct hv_device *dev = net_device_ctx->device_ctx;
- struct netvsc_device *nv_dev = net_device_ctx->nvdev;
+ struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
memset(init_pkt, 0, sizeof(struct nvsp_message));
@@ -57,8 +60,6 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
sizeof(struct nvsp_message),
(unsigned long)init_pkt,
VM_PKT_DATA_INBAND, 0);
-
- net_device_ctx->datapath = vf;
}
static struct netvsc_device *alloc_net_device(void)
@@ -69,14 +70,15 @@ static struct netvsc_device *alloc_net_device(void)
if (!net_device)
return NULL;
- net_device->chan_table[0].mrc.buf
- = vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
-
init_waitqueue_head(&net_device->wait_drain);
net_device->destroy = false;
atomic_set(&net_device->open_cnt, 0);
net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
+
+ net_device->recv_section_size = NETVSC_RECV_SECTION_SIZE;
+ net_device->send_section_size = NETVSC_SEND_SECTION_SIZE;
+
init_completion(&net_device->channel_init_wait);
init_waitqueue_head(&net_device->subchan_open);
@@ -90,7 +92,7 @@ static void free_netvsc_device(struct rcu_head *head)
int i;
for (i = 0; i < VRSS_CHANNEL_MAX; i++)
- vfree(nvdev->chan_table[i].mrc.buf);
+ vfree(nvdev->chan_table[i].mrc.slots);
kfree(nvdev);
}
@@ -104,7 +106,8 @@ static void netvsc_destroy_buf(struct hv_device *device)
{
struct nvsp_message *revoke_packet;
struct net_device *ndev = hv_get_drvdata(device);
- struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
+ struct net_device_context *ndc = netdev_priv(ndev);
+ struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev);
int ret;
/*
@@ -144,6 +147,7 @@ static void netvsc_destroy_buf(struct hv_device *device)
"revoke receive buffer to netvsp\n");
return;
}
+ net_device->recv_section_cnt = 0;
}
/* Teardown the gpadl on the vsp end */
@@ -168,19 +172,13 @@ static void netvsc_destroy_buf(struct hv_device *device)
net_device->recv_buf = NULL;
}
- if (net_device->recv_section) {
- net_device->recv_section_cnt = 0;
- kfree(net_device->recv_section);
- net_device->recv_section = NULL;
- }
-
/* Deal with the send buffer we may have setup.
* If we got a send section size, it means we received a
* NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
* NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need
* to send a revoke msg here
*/
- if (net_device->send_section_size) {
+ if (net_device->send_section_cnt) {
/* Send the revoke receive buffer */
revoke_packet = &net_device->revoke_packet;
memset(revoke_packet, 0, sizeof(struct nvsp_message));
@@ -212,6 +210,7 @@ static void netvsc_destroy_buf(struct hv_device *device)
"revoke send buffer to netvsp\n");
return;
}
+ net_device->send_section_cnt = 0;
}
/* Teardown the gpadl on the vsp end */
if (net_device->send_buf_gpadl_handle) {
@@ -236,25 +235,40 @@ static void netvsc_destroy_buf(struct hv_device *device)
kfree(net_device->send_section_map);
}
+int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
+{
+ struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
+ int node = cpu_to_node(nvchan->channel->target_cpu);
+ size_t size;
+
+ size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data);
+ nvchan->mrc.slots = vzalloc_node(size, node);
+ if (!nvchan->mrc.slots)
+ nvchan->mrc.slots = vzalloc(size);
+
+ return nvchan->mrc.slots ? 0 : -ENOMEM;
+}
+
static int netvsc_init_buf(struct hv_device *device,
- struct netvsc_device *net_device)
+ struct netvsc_device *net_device,
+ const struct netvsc_device_info *device_info)
{
- int ret = 0;
+ struct nvsp_1_message_send_receive_buffer_complete *resp;
+ struct net_device *ndev = hv_get_drvdata(device);
struct nvsp_message *init_packet;
- struct net_device *ndev;
+ unsigned int buf_size;
size_t map_words;
- int node;
-
- ndev = hv_get_drvdata(device);
+ int ret = 0;
- node = cpu_to_node(device->channel->target_cpu);
- net_device->recv_buf = vzalloc_node(net_device->recv_buf_size, node);
- if (!net_device->recv_buf)
- net_device->recv_buf = vzalloc(net_device->recv_buf_size);
+ /* Get receive buffer area. */
+ buf_size = device_info->recv_sections * net_device->recv_section_size;
+ buf_size = roundup(buf_size, PAGE_SIZE);
+ net_device->recv_buf = vzalloc(buf_size);
if (!net_device->recv_buf) {
- netdev_err(ndev, "unable to allocate receive "
- "buffer of size %d\n", net_device->recv_buf_size);
+ netdev_err(ndev,
+ "unable to allocate receive buffer of size %u\n",
+ buf_size);
ret = -ENOMEM;
goto cleanup;
}
@@ -265,7 +279,7 @@ static int netvsc_init_buf(struct hv_device *device,
* than the channel to establish the gpadl handle.
*/
ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
- net_device->recv_buf_size,
+ buf_size,
&net_device->recv_buf_gpadl_handle);
if (ret != 0) {
netdev_err(ndev,
@@ -297,49 +311,45 @@ static int netvsc_init_buf(struct hv_device *device,
wait_for_completion(&net_device->channel_init_wait);
/* Check the response */
- if (init_packet->msg.v1_msg.
- send_recv_buf_complete.status != NVSP_STAT_SUCCESS) {
- netdev_err(ndev, "Unable to complete receive buffer "
- "initialization with NetVsp - status %d\n",
- init_packet->msg.v1_msg.
- send_recv_buf_complete.status);
+ resp = &init_packet->msg.v1_msg.send_recv_buf_complete;
+ if (resp->status != NVSP_STAT_SUCCESS) {
+ netdev_err(ndev,
+ "Unable to complete receive buffer initialization with NetVsp - status %d\n",
+ resp->status);
ret = -EINVAL;
goto cleanup;
}
/* Parse the response */
+ netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n",
+ resp->num_sections, resp->sections[0].sub_alloc_size,
+ resp->sections[0].num_sub_allocs);
- net_device->recv_section_cnt = init_packet->msg.
- v1_msg.send_recv_buf_complete.num_sections;
-
- net_device->recv_section = kmemdup(
- init_packet->msg.v1_msg.send_recv_buf_complete.sections,
- net_device->recv_section_cnt *
- sizeof(struct nvsp_1_receive_buffer_section),
- GFP_KERNEL);
- if (net_device->recv_section == NULL) {
+ /* There should only be one section for the entire receive buffer */
+ if (resp->num_sections != 1 || resp->sections[0].offset != 0) {
ret = -EINVAL;
goto cleanup;
}
- /*
- * For 1st release, there should only be 1 section that represents the
- * entire receive buffer
- */
- if (net_device->recv_section_cnt != 1 ||
- net_device->recv_section->offset != 0) {
- ret = -EINVAL;
+ net_device->recv_section_size = resp->sections[0].sub_alloc_size;
+ net_device->recv_section_cnt = resp->sections[0].num_sub_allocs;
+
+ /* Setup receive completion ring */
+ net_device->recv_completion_cnt
+ = round_up(net_device->recv_section_cnt + 1,
+ PAGE_SIZE / sizeof(u64));
+ ret = netvsc_alloc_recv_comp_ring(net_device, 0);
+ if (ret)
goto cleanup;
- }
- /* Now setup the send buffer.
- */
- net_device->send_buf = vzalloc_node(net_device->send_buf_size, node);
- if (!net_device->send_buf)
- net_device->send_buf = vzalloc(net_device->send_buf_size);
+ /* Now setup the send buffer. */
+ buf_size = device_info->send_sections * net_device->send_section_size;
+ buf_size = round_up(buf_size, PAGE_SIZE);
+
+ net_device->send_buf = vzalloc(buf_size);
if (!net_device->send_buf) {
- netdev_err(ndev, "unable to allocate send "
- "buffer of size %d\n", net_device->send_buf_size);
+ netdev_err(ndev, "unable to allocate send buffer of size %u\n",
+ buf_size);
ret = -ENOMEM;
goto cleanup;
}
@@ -349,7 +359,7 @@ static int netvsc_init_buf(struct hv_device *device,
* than the channel to establish the gpadl handle.
*/
ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
- net_device->send_buf_size,
+ buf_size,
&net_device->send_buf_gpadl_handle);
if (ret != 0) {
netdev_err(ndev,
@@ -394,10 +404,8 @@ static int netvsc_init_buf(struct hv_device *device,
net_device->send_section_size = init_packet->msg.
v1_msg.send_send_buf_complete.section_size;
- /* Section count is simply the size divided by the section size.
- */
- net_device->send_section_cnt =
- net_device->send_buf_size / net_device->send_section_size;
+ /* Section count is simply the size divided by the section size. */
+ net_device->send_section_cnt = buf_size / net_device->send_section_size;
netdev_dbg(ndev, "Send section size: %d, Section count:%d\n",
net_device->send_section_size, net_device->send_section_cnt);
@@ -475,7 +483,8 @@ static int negotiate_nvsp_ver(struct hv_device *device,
}
static int netvsc_connect_vsp(struct hv_device *device,
- struct netvsc_device *net_device)
+ struct netvsc_device *net_device,
+ const struct netvsc_device_info *device_info)
{
const u32 ver_list[] = {
NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
@@ -525,14 +534,8 @@ static int netvsc_connect_vsp(struct hv_device *device,
if (ret != 0)
goto cleanup;
- /* Post the big receive buffer to NetVSP */
- if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
- net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
- else
- net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
- net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
- ret = netvsc_init_buf(device, net_device);
+ ret = netvsc_init_buf(device, net_device, device_info);
cleanup:
return ret;
@@ -550,7 +553,8 @@ void netvsc_device_remove(struct hv_device *device)
{
struct net_device *ndev = hv_get_drvdata(device);
struct net_device_context *net_device_ctx = netdev_priv(ndev);
- struct netvsc_device *net_device = net_device_ctx->nvdev;
+ struct netvsc_device *net_device
+ = rtnl_dereference(net_device_ctx->nvdev);
int i;
netvsc_disconnect_vsp(device);
@@ -693,7 +697,7 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
u32 pend_size,
struct hv_netvsc_packet *packet,
struct rndis_message *rndis_msg,
- struct hv_page_buffer **pb,
+ struct hv_page_buffer *pb,
struct sk_buff *skb)
{
char *start = net_device->send_buf;
@@ -714,9 +718,9 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
}
for (i = 0; i < page_count; i++) {
- char *src = phys_to_virt((*pb)[i].pfn << PAGE_SHIFT);
- u32 offset = (*pb)[i].offset;
- u32 len = (*pb)[i].len;
+ char *src = phys_to_virt(pb[i].pfn << PAGE_SHIFT);
+ u32 offset = pb[i].offset;
+ u32 len = pb[i].len;
memcpy(dest, (src + offset), len);
msg_size += len;
@@ -735,36 +739,32 @@ static inline int netvsc_send_pkt(
struct hv_device *device,
struct hv_netvsc_packet *packet,
struct netvsc_device *net_device,
- struct hv_page_buffer **pb,
+ struct hv_page_buffer *pb,
struct sk_buff *skb)
{
struct nvsp_message nvmsg;
- struct netvsc_channel *nvchan
- = &net_device->chan_table[packet->q_idx];
+ struct nvsp_1_message_send_rndis_packet * const rpkt =
+ &nvmsg.msg.v1_msg.send_rndis_pkt;
+ struct netvsc_channel * const nvchan =
+ &net_device->chan_table[packet->q_idx];
struct vmbus_channel *out_channel = nvchan->channel;
struct net_device *ndev = hv_get_drvdata(device);
struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
u64 req_id;
int ret;
- struct hv_page_buffer *pgbuf;
u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound);
nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
- if (skb != NULL) {
- /* 0 is RMC_DATA; */
- nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 0;
- } else {
- /* 1 is RMC_CONTROL; */
- nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 1;
- }
+ if (skb)
+ rpkt->channel_type = 0; /* 0 is RMC_DATA */
+ else
+ rpkt->channel_type = 1; /* 1 is RMC_CONTROL */
- nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
- packet->send_buf_index;
+ rpkt->send_buf_section_index = packet->send_buf_index;
if (packet->send_buf_index == NETVSC_INVALID_INDEX)
- nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
+ rpkt->send_buf_section_size = 0;
else
- nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size =
- packet->total_data_buflen;
+ rpkt->send_buf_section_size = packet->total_data_buflen;
req_id = (ulong)skb;
@@ -772,21 +772,18 @@ static inline int netvsc_send_pkt(
return -ENODEV;
if (packet->page_buf_cnt) {
- pgbuf = packet->cp_partial ? (*pb) +
- packet->rmsg_pgcnt : (*pb);
- ret = vmbus_sendpacket_pagebuffer_ctl(out_channel,
- pgbuf,
- packet->page_buf_cnt,
- &nvmsg,
- sizeof(struct nvsp_message),
- req_id,
- VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (packet->cp_partial)
+ pb += packet->rmsg_pgcnt;
+
+ ret = vmbus_sendpacket_pagebuffer(out_channel,
+ pb, packet->page_buf_cnt,
+ &nvmsg, sizeof(nvmsg),
+ req_id);
} else {
- ret = vmbus_sendpacket_ctl(out_channel, &nvmsg,
- sizeof(struct nvsp_message),
- req_id,
- VM_PKT_DATA_INBAND,
- VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ ret = vmbus_sendpacket(out_channel,
+ &nvmsg, sizeof(nvmsg),
+ req_id, VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
}
if (ret == 0) {
@@ -801,8 +798,10 @@ static inline int netvsc_send_pkt(
ret = -ENOSPC;
}
} else {
- netdev_err(ndev, "Unable to send packet %p ret %d\n",
- packet, ret);
+ netdev_err(ndev,
+ "Unable to send packet pages %u len %u, ret %d\n",
+ packet->page_buf_cnt, packet->total_data_buflen,
+ ret);
}
return ret;
@@ -820,13 +819,16 @@ static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
msdp->count = 0;
}
-int netvsc_send(struct hv_device *device,
+/* RCU already held by caller */
+int netvsc_send(struct net_device_context *ndev_ctx,
struct hv_netvsc_packet *packet,
struct rndis_message *rndis_msg,
- struct hv_page_buffer **pb,
+ struct hv_page_buffer *pb,
struct sk_buff *skb)
{
- struct netvsc_device *net_device = hv_device_to_netvsc_device(device);
+ struct netvsc_device *net_device
+ = rcu_dereference_bh(ndev_ctx->nvdev);
+ struct hv_device *device = ndev_ctx->device_ctx;
int ret = 0;
struct netvsc_channel *nvchan;
u32 pktlen = packet->total_data_buflen, msd_len = 0;
@@ -838,7 +840,7 @@ int netvsc_send(struct hv_device *device,
bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
/* If device is rescinded, return error and packet will get dropped. */
- if (unlikely(net_device->destroy))
+ if (unlikely(!net_device || net_device->destroy))
return -ENODEV;
/* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get
@@ -878,7 +880,9 @@ int netvsc_send(struct hv_device *device,
} else if (pktlen + net_device->pkt_align <
net_device->send_section_size) {
section_index = netvsc_get_next_send_section(net_device);
- if (section_index != NETVSC_INVALID_INDEX) {
+ if (unlikely(section_index == NETVSC_INVALID_INDEX)) {
+ ++ndev_ctx->eth_stats.tx_send_full;
+ } else {
move_pkt_msd(&msd_send, &msd_skb, msdp);
msd_len = 0;
}
@@ -943,130 +947,99 @@ send_now:
return ret;
}
-static int netvsc_send_recv_completion(struct vmbus_channel *channel,
- u64 transaction_id, u32 status)
+/* Send pending recv completions */
+static int send_recv_completions(struct net_device *ndev,
+ struct netvsc_device *nvdev,
+ struct netvsc_channel *nvchan)
{
- struct nvsp_message recvcompMessage;
+ struct multi_recv_comp *mrc = &nvchan->mrc;
+ struct recv_comp_msg {
+ struct nvsp_message_header hdr;
+ u32 status;
+ } __packed;
+ struct recv_comp_msg msg = {
+ .hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE,
+ };
int ret;
- recvcompMessage.hdr.msg_type =
- NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;
-
- recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;
-
- /* Send the completion */
- ret = vmbus_sendpacket(channel, &recvcompMessage,
- sizeof(struct nvsp_message_header) + sizeof(u32),
- transaction_id, VM_PKT_COMP, 0);
-
- return ret;
-}
-
-static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx,
- u32 *filled, u32 *avail)
-{
- struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
- u32 first = mrc->first;
- u32 next = mrc->next;
-
- *filled = (first > next) ? NETVSC_RECVSLOT_MAX - first + next :
- next - first;
+ while (mrc->first != mrc->next) {
+ const struct recv_comp_data *rcd
+ = mrc->slots + mrc->first;
- *avail = NETVSC_RECVSLOT_MAX - *filled - 1;
-}
+ msg.status = rcd->status;
+ ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg),
+ rcd->tid, VM_PKT_COMP, 0);
+ if (unlikely(ret)) {
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
-/* Read the first filled slot, no change to index */
-static inline struct recv_comp_data *read_recv_comp_slot(struct netvsc_device
- *nvdev, u16 q_idx)
-{
- struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
- u32 filled, avail;
+ ++ndev_ctx->eth_stats.rx_comp_busy;
+ return ret;
+ }
- if (unlikely(!mrc->buf))
- return NULL;
+ if (++mrc->first == nvdev->recv_completion_cnt)
+ mrc->first = 0;
+ }
- count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
- if (!filled)
- return NULL;
+ /* receive completion ring has been emptied */
+ if (unlikely(nvdev->destroy))
+ wake_up(&nvdev->wait_drain);
- return mrc->buf + mrc->first * sizeof(struct recv_comp_data);
+ return 0;
}
-/* Put the first filled slot back to available pool */
-static inline void put_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx)
+/* Count how many receive completions are outstanding */
+static void recv_comp_slot_avail(const struct netvsc_device *nvdev,
+ const struct multi_recv_comp *mrc,
+ u32 *filled, u32 *avail)
{
- struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
- int num_recv;
-
- mrc->first = (mrc->first + 1) % NETVSC_RECVSLOT_MAX;
+ u32 count = nvdev->recv_completion_cnt;
- num_recv = atomic_dec_return(&nvdev->num_outstanding_recvs);
+ if (mrc->next >= mrc->first)
+ *filled = mrc->next - mrc->first;
+ else
+ *filled = (count - mrc->first) + mrc->next;
- if (nvdev->destroy && num_recv == 0)
- wake_up(&nvdev->wait_drain);
+ *avail = count - *filled - 1;
}
-/* Check and send pending recv completions */
-static void netvsc_chk_recv_comp(struct netvsc_device *nvdev,
- struct vmbus_channel *channel, u16 q_idx)
+/* Add receive complete to ring to send to host. */
+static void enq_receive_complete(struct net_device *ndev,
+ struct netvsc_device *nvdev, u16 q_idx,
+ u64 tid, u32 status)
{
+ struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx];
+ struct multi_recv_comp *mrc = &nvchan->mrc;
struct recv_comp_data *rcd;
- int ret;
-
- while (true) {
- rcd = read_recv_comp_slot(nvdev, q_idx);
- if (!rcd)
- break;
+ u32 filled, avail;
- ret = netvsc_send_recv_completion(channel, rcd->tid,
- rcd->status);
- if (ret)
- break;
+ recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
- put_recv_comp_slot(nvdev, q_idx);
+ if (unlikely(filled > NAPI_POLL_WEIGHT)) {
+ send_recv_completions(ndev, nvdev, nvchan);
+ recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
}
-}
-
-#define NETVSC_RCD_WATERMARK 80
-
-/* Get next available slot */
-static inline struct recv_comp_data *get_recv_comp_slot(
- struct netvsc_device *nvdev, struct vmbus_channel *channel, u16 q_idx)
-{
- struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
- u32 filled, avail, next;
- struct recv_comp_data *rcd;
- if (unlikely(!nvdev->recv_section))
- return NULL;
-
- if (unlikely(!mrc->buf))
- return NULL;
-
- if (atomic_read(&nvdev->num_outstanding_recvs) >
- nvdev->recv_section->num_sub_allocs * NETVSC_RCD_WATERMARK / 100)
- netvsc_chk_recv_comp(nvdev, channel, q_idx);
-
- count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
- if (!avail)
- return NULL;
-
- next = mrc->next;
- rcd = mrc->buf + next * sizeof(struct recv_comp_data);
- mrc->next = (next + 1) % NETVSC_RECVSLOT_MAX;
+ if (unlikely(!avail)) {
+ netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
+ q_idx, tid);
+ return;
+ }
- atomic_inc(&nvdev->num_outstanding_recvs);
+ rcd = mrc->slots + mrc->next;
+ rcd->tid = tid;
+ rcd->status = status;
- return rcd;
+ if (++mrc->next == nvdev->recv_completion_cnt)
+ mrc->next = 0;
}
static int netvsc_receive(struct net_device *ndev,
- struct netvsc_device *net_device,
- struct net_device_context *net_device_ctx,
- struct hv_device *device,
- struct vmbus_channel *channel,
- const struct vmpacket_descriptor *desc,
- struct nvsp_message *nvsp)
+ struct netvsc_device *net_device,
+ struct net_device_context *net_device_ctx,
+ struct hv_device *device,
+ struct vmbus_channel *channel,
+ const struct vmpacket_descriptor *desc,
+ struct nvsp_message *nvsp)
{
const struct vmtransfer_page_packet_header *vmxferpage_packet
= container_of(desc, const struct vmtransfer_page_packet_header, d);
@@ -1075,7 +1048,6 @@ static int netvsc_receive(struct net_device *ndev,
u32 status = NVSP_STAT_SUCCESS;
int i;
int count = 0;
- int ret;
/* Make sure this is a valid nvsp packet */
if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
@@ -1106,25 +1078,9 @@ static int netvsc_receive(struct net_device *ndev,
channel, data, buflen);
}
- if (net_device->chan_table[q_idx].mrc.buf) {
- struct recv_comp_data *rcd;
+ enq_receive_complete(ndev, net_device, q_idx,
+ vmxferpage_packet->d.trans_id, status);
- rcd = get_recv_comp_slot(net_device, channel, q_idx);
- if (rcd) {
- rcd->tid = vmxferpage_packet->d.trans_id;
- rcd->status = status;
- } else {
- netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
- q_idx, vmxferpage_packet->d.trans_id);
- }
- } else {
- ret = netvsc_send_recv_completion(channel,
- vmxferpage_packet->d.trans_id,
- status);
- if (ret)
- netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n",
- q_idx, vmxferpage_packet->d.trans_id, ret);
- }
return count;
}
@@ -1220,11 +1176,10 @@ int netvsc_poll(struct napi_struct *napi, int budget)
{
struct netvsc_channel *nvchan
= container_of(napi, struct netvsc_channel, napi);
+ struct netvsc_device *net_device = nvchan->net_device;
struct vmbus_channel *channel = nvchan->channel;
struct hv_device *device = netvsc_channel_to_device(channel);
- u16 q_idx = channel->offermsg.offer.sub_channel_index;
struct net_device *ndev = hv_get_drvdata(device);
- struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
int work_done = 0;
/* If starting a new interval */
@@ -1237,17 +1192,19 @@ int netvsc_poll(struct napi_struct *napi, int budget)
nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
}
- /* If receive ring was exhausted
- * and not doing busy poll
+ /* If send of pending receive completions suceeded
+ * and did not exhaust NAPI budget this time
+ * and not doing busy poll
* then re-enable host interrupts
- * and reschedule if ring is not empty.
+ * and reschedule if ring is not empty.
*/
- if (work_done < budget &&
+ if (send_recv_completions(ndev, net_device, nvchan) == 0 &&
+ work_done < budget &&
napi_complete_done(napi, work_done) &&
- hv_end_read(&channel->inbound) != 0)
+ hv_end_read(&channel->inbound)) {
+ hv_begin_read(&channel->inbound);
napi_reschedule(napi);
-
- netvsc_chk_recv_comp(net_device, channel, q_idx);
+ }
/* Driver may overshoot since multiple packets per descriptor */
return min(work_done, budget);
@@ -1259,10 +1216,15 @@ int netvsc_poll(struct napi_struct *napi, int budget)
void netvsc_channel_cb(void *context)
{
struct netvsc_channel *nvchan = context;
+ struct vmbus_channel *channel = nvchan->channel;
+ struct hv_ring_buffer_info *rbi = &channel->inbound;
+
+ /* preload first vmpacket descriptor */
+ prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);
if (napi_schedule_prep(&nvchan->napi)) {
/* disable interupts from host */
- hv_begin_read(&nvchan->channel->inbound);
+ hv_begin_read(rbi);
__napi_schedule(&nvchan->napi);
}
@@ -1272,8 +1234,8 @@ void netvsc_channel_cb(void *context)
* netvsc_device_add - Callback when the device belonging to this
* driver is added
*/
-int netvsc_device_add(struct hv_device *device,
- const struct netvsc_device_info *device_info)
+struct netvsc_device *netvsc_device_add(struct hv_device *device,
+ const struct netvsc_device_info *device_info)
{
int i, ret = 0;
int ring_size = device_info->ring_size;
@@ -1283,7 +1245,7 @@ int netvsc_device_add(struct hv_device *device,
net_device = alloc_net_device();
if (!net_device)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
net_device->ring_size = ring_size;
@@ -1303,6 +1265,7 @@ int netvsc_device_add(struct hv_device *device,
struct netvsc_channel *nvchan = &net_device->chan_table[i];
nvchan->channel = device->channel;
+ nvchan->net_device = net_device;
u64_stats_init(&nvchan->tx_stats.syncp);
u64_stats_init(&nvchan->rx_stats.syncp);
}
@@ -1334,17 +1297,18 @@ int netvsc_device_add(struct hv_device *device,
rcu_assign_pointer(net_device_ctx->nvdev, net_device);
/* Connect with the NetVsp */
- ret = netvsc_connect_vsp(device, net_device);
+ ret = netvsc_connect_vsp(device, net_device, device_info);
if (ret != 0) {
netdev_err(ndev,
"unable to connect to NetVSP - %d\n", ret);
goto close;
}
- return ret;
+ return net_device;
close:
- netif_napi_del(&net_device->chan_table[0].napi);
+ RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
+ napi_disable(&net_device->chan_table[0].napi);
/* Now, we can close the channel safely */
vmbus_close(device->channel);
@@ -1352,6 +1316,5 @@ close:
cleanup:
free_netvsc_device(&net_device->rcu);
- return ret;
-
+ return ERR_PTR(ret);
}
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 0d78727f1a14..165ba4b3b423 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -33,6 +33,9 @@
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/slab.h>
+#include <linux/rtnetlink.h>
+#include <linux/netpoll.h>
+
#include <net/arp.h>
#include <net/route.h>
#include <net/sock.h>
@@ -42,8 +45,14 @@
#include "hyperv_net.h"
-#define RING_SIZE_MIN 64
+#define RING_SIZE_MIN 64
+#define NETVSC_MIN_TX_SECTIONS 10
+#define NETVSC_DEFAULT_TX 192 /* ~1M */
+#define NETVSC_MIN_RX_SECTIONS 10 /* ~64K */
+#define NETVSC_DEFAULT_RX 2048 /* ~4M */
+
#define LINKCHANGE_INT (2 * HZ)
+#define VF_TAKEOVER_INT (HZ / 10)
static int ring_size = 128;
module_param(ring_size, int, S_IRUGO);
@@ -69,7 +78,8 @@ static void netvsc_set_multicast_list(struct net_device *net)
static int netvsc_open(struct net_device *net)
{
struct net_device_context *ndev_ctx = netdev_priv(net);
- struct netvsc_device *nvdev = ndev_ctx->nvdev;
+ struct net_device *vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndev_ctx->nvdev);
struct rndis_device *rdev;
int ret = 0;
@@ -85,22 +95,40 @@ static int netvsc_open(struct net_device *net)
netif_tx_wake_all_queues(net);
rdev = nvdev->extension;
- if (!rdev->link_state && !ndev_ctx->datapath)
+
+ if (!rdev->link_state)
netif_carrier_on(net);
- return ret;
+ if (vf_netdev) {
+ /* Setting synthetic device up transparently sets
+ * slave as up. If open fails, then slave will be
+ * still be offline (and not used).
+ */
+ ret = dev_open(vf_netdev);
+ if (ret)
+ netdev_warn(net,
+ "unable to open slave: %s: %d\n",
+ vf_netdev->name, ret);
+ }
+ return 0;
}
static int netvsc_close(struct net_device *net)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
+ struct net_device *vf_netdev
+ = rtnl_dereference(net_device_ctx->vf_netdev);
struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
- int ret;
+ int ret = 0;
u32 aread, i, msec = 10, retry = 0, retry_max = 20;
struct vmbus_channel *chn;
netif_tx_disable(net);
+ /* No need to close rndis filter if it is removed already */
+ if (!nvdev)
+ goto out;
+
ret = rndis_filter_close(nvdev);
if (ret != 0) {
netdev_err(net, "unable to close device (ret %d).\n", ret);
@@ -139,11 +167,15 @@ static int netvsc_close(struct net_device *net)
ret = -ETIMEDOUT;
}
+out:
+ if (vf_netdev)
+ dev_close(vf_netdev);
+
return ret;
}
static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
- int pkt_type)
+ int pkt_type)
{
struct rndis_packet *rndis_pkt;
struct rndis_per_packet_info *ppi;
@@ -163,10 +195,12 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
return ppi;
}
-/* Azure hosts don't support non-TCP port numbers in hashing yet. We compute
- * hash for non-TCP traffic with only IP numbers.
+/* Azure hosts don't support non-TCP port numbers in hashing for fragmented
+ * packets. We can use ethtool to change UDP hash level when necessary.
*/
-static inline u32 netvsc_get_hash(struct sk_buff *skb, struct sock *sk)
+static inline u32 netvsc_get_hash(
+ struct sk_buff *skb,
+ const struct net_device_context *ndc)
{
struct flow_keys flow;
u32 hash;
@@ -177,7 +211,11 @@ static inline u32 netvsc_get_hash(struct sk_buff *skb, struct sock *sk)
if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
return 0;
- if (flow.basic.ip_proto == IPPROTO_TCP) {
+ if (flow.basic.ip_proto == IPPROTO_TCP ||
+ (flow.basic.ip_proto == IPPROTO_UDP &&
+ ((flow.basic.n_proto == htons(ETH_P_IP) && ndc->udp4_l4_hash) ||
+ (flow.basic.n_proto == htons(ETH_P_IPV6) &&
+ ndc->udp6_l4_hash)))) {
return skb_get_hash(skb);
} else {
if (flow.basic.n_proto == htons(ETH_P_IP))
@@ -200,7 +238,7 @@ static inline int netvsc_get_tx_queue(struct net_device *ndev,
struct sock *sk = skb->sk;
int q_idx;
- q_idx = ndc->tx_send_table[netvsc_get_hash(skb, sk) &
+ q_idx = ndc->tx_send_table[netvsc_get_hash(skb, ndc) &
(VRSS_SEND_TAB_SIZE - 1)];
/* If queue index changed record the new value */
@@ -222,13 +260,11 @@ static inline int netvsc_get_tx_queue(struct net_device *ndev,
*
* TODO support XPS - but get_xps_queue not exported
*/
-static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
- void *accel_priv, select_queue_fallback_t fallback)
+static u16 netvsc_pick_tx(struct net_device *ndev, struct sk_buff *skb)
{
- unsigned int num_tx_queues = ndev->real_num_tx_queues;
int q_idx = sk_tx_queue_get(skb->sk);
- if (q_idx < 0 || skb->ooo_okay) {
+ if (q_idx < 0 || skb->ooo_okay || q_idx >= ndev->real_num_tx_queues) {
/* If forwarding a packet, we use the recorded queue when
* available for better cache locality.
*/
@@ -238,14 +274,35 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
q_idx = netvsc_get_tx_queue(ndev, skb, q_idx);
}
- while (unlikely(q_idx >= num_tx_queues))
- q_idx -= num_tx_queues;
-
return q_idx;
}
+static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
+ void *accel_priv,
+ select_queue_fallback_t fallback)
+{
+ struct net_device_context *ndc = netdev_priv(ndev);
+ struct net_device *vf_netdev;
+ u16 txq;
+
+ rcu_read_lock();
+ vf_netdev = rcu_dereference(ndc->vf_netdev);
+ if (vf_netdev) {
+ txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
+ qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
+ } else {
+ txq = netvsc_pick_tx(ndev, skb);
+ }
+ rcu_read_unlock();
+
+ while (unlikely(txq >= ndev->real_num_tx_queues))
+ txq -= ndev->real_num_tx_queues;
+
+ return txq;
+}
+
static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
- struct hv_page_buffer *pb)
+ struct hv_page_buffer *pb)
{
int j = 0;
@@ -280,9 +337,8 @@ static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
struct hv_netvsc_packet *packet,
- struct hv_page_buffer **page_buf)
+ struct hv_page_buffer *pb)
{
- struct hv_page_buffer *pb = *page_buf;
u32 slots_used = 0;
char *data = skb->data;
int frags = skb_shinfo(skb)->nr_frags;
@@ -293,10 +349,9 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
* 2. skb linear data
* 3. skb fragment data
*/
- if (hdr != NULL)
- slots_used += fill_pg_buf(virt_to_page(hdr),
- offset_in_page(hdr),
- len, &pb[slots_used]);
+ slots_used += fill_pg_buf(virt_to_page(hdr),
+ offset_in_page(hdr),
+ len, &pb[slots_used]);
packet->rmsg_size = len;
packet->rmsg_pgcnt = slots_used;
@@ -359,13 +414,40 @@ static u32 net_checksum_info(struct sk_buff *skb)
if (ip6->nexthdr == IPPROTO_TCP)
return TRANSPORT_INFO_IPV6_TCP;
- else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP)
+ else if (ip6->nexthdr == IPPROTO_UDP)
return TRANSPORT_INFO_IPV6_UDP;
}
return TRANSPORT_INFO_NOT_IP;
}
+/* Send skb on the slave VF device. */
+static int netvsc_vf_xmit(struct net_device *net, struct net_device *vf_netdev,
+ struct sk_buff *skb)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(net);
+ unsigned int len = skb->len;
+ int rc;
+
+ skb->dev = vf_netdev;
+ skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping;
+
+ rc = dev_queue_xmit(skb);
+ if (likely(rc == NET_XMIT_SUCCESS || rc == NET_XMIT_CN)) {
+ struct netvsc_vf_pcpu_stats *pcpu_stats
+ = this_cpu_ptr(ndev_ctx->vf_stats);
+
+ u64_stats_update_begin(&pcpu_stats->syncp);
+ pcpu_stats->tx_packets++;
+ pcpu_stats->tx_bytes += len;
+ u64_stats_update_end(&pcpu_stats->syncp);
+ } else {
+ this_cpu_inc(ndev_ctx->vf_stats->tx_dropped);
+ }
+
+ return rc;
+}
+
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
@@ -374,11 +456,19 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
unsigned int num_data_pgs;
struct rndis_message *rndis_msg;
struct rndis_packet *rndis_pkt;
+ struct net_device *vf_netdev;
u32 rndis_msg_size;
struct rndis_per_packet_info *ppi;
u32 hash;
- struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
- struct hv_page_buffer *pb = page_buf;
+ struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT];
+
+ /* if VF is present and up then redirect packets
+ * already called with rcu_read_lock_bh
+ */
+ vf_netdev = rcu_dereference_bh(net_device_ctx->vf_netdev);
+ if (vf_netdev && netif_running(vf_netdev) &&
+ !netpoll_tx_running(net))
+ return netvsc_vf_xmit(net, vf_netdev, skb);
/* We will atmost need two pages to describe the rndis
* header. We can only transmit MAX_PAGE_BUFFER_COUNT number
@@ -448,9 +538,9 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
rndis_msg_size += NDIS_VLAN_PPI_SIZE;
ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
- IEEE_8021Q_INFO);
- vlan = (struct ndis_pkt_8021q_info *)((void *)ppi +
- ppi->ppi_offset);
+ IEEE_8021Q_INFO);
+
+ vlan = (void *)ppi + ppi->ppi_offset;
vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK;
vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >>
VLAN_PRIO_SHIFT;
@@ -463,8 +553,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
TCP_LARGESEND_PKTINFO);
- lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
- ppi->ppi_offset);
+ lso_info = (void *)ppi + ppi->ppi_offset;
lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
if (skb->protocol == htons(ETH_P_IP)) {
@@ -524,12 +613,12 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
rndis_msg->msg_len += rndis_msg_size;
packet->total_data_buflen = rndis_msg->msg_len;
packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
- skb, packet, &pb);
+ skb, packet, pb);
/* timestamp packet in software */
skb_tx_timestamp(skb);
- ret = netvsc_send(net_device_ctx->device_ctx, packet,
- rndis_msg, &pb, skb);
+
+ ret = netvsc_send(net_device_ctx, packet, rndis_msg, pb, skb);
if (likely(ret == 0))
return NETDEV_TX_OK;
@@ -551,6 +640,7 @@ no_memory:
++net_device_ctx->eth_stats.tx_no_memory;
goto drop;
}
+
/*
* netvsc_linkstatus_callback - Link up/down notification
*/
@@ -574,8 +664,8 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) {
u32 speed;
- speed = *(u32 *)((void *)indicate + indicate->
- status_buf_offset) / 10000;
+ speed = *(u32 *)((void *)indicate
+ + indicate->status_buf_offset) / 10000;
ndev_ctx->speed = speed;
return;
}
@@ -658,29 +748,18 @@ int netvsc_recv_callback(struct net_device *net,
struct netvsc_device *net_device;
u16 q_idx = channel->offermsg.offer.sub_channel_index;
struct netvsc_channel *nvchan;
- struct net_device *vf_netdev;
struct sk_buff *skb;
struct netvsc_stats *rx_stats;
if (net->reg_state != NETREG_REGISTERED)
return NVSP_STAT_FAIL;
- /*
- * If necessary, inject this packet into the VF interface.
- * On Hyper-V, multicast and brodcast packets are only delivered
- * to the synthetic interface (after subjecting these to
- * policy filters on the host). Deliver these via the VF
- * interface in the guest.
- */
rcu_read_lock();
net_device = rcu_dereference(net_device_ctx->nvdev);
if (unlikely(!net_device))
goto drop;
nvchan = &net_device->chan_table[q_idx];
- vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
- if (vf_netdev && (vf_netdev->flags & IFF_UP))
- net = vf_netdev;
/* Allocate a skb - TODO direct I/O to pages? */
skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
@@ -692,8 +771,7 @@ drop:
return NVSP_STAT_FAIL;
}
- if (net != vf_netdev)
- skb_record_rx_queue(skb, q_idx);
+ skb_record_rx_queue(skb, q_idx);
/*
* Even if injecting the packet, record the statistics
@@ -736,48 +814,22 @@ static void netvsc_get_channels(struct net_device *net,
}
}
-static int netvsc_set_queues(struct net_device *net, struct hv_device *dev,
- u32 num_chn)
-{
- struct netvsc_device_info device_info;
- int ret;
-
- memset(&device_info, 0, sizeof(device_info));
- device_info.num_chn = num_chn;
- device_info.ring_size = ring_size;
- device_info.max_num_vrss_chns = num_chn;
-
- ret = rndis_filter_device_add(dev, &device_info);
- if (ret)
- return ret;
-
- ret = netif_set_real_num_tx_queues(net, num_chn);
- if (ret)
- return ret;
-
- ret = netif_set_real_num_rx_queues(net, num_chn);
-
- return ret;
-}
-
static int netvsc_set_channels(struct net_device *net,
struct ethtool_channels *channels)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct hv_device *dev = net_device_ctx->device_ctx;
struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
- unsigned int count = channels->combined_count;
- bool was_running;
- int ret;
+ unsigned int orig, count = channels->combined_count;
+ struct netvsc_device_info device_info;
+ bool was_opened;
+ int ret = 0;
/* We do not support separate count for rx, tx, or other */
if (count == 0 ||
channels->rx_count || channels->tx_count || channels->other_count)
return -EINVAL;
- if (count > net->num_tx_queues || count > VRSS_CHANNEL_MAX)
- return -EINVAL;
-
if (!nvdev || nvdev->destroy)
return -ENODEV;
@@ -787,25 +839,40 @@ static int netvsc_set_channels(struct net_device *net,
if (count > nvdev->max_chn)
return -EINVAL;
- was_running = netif_running(net);
- if (was_running) {
- ret = netvsc_close(net);
- if (ret)
- return ret;
- }
+ orig = nvdev->num_chn;
+ was_opened = rndis_filter_opened(nvdev);
+ if (was_opened)
+ rndis_filter_close(nvdev);
+
+ memset(&device_info, 0, sizeof(device_info));
+ device_info.num_chn = count;
+ device_info.ring_size = ring_size;
+ device_info.send_sections = nvdev->send_section_cnt;
+ device_info.recv_sections = nvdev->recv_section_cnt;
rndis_filter_device_remove(dev, nvdev);
- ret = netvsc_set_queues(net, dev, count);
- if (ret == 0)
- nvdev->num_chn = count;
- else
- netvsc_set_queues(net, dev, nvdev->num_chn);
+ nvdev = rndis_filter_device_add(dev, &device_info);
+ if (!IS_ERR(nvdev)) {
+ netif_set_real_num_tx_queues(net, nvdev->num_chn);
+ netif_set_real_num_rx_queues(net, nvdev->num_chn);
+ } else {
+ ret = PTR_ERR(nvdev);
+ device_info.num_chn = orig;
+ nvdev = rndis_filter_device_add(dev, &device_info);
- if (was_running)
- ret = netvsc_open(net);
+ if (IS_ERR(nvdev)) {
+ netdev_err(net, "restoring channel setting failed: %ld\n",
+ PTR_ERR(nvdev));
+ return ret;
+ }
+ }
+
+ if (was_opened)
+ rndis_filter_open(nvdev);
/* We may have missed link change notifications */
+ net_device_ctx->last_reconfig = 0;
schedule_delayed_work(&net_device_ctx->dwork, 0);
return ret;
@@ -832,6 +899,9 @@ static void netvsc_init_settings(struct net_device *dev)
{
struct net_device_context *ndc = netdev_priv(dev);
+ ndc->udp4_l4_hash = true;
+ ndc->udp6_l4_hash = true;
+
ndc->speed = SPEED_UNKNOWN;
ndc->duplex = DUPLEX_FULL;
}
@@ -869,41 +939,61 @@ static int netvsc_set_link_ksettings(struct net_device *dev,
static int netvsc_change_mtu(struct net_device *ndev, int mtu)
{
struct net_device_context *ndevctx = netdev_priv(ndev);
+ struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
struct hv_device *hdev = ndevctx->device_ctx;
+ int orig_mtu = ndev->mtu;
struct netvsc_device_info device_info;
- bool was_running;
+ bool was_opened;
int ret = 0;
if (!nvdev || nvdev->destroy)
return -ENODEV;
- was_running = netif_running(ndev);
- if (was_running) {
- ret = netvsc_close(ndev);
+ /* Change MTU of underlying VF netdev first. */
+ if (vf_netdev) {
+ ret = dev_set_mtu(vf_netdev, mtu);
if (ret)
return ret;
}
+ netif_device_detach(ndev);
+ was_opened = rndis_filter_opened(nvdev);
+ if (was_opened)
+ rndis_filter_close(nvdev);
+
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
device_info.num_chn = nvdev->num_chn;
- device_info.max_num_vrss_chns = nvdev->num_chn;
+ device_info.send_sections = nvdev->send_section_cnt;
+ device_info.recv_sections = nvdev->recv_section_cnt;
rndis_filter_device_remove(hdev, nvdev);
- /* 'nvdev' has been freed in rndis_filter_device_remove() ->
- * netvsc_device_remove () -> free_netvsc_device().
- * We mustn't access it before it's re-created in
- * rndis_filter_device_add() -> netvsc_device_add().
- */
-
ndev->mtu = mtu;
- rndis_filter_device_add(hdev, &device_info);
+ nvdev = rndis_filter_device_add(hdev, &device_info);
+ if (IS_ERR(nvdev)) {
+ ret = PTR_ERR(nvdev);
- if (was_running)
- ret = netvsc_open(ndev);
+ /* Attempt rollback to original MTU */
+ ndev->mtu = orig_mtu;
+ nvdev = rndis_filter_device_add(hdev, &device_info);
+
+ if (vf_netdev)
+ dev_set_mtu(vf_netdev, orig_mtu);
+
+ if (IS_ERR(nvdev)) {
+ netdev_err(ndev, "restoring mtu failed: %ld\n",
+ PTR_ERR(nvdev));
+ return ret;
+ }
+ }
+
+ if (was_opened)
+ rndis_filter_open(nvdev);
+
+ netif_device_attach(ndev);
/* We may have missed link change notifications */
schedule_delayed_work(&ndevctx->dwork, 0);
@@ -911,16 +1001,56 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
return ret;
}
+static void netvsc_get_vf_stats(struct net_device *net,
+ struct netvsc_vf_pcpu_stats *tot)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(net);
+ int i;
+
+ memset(tot, 0, sizeof(*tot));
+
+ for_each_possible_cpu(i) {
+ const struct netvsc_vf_pcpu_stats *stats
+ = per_cpu_ptr(ndev_ctx->vf_stats, i);
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&stats->syncp);
+ rx_packets = stats->rx_packets;
+ tx_packets = stats->tx_packets;
+ rx_bytes = stats->rx_bytes;
+ tx_bytes = stats->tx_bytes;
+ } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+ tot->rx_packets += rx_packets;
+ tot->tx_packets += tx_packets;
+ tot->rx_bytes += rx_bytes;
+ tot->tx_bytes += tx_bytes;
+ tot->tx_dropped += stats->tx_dropped;
+ }
+}
+
static void netvsc_get_stats64(struct net_device *net,
struct rtnl_link_stats64 *t)
{
struct net_device_context *ndev_ctx = netdev_priv(net);
struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
+ struct netvsc_vf_pcpu_stats vf_tot;
int i;
if (!nvdev)
return;
+ netdev_stats_to_stats64(t, &net->stats);
+
+ netvsc_get_vf_stats(net, &vf_tot);
+ t->rx_packets += vf_tot.rx_packets;
+ t->tx_packets += vf_tot.tx_packets;
+ t->rx_bytes += vf_tot.rx_bytes;
+ t->tx_bytes += vf_tot.tx_bytes;
+ t->tx_dropped += vf_tot.tx_dropped;
+
for (i = 0; i < nvdev->num_chn; i++) {
const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
const struct netvsc_stats *stats;
@@ -949,33 +1079,36 @@ static void netvsc_get_stats64(struct net_device *net,
t->rx_packets += packets;
t->multicast += multicast;
}
-
- t->tx_dropped = net->stats.tx_dropped;
- t->tx_errors = net->stats.tx_errors;
-
- t->rx_dropped = net->stats.rx_dropped;
- t->rx_errors = net->stats.rx_errors;
}
static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
{
+ struct net_device_context *ndc = netdev_priv(ndev);
+ struct net_device *vf_netdev = rtnl_dereference(ndc->vf_netdev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
struct sockaddr *addr = p;
- char save_adr[ETH_ALEN];
- unsigned char save_aatype;
int err;
- memcpy(save_adr, ndev->dev_addr, ETH_ALEN);
- save_aatype = ndev->addr_assign_type;
-
- err = eth_mac_addr(ndev, p);
- if (err != 0)
+ err = eth_prepare_mac_addr_change(ndev, p);
+ if (err)
return err;
- err = rndis_filter_set_device_mac(ndev, addr->sa_data);
- if (err != 0) {
- /* roll back to saved MAC */
- memcpy(ndev->dev_addr, save_adr, ETH_ALEN);
- ndev->addr_assign_type = save_aatype;
+ if (!nvdev)
+ return -ENODEV;
+
+ if (vf_netdev) {
+ err = dev_set_mac_address(vf_netdev, addr);
+ if (err)
+ return err;
+ }
+
+ err = rndis_filter_set_device_mac(nvdev, addr->sa_data);
+ if (!err) {
+ eth_commit_mac_addr_change(ndev, p);
+ } else if (vf_netdev) {
+ /* rollback change on VF */
+ memcpy(addr->sa_data, ndev->dev_addr, ETH_ALEN);
+ dev_set_mac_address(vf_netdev, addr);
}
return err;
@@ -990,9 +1123,18 @@ static const struct {
{ "tx_no_space", offsetof(struct netvsc_ethtool_stats, tx_no_space) },
{ "tx_too_big", offsetof(struct netvsc_ethtool_stats, tx_too_big) },
{ "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) },
+ { "tx_send_full", offsetof(struct netvsc_ethtool_stats, tx_send_full) },
+ { "rx_comp_busy", offsetof(struct netvsc_ethtool_stats, rx_comp_busy) },
+}, vf_stats[] = {
+ { "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
+ { "vf_rx_bytes", offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
+ { "vf_tx_packets", offsetof(struct netvsc_vf_pcpu_stats, tx_packets) },
+ { "vf_tx_bytes", offsetof(struct netvsc_vf_pcpu_stats, tx_bytes) },
+ { "vf_tx_dropped", offsetof(struct netvsc_vf_pcpu_stats, tx_dropped) },
};
#define NETVSC_GLOBAL_STATS_LEN ARRAY_SIZE(netvsc_stats)
+#define NETVSC_VF_STATS_LEN ARRAY_SIZE(vf_stats)
/* 4 statistics per queue (rx/tx packets/bytes) */
#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
@@ -1007,7 +1149,9 @@ static int netvsc_get_sset_count(struct net_device *dev, int string_set)
switch (string_set) {
case ETH_SS_STATS:
- return NETVSC_GLOBAL_STATS_LEN + NETVSC_QUEUE_STATS_LEN(nvdev);
+ return NETVSC_GLOBAL_STATS_LEN
+ + NETVSC_VF_STATS_LEN
+ + NETVSC_QUEUE_STATS_LEN(nvdev);
default:
return -EINVAL;
}
@@ -1017,9 +1161,10 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
const void *nds = &ndc->eth_stats;
const struct netvsc_stats *qstats;
+ struct netvsc_vf_pcpu_stats sum;
unsigned int start;
u64 packets, bytes;
int i, j;
@@ -1030,6 +1175,10 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++)
data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset);
+ netvsc_get_vf_stats(dev, &sum);
+ for (j = 0; j < NETVSC_VF_STATS_LEN; j++)
+ data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset);
+
for (j = 0; j < nvdev->num_chn; j++) {
qstats = &nvdev->chan_table[j].tx_stats;
@@ -1055,7 +1204,7 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
u8 *p = data;
int i;
@@ -1064,11 +1213,16 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
switch (stringset) {
case ETH_SS_STATS:
- for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
- memcpy(p + i * ETH_GSTRING_LEN,
- netvsc_stats[i].name, ETH_GSTRING_LEN);
+ for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) {
+ memcpy(p, netvsc_stats[i].name, ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vf_stats); i++) {
+ memcpy(p, vf_stats[i].name, ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
- p += i * ETH_GSTRING_LEN;
for (i = 0; i < nvdev->num_chn; i++) {
sprintf(p, "tx_queue_%u_packets", i);
p += ETH_GSTRING_LEN;
@@ -1085,7 +1239,7 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
}
static int
-netvsc_get_rss_hash_opts(struct netvsc_device *nvdev,
+netvsc_get_rss_hash_opts(struct net_device_context *ndc,
struct ethtool_rxnfc *info)
{
info->data = RXH_IP_SRC | RXH_IP_DST;
@@ -1094,9 +1248,20 @@ netvsc_get_rss_hash_opts(struct netvsc_device *nvdev,
case TCP_V4_FLOW:
case TCP_V6_FLOW:
info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
- /* fallthrough */
+ break;
+
case UDP_V4_FLOW:
+ if (ndc->udp4_l4_hash)
+ info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+
+ break;
+
case UDP_V6_FLOW:
+ if (ndc->udp6_l4_hash)
+ info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+
+ break;
+
case IPV4_FLOW:
case IPV6_FLOW:
break;
@@ -1113,7 +1278,7 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
u32 *rules)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
if (!nvdev)
return -ENODEV;
@@ -1124,11 +1289,51 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
return 0;
case ETHTOOL_GRXFH:
- return netvsc_get_rss_hash_opts(nvdev, info);
+ return netvsc_get_rss_hash_opts(ndc, info);
}
return -EOPNOTSUPP;
}
+static int netvsc_set_rss_hash_opts(struct net_device_context *ndc,
+ struct ethtool_rxnfc *info)
+{
+ if (info->data == (RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+ if (info->flow_type == UDP_V4_FLOW)
+ ndc->udp4_l4_hash = true;
+ else if (info->flow_type == UDP_V6_FLOW)
+ ndc->udp6_l4_hash = true;
+ else
+ return -EOPNOTSUPP;
+
+ return 0;
+ }
+
+ if (info->data == (RXH_IP_SRC | RXH_IP_DST)) {
+ if (info->flow_type == UDP_V4_FLOW)
+ ndc->udp4_l4_hash = false;
+ else if (info->flow_type == UDP_V6_FLOW)
+ ndc->udp6_l4_hash = false;
+ else
+ return -EOPNOTSUPP;
+
+ return 0;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int
+netvsc_set_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *info)
+{
+ struct net_device_context *ndc = netdev_priv(ndev);
+
+ if (info->cmd == ETHTOOL_SRXFH)
+ return netvsc_set_rss_hash_opts(ndc, info);
+
+ return -EOPNOTSUPP;
+}
+
#ifdef CONFIG_NET_POLL_CONTROLLER
static void netvsc_poll_controller(struct net_device *dev)
{
@@ -1163,7 +1368,7 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
u8 *hfunc)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *ndev = rcu_dereference(ndc->nvdev);
+ struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev);
struct rndis_device *rndis_dev;
int i;
@@ -1202,7 +1407,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
rndis_dev = ndev->extension;
if (indir) {
for (i = 0; i < ITAB_NUM; i++)
- if (indir[i] >= VRSS_CHANNEL_MAX)
+ if (indir[i] >= ndev->num_chn)
return -EINVAL;
for (i = 0; i < ITAB_NUM; i++)
@@ -1216,7 +1421,105 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
key = rndis_dev->rss_key;
}
- return rndis_filter_set_rss_param(rndis_dev, key, ndev->num_chn);
+ return rndis_filter_set_rss_param(rndis_dev, key);
+}
+
+/* Hyper-V RNDIS protocol does not have ring in the HW sense.
+ * It does have pre-allocated receive area which is divided into sections.
+ */
+static void __netvsc_get_ringparam(struct netvsc_device *nvdev,
+ struct ethtool_ringparam *ring)
+{
+ u32 max_buf_size;
+
+ ring->rx_pending = nvdev->recv_section_cnt;
+ ring->tx_pending = nvdev->send_section_cnt;
+
+ if (nvdev->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
+ max_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
+ else
+ max_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
+
+ ring->rx_max_pending = max_buf_size / nvdev->recv_section_size;
+ ring->tx_max_pending = NETVSC_SEND_BUFFER_SIZE
+ / nvdev->send_section_size;
+}
+
+static void netvsc_get_ringparam(struct net_device *ndev,
+ struct ethtool_ringparam *ring)
+{
+ struct net_device_context *ndevctx = netdev_priv(ndev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+
+ if (!nvdev)
+ return;
+
+ __netvsc_get_ringparam(nvdev, ring);
+}
+
+static int netvsc_set_ringparam(struct net_device *ndev,
+ struct ethtool_ringparam *ring)
+{
+ struct net_device_context *ndevctx = netdev_priv(ndev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+ struct hv_device *hdev = ndevctx->device_ctx;
+ struct netvsc_device_info device_info;
+ struct ethtool_ringparam orig;
+ u32 new_tx, new_rx;
+ bool was_opened;
+ int ret = 0;
+
+ if (!nvdev || nvdev->destroy)
+ return -ENODEV;
+
+ memset(&orig, 0, sizeof(orig));
+ __netvsc_get_ringparam(nvdev, &orig);
+
+ new_tx = clamp_t(u32, ring->tx_pending,
+ NETVSC_MIN_TX_SECTIONS, orig.tx_max_pending);
+ new_rx = clamp_t(u32, ring->rx_pending,
+ NETVSC_MIN_RX_SECTIONS, orig.rx_max_pending);
+
+ if (new_tx == orig.tx_pending &&
+ new_rx == orig.rx_pending)
+ return 0; /* no change */
+
+ memset(&device_info, 0, sizeof(device_info));
+ device_info.num_chn = nvdev->num_chn;
+ device_info.ring_size = ring_size;
+ device_info.send_sections = new_tx;
+ device_info.recv_sections = new_rx;
+
+ netif_device_detach(ndev);
+ was_opened = rndis_filter_opened(nvdev);
+ if (was_opened)
+ rndis_filter_close(nvdev);
+
+ rndis_filter_device_remove(hdev, nvdev);
+
+ nvdev = rndis_filter_device_add(hdev, &device_info);
+ if (IS_ERR(nvdev)) {
+ ret = PTR_ERR(nvdev);
+
+ device_info.send_sections = orig.tx_pending;
+ device_info.recv_sections = orig.rx_pending;
+ nvdev = rndis_filter_device_add(hdev, &device_info);
+ if (IS_ERR(nvdev)) {
+ netdev_err(ndev, "restoring ringparam failed: %ld\n",
+ PTR_ERR(nvdev));
+ return ret;
+ }
+ }
+
+ if (was_opened)
+ rndis_filter_open(nvdev);
+ netif_device_attach(ndev);
+
+ /* We may have missed link change notifications */
+ ndevctx->last_reconfig = 0;
+ schedule_delayed_work(&ndevctx->dwork, 0);
+
+ return ret;
}
static const struct ethtool_ops ethtool_ops = {
@@ -1229,12 +1532,15 @@ static const struct ethtool_ops ethtool_ops = {
.set_channels = netvsc_set_channels,
.get_ts_info = ethtool_op_get_ts_info,
.get_rxnfc = netvsc_get_rxnfc,
+ .set_rxnfc = netvsc_set_rxnfc,
.get_rxfh_key_size = netvsc_get_rxfh_key_size,
.get_rxfh_indir_size = netvsc_rss_indir_size,
.get_rxfh = netvsc_get_rxfh,
.set_rxfh = netvsc_set_rxfh,
.get_link_ksettings = netvsc_get_link_ksettings,
.set_link_ksettings = netvsc_set_link_ksettings,
+ .get_ringparam = netvsc_get_ringparam,
+ .set_ringparam = netvsc_set_ringparam,
};
static const struct net_device_ops device_ops = {
@@ -1269,7 +1575,12 @@ static void netvsc_link_change(struct work_struct *w)
bool notify = false, reschedule = false;
unsigned long flags, next_reconfig, delay;
- rtnl_lock();
+ /* if changes are happening, comeback later */
+ if (!rtnl_trylock()) {
+ schedule_delayed_work(&ndev_ctx->dwork, LINKCHANGE_INT);
+ return;
+ }
+
net_device = rtnl_dereference(ndev_ctx->nvdev);
if (!net_device)
goto out_unlock;
@@ -1308,8 +1619,7 @@ static void netvsc_link_change(struct work_struct *w)
case RNDIS_STATUS_MEDIA_CONNECT:
if (rdev->link_state) {
rdev->link_state = false;
- if (!ndev_ctx->datapath)
- netif_carrier_on(net);
+ netif_carrier_on(net);
netif_tx_wake_all_queues(net);
} else {
notify = true;
@@ -1386,7 +1696,7 @@ static struct net_device *get_netvsc_byref(struct net_device *vf_netdev)
continue; /* not a netvsc device */
net_device_ctx = netdev_priv(dev);
- if (net_device_ctx->nvdev == NULL)
+ if (!rtnl_dereference(net_device_ctx->nvdev))
continue; /* device is removed */
if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev)
@@ -1396,6 +1706,108 @@ static struct net_device *get_netvsc_byref(struct net_device *vf_netdev)
return NULL;
}
+/* Called when VF is injecting data into network stack.
+ * Change the associated network device from VF to netvsc.
+ * note: already called with rcu_read_lock
+ */
+static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
+{
+ struct sk_buff *skb = *pskb;
+ struct net_device *ndev = rcu_dereference(skb->dev->rx_handler_data);
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+ struct netvsc_vf_pcpu_stats *pcpu_stats
+ = this_cpu_ptr(ndev_ctx->vf_stats);
+
+ skb->dev = ndev;
+
+ u64_stats_update_begin(&pcpu_stats->syncp);
+ pcpu_stats->rx_packets++;
+ pcpu_stats->rx_bytes += skb->len;
+ u64_stats_update_end(&pcpu_stats->syncp);
+
+ return RX_HANDLER_ANOTHER;
+}
+
+static int netvsc_vf_join(struct net_device *vf_netdev,
+ struct net_device *ndev)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+ int ret;
+
+ ret = netdev_rx_handler_register(vf_netdev,
+ netvsc_vf_handle_frame, ndev);
+ if (ret != 0) {
+ netdev_err(vf_netdev,
+ "can not register netvsc VF receive handler (err = %d)\n",
+ ret);
+ goto rx_handler_failed;
+ }
+
+ ret = netdev_upper_dev_link(vf_netdev, ndev);
+ if (ret != 0) {
+ netdev_err(vf_netdev,
+ "can not set master device %s (err = %d)\n",
+ ndev->name, ret);
+ goto upper_link_failed;
+ }
+
+ /* set slave flag before open to prevent IPv6 addrconf */
+ vf_netdev->flags |= IFF_SLAVE;
+
+ schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
+
+ call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
+
+ netdev_info(vf_netdev, "joined to %s\n", ndev->name);
+ return 0;
+
+upper_link_failed:
+ netdev_rx_handler_unregister(vf_netdev);
+rx_handler_failed:
+ return ret;
+}
+
+static void __netvsc_vf_setup(struct net_device *ndev,
+ struct net_device *vf_netdev)
+{
+ int ret;
+
+ /* Align MTU of VF with master */
+ ret = dev_set_mtu(vf_netdev, ndev->mtu);
+ if (ret)
+ netdev_warn(vf_netdev,
+ "unable to change mtu to %u\n", ndev->mtu);
+
+ if (netif_running(ndev)) {
+ ret = dev_open(vf_netdev);
+ if (ret)
+ netdev_warn(vf_netdev,
+ "unable to open: %d\n", ret);
+ }
+}
+
+/* Setup VF as slave of the synthetic device.
+ * Runs in workqueue to avoid recursion in netlink callbacks.
+ */
+static void netvsc_vf_setup(struct work_struct *w)
+{
+ struct net_device_context *ndev_ctx
+ = container_of(w, struct net_device_context, vf_takeover.work);
+ struct net_device *ndev = hv_get_drvdata(ndev_ctx->device_ctx);
+ struct net_device *vf_netdev;
+
+ if (!rtnl_trylock()) {
+ schedule_delayed_work(&ndev_ctx->vf_takeover, 0);
+ return;
+ }
+
+ vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+ if (vf_netdev)
+ __netvsc_vf_setup(ndev, vf_netdev);
+
+ rtnl_unlock();
+}
+
static int netvsc_register_vf(struct net_device *vf_netdev)
{
struct net_device *ndev;
@@ -1419,56 +1831,23 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev))
return NOTIFY_DONE;
+ if (netvsc_vf_join(vf_netdev, ndev) != 0)
+ return NOTIFY_DONE;
+
netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
- /*
- * Take a reference on the module.
- */
- try_module_get(THIS_MODULE);
dev_hold(vf_netdev);
rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev);
return NOTIFY_OK;
}
-static int netvsc_vf_up(struct net_device *vf_netdev)
+/* VF up/down change detected, schedule to change data path */
+static int netvsc_vf_changed(struct net_device *vf_netdev)
{
- struct net_device *ndev;
- struct netvsc_device *netvsc_dev;
struct net_device_context *net_device_ctx;
-
- ndev = get_netvsc_byref(vf_netdev);
- if (!ndev)
- return NOTIFY_DONE;
-
- net_device_ctx = netdev_priv(ndev);
- netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
-
- netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
-
- /*
- * Open the device before switching data path.
- */
- rndis_filter_open(netvsc_dev);
-
- /*
- * notify the host to switch the data path.
- */
- netvsc_switch_datapath(ndev, true);
- netdev_info(ndev, "Data path switched to VF: %s\n", vf_netdev->name);
-
- netif_carrier_off(ndev);
-
- /* Now notify peers through VF device. */
- call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev);
-
- return NOTIFY_OK;
-}
-
-static int netvsc_vf_down(struct net_device *vf_netdev)
-{
- struct net_device *ndev;
struct netvsc_device *netvsc_dev;
- struct net_device_context *net_device_ctx;
+ struct net_device *ndev;
+ bool vf_is_up = netif_running(vf_netdev);
ndev = get_netvsc_byref(vf_netdev);
if (!ndev)
@@ -1476,15 +1855,12 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
net_device_ctx = netdev_priv(ndev);
netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
+ if (!netvsc_dev)
+ return NOTIFY_DONE;
- netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
- netvsc_switch_datapath(ndev, false);
- netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
- rndis_filter_close(netvsc_dev);
- netif_carrier_on(ndev);
-
- /* Now notify peers through netvsc device. */
- call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev);
+ netvsc_switch_datapath(ndev, vf_is_up);
+ netdev_info(ndev, "Data path switched %s VF: %s\n",
+ vf_is_up ? "to" : "from", vf_netdev->name);
return NOTIFY_OK;
}
@@ -1499,12 +1875,15 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
return NOTIFY_DONE;
net_device_ctx = netdev_priv(ndev);
+ cancel_delayed_work_sync(&net_device_ctx->vf_takeover);
netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
+ netdev_rx_handler_unregister(vf_netdev);
+ netdev_upper_dev_unlink(vf_netdev, ndev);
RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
dev_put(vf_netdev);
- module_put(THIS_MODULE);
+
return NOTIFY_OK;
}
@@ -1515,12 +1894,12 @@ static int netvsc_probe(struct hv_device *dev,
struct net_device_context *net_device_ctx;
struct netvsc_device_info device_info;
struct netvsc_device *nvdev;
- int ret;
+ int ret = -ENOMEM;
net = alloc_etherdev_mq(sizeof(struct net_device_context),
VRSS_CHANNEL_MAX);
if (!net)
- return -ENOMEM;
+ goto no_net;
netif_carrier_off(net);
@@ -1539,6 +1918,12 @@ static int netvsc_probe(struct hv_device *dev,
spin_lock_init(&net_device_ctx->lock);
INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
+ INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
+
+ net_device_ctx->vf_stats
+ = netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats);
+ if (!net_device_ctx->vf_stats)
+ goto no_stats;
net->netdev_ops = &device_ops;
net->ethtool_ops = &ethtool_ops;
@@ -1551,13 +1936,16 @@ static int netvsc_probe(struct hv_device *dev,
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
device_info.num_chn = VRSS_CHANNEL_DEFAULT;
- ret = rndis_filter_device_add(dev, &device_info);
- if (ret != 0) {
+ device_info.send_sections = NETVSC_DEFAULT_TX;
+ device_info.recv_sections = NETVSC_DEFAULT_RX;
+
+ nvdev = rndis_filter_device_add(dev, &device_info);
+ if (IS_ERR(nvdev)) {
+ ret = PTR_ERR(nvdev);
netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
- free_netdev(net);
- hv_set_drvdata(dev, NULL);
- return ret;
+ goto rndis_failed;
}
+
memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
/* hw_features computed in rndis_filter_device_add */
@@ -1566,11 +1954,11 @@ static int netvsc_probe(struct hv_device *dev,
NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
net->vlan_features = net->features;
- /* RCU not necessary here, device not registered */
- nvdev = net_device_ctx->nvdev;
netif_set_real_num_tx_queues(net, nvdev->num_chn);
netif_set_real_num_rx_queues(net, nvdev->num_chn);
+ netdev_lockdep_set_classes(net);
+
/* MTU range: 68 - 1500 or 65521 */
net->min_mtu = NETVSC_MTU_MIN;
if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
@@ -1581,20 +1969,29 @@ static int netvsc_probe(struct hv_device *dev,
ret = register_netdev(net);
if (ret != 0) {
pr_err("Unable to register netdev.\n");
- rndis_filter_device_remove(dev, nvdev);
- free_netdev(net);
+ goto register_failed;
}
return ret;
+
+register_failed:
+ rndis_filter_device_remove(dev, nvdev);
+rndis_failed:
+ free_percpu(net_device_ctx->vf_stats);
+no_stats:
+ hv_set_drvdata(dev, NULL);
+ free_netdev(net);
+no_net:
+ return ret;
}
static int netvsc_remove(struct hv_device *dev)
{
- struct net_device *net;
struct net_device_context *ndev_ctx;
+ struct net_device *vf_netdev;
+ struct net_device *net;
net = hv_get_drvdata(dev);
-
if (net == NULL) {
dev_err(&dev->device, "No net device to remove\n");
return 0;
@@ -1611,13 +2008,18 @@ static int netvsc_remove(struct hv_device *dev)
* removed. Also blocks mtu and channel changes.
*/
rtnl_lock();
- rndis_filter_device_remove(dev, ndev_ctx->nvdev);
- rtnl_unlock();
+ vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+ if (vf_netdev)
+ netvsc_unregister_vf(vf_netdev);
- unregister_netdev(net);
+ rndis_filter_device_remove(dev,
+ rtnl_dereference(ndev_ctx->nvdev));
+ unregister_netdevice(net);
+ rtnl_unlock();
hv_set_drvdata(dev, NULL);
+ free_percpu(ndev_ctx->vf_stats);
free_netdev(net);
return 0;
}
@@ -1672,9 +2074,8 @@ static int netvsc_netdev_event(struct notifier_block *this,
case NETDEV_UNREGISTER:
return netvsc_unregister_vf(event_dev);
case NETDEV_UP:
- return netvsc_vf_up(event_dev);
case NETDEV_DOWN:
- return netvsc_vf_down(event_dev);
+ return netvsc_vf_changed(event_dev);
default:
return NOTIFY_DONE;
}
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index d6308ffda53e..69c40b8fccc3 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -28,6 +28,7 @@
#include <linux/if_vlan.h>
#include <linux/nls.h>
#include <linux/vmalloc.h>
+#include <linux/rtnetlink.h>
#include "hyperv_net.h"
@@ -213,11 +214,11 @@ static void dump_rndis_message(struct hv_device *hv_dev,
static int rndis_filter_send_request(struct rndis_device *dev,
struct rndis_request *req)
{
- int ret;
struct hv_netvsc_packet *packet;
struct hv_page_buffer page_buf[2];
struct hv_page_buffer *pb = page_buf;
struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
+ int ret;
/* Setup the packet to send it */
packet = &req->pkt;
@@ -243,7 +244,10 @@ static int rndis_filter_send_request(struct rndis_device *dev,
pb[0].len;
}
- ret = netvsc_send(net_device_ctx->device_ctx, packet, NULL, &pb, NULL);
+ rcu_read_lock_bh();
+ ret = netvsc_send(net_device_ctx, packet, NULL, pb, NULL);
+ rcu_read_unlock_bh();
+
return ret;
}
@@ -443,8 +447,9 @@ int rndis_filter_receive(struct net_device *ndev,
return 0;
}
-static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
- void *result, u32 *result_size)
+static int rndis_filter_query_device(struct rndis_device *dev,
+ struct netvsc_device *nvdev,
+ u32 oid, void *result, u32 *result_size)
{
struct rndis_request *request;
u32 inresult_size = *result_size;
@@ -471,8 +476,6 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
query->dev_vc_handle = 0;
if (oid == OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES) {
- struct net_device_context *ndevctx = netdev_priv(dev->ndev);
- struct netvsc_device *nvdev = ndevctx->nvdev;
struct ndis_offload *hwcaps;
u32 nvsp_version = nvdev->nvsp_version;
u8 ndis_rev;
@@ -541,14 +544,15 @@ cleanup:
/* Get the hardware offload capabilities */
static int
-rndis_query_hwcaps(struct rndis_device *dev, struct ndis_offload *caps)
+rndis_query_hwcaps(struct rndis_device *dev, struct netvsc_device *net_device,
+ struct ndis_offload *caps)
{
u32 caps_len = sizeof(*caps);
int ret;
memset(caps, 0, sizeof(*caps));
- ret = rndis_filter_query_device(dev,
+ ret = rndis_filter_query_device(dev, net_device,
OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES,
caps, &caps_len);
if (ret)
@@ -577,11 +581,12 @@ rndis_query_hwcaps(struct rndis_device *dev, struct ndis_offload *caps)
return 0;
}
-static int rndis_filter_query_device_mac(struct rndis_device *dev)
+static int rndis_filter_query_device_mac(struct rndis_device *dev,
+ struct netvsc_device *net_device)
{
u32 size = ETH_ALEN;
- return rndis_filter_query_device(dev,
+ return rndis_filter_query_device(dev, net_device,
RNDIS_OID_802_3_PERMANENT_ADDRESS,
dev->hw_mac_adr, &size);
}
@@ -589,9 +594,9 @@ static int rndis_filter_query_device_mac(struct rndis_device *dev)
#define NWADR_STR "NetworkAddress"
#define NWADR_STRLEN 14
-int rndis_filter_set_device_mac(struct net_device *ndev, char *mac)
+int rndis_filter_set_device_mac(struct netvsc_device *nvdev,
+ const char *mac)
{
- struct netvsc_device *nvdev = net_device_to_netvsc_device(ndev);
struct rndis_device *rdev = nvdev->extension;
struct rndis_request *request;
struct rndis_set_request *set;
@@ -645,11 +650,8 @@ int rndis_filter_set_device_mac(struct net_device *ndev, char *mac)
wait_for_completion(&request->wait_event);
set_complete = &request->response_msg.msg.set_complete;
- if (set_complete->status != RNDIS_STATUS_SUCCESS) {
- netdev_err(ndev, "Fail to set MAC on host side:0x%x\n",
- set_complete->status);
- ret = -EINVAL;
- }
+ if (set_complete->status != RNDIS_STATUS_SUCCESS)
+ ret = -EIO;
cleanup:
put_rndis_request(rdev, request);
@@ -658,9 +660,9 @@ cleanup:
static int
rndis_filter_set_offload_params(struct net_device *ndev,
+ struct netvsc_device *nvdev,
struct ndis_offload_params *req_offloads)
{
- struct netvsc_device *nvdev = net_device_to_netvsc_device(ndev);
struct rndis_device *rdev = nvdev->extension;
struct rndis_request *request;
struct rndis_set_request *set;
@@ -715,7 +717,7 @@ cleanup:
}
int rndis_filter_set_rss_param(struct rndis_device *rdev,
- const u8 *rss_key, int num_queue)
+ const u8 *rss_key)
{
struct net_device *ndev = rdev->ndev;
struct rndis_request *request;
@@ -782,27 +784,27 @@ cleanup:
return ret;
}
-static int rndis_filter_query_device_link_status(struct rndis_device *dev)
+static int rndis_filter_query_device_link_status(struct rndis_device *dev,
+ struct netvsc_device *net_device)
{
u32 size = sizeof(u32);
u32 link_status;
- int ret;
-
- ret = rndis_filter_query_device(dev,
- RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
- &link_status, &size);
- return ret;
+ return rndis_filter_query_device(dev, net_device,
+ RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
+ &link_status, &size);
}
-static int rndis_filter_query_link_speed(struct rndis_device *dev)
+static int rndis_filter_query_link_speed(struct rndis_device *dev,
+ struct netvsc_device *net_device)
{
u32 size = sizeof(u32);
u32 link_speed;
struct net_device_context *ndc;
int ret;
- ret = rndis_filter_query_device(dev, RNDIS_OID_GEN_LINK_SPEED,
+ ret = rndis_filter_query_device(dev, net_device,
+ RNDIS_OID_GEN_LINK_SPEED,
&link_speed, &size);
if (!ret) {
@@ -871,14 +873,14 @@ void rndis_filter_update(struct netvsc_device *nvdev)
schedule_work(&rdev->mcast_work);
}
-static int rndis_filter_init_device(struct rndis_device *dev)
+static int rndis_filter_init_device(struct rndis_device *dev,
+ struct netvsc_device *nvdev)
{
struct rndis_request *request;
struct rndis_initialize_request *init;
struct rndis_initialize_complete *init_complete;
u32 status;
int ret;
- struct netvsc_device *nvdev = net_device_to_netvsc_device(dev->ndev);
request = get_rndis_request(dev, RNDIS_MSG_INIT,
RNDIS_MESSAGE_SIZE(struct rndis_initialize_request));
@@ -926,12 +928,12 @@ static bool netvsc_device_idle(const struct netvsc_device *nvdev)
{
int i;
- if (atomic_read(&nvdev->num_outstanding_recvs) > 0)
- return false;
-
for (i = 0; i < nvdev->num_chn; i++) {
const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
+ if (nvchan->mrc.first != nvchan->mrc.next)
+ return false;
+
if (atomic_read(&nvchan->queue_sends) > 0)
return false;
}
@@ -944,7 +946,7 @@ static void rndis_filter_halt_device(struct rndis_device *dev)
struct rndis_request *request;
struct rndis_halt_request *halt;
struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
- struct netvsc_device *nvdev = net_device_ctx->nvdev;
+ struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
/* Attempt to do a rndis device halt */
request = get_rndis_request(dev, RNDIS_MSG_HALT,
@@ -1015,20 +1017,20 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
{
struct net_device *ndev =
hv_get_drvdata(new_sc->primary_channel->device_obj);
- struct netvsc_device *nvscdev = net_device_to_netvsc_device(ndev);
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+ struct netvsc_device *nvscdev;
u16 chn_index = new_sc->offermsg.offer.sub_channel_index;
struct netvsc_channel *nvchan;
int ret;
- if (chn_index >= nvscdev->num_chn)
+ /* This is safe because this callback only happens when
+ * new device is being setup and waiting on the channel_init_wait.
+ */
+ nvscdev = rcu_dereference_raw(ndev_ctx->nvdev);
+ if (!nvscdev || chn_index >= nvscdev->num_chn)
return;
nvchan = nvscdev->chan_table + chn_index;
- nvchan->mrc.buf
- = vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
-
- if (!nvchan->mrc.buf)
- return;
/* Because the device uses NAPI, all the interrupt batching and
* control is done via Net softirq, not the channel handling
@@ -1052,8 +1054,8 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
wake_up(&nvscdev->subchan_open);
}
-int rndis_filter_device_add(struct hv_device *dev,
- struct netvsc_device_info *device_info)
+struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
+ struct netvsc_device_info *device_info)
{
struct net_device *net = hv_get_drvdata(dev);
struct net_device_context *net_device_ctx = netdev_priv(net);
@@ -1065,28 +1067,27 @@ int rndis_filter_device_add(struct hv_device *dev,
struct ndis_recv_scale_cap rsscap;
u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
unsigned int gso_max_size = GSO_MAX_SIZE;
- u32 mtu, size, num_rss_qs;
+ u32 mtu, size;
const struct cpumask *node_cpu_mask;
u32 num_possible_rss_qs;
int i, ret;
rndis_device = get_rndis_device();
if (!rndis_device)
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
/*
* Let the inner driver handle this first to create the netvsc channel
* NOTE! Once the channel is created, we may get a receive callback
* (RndisFilterOnReceive()) before this call is completed
*/
- ret = netvsc_device_add(dev, device_info);
- if (ret != 0) {
+ net_device = netvsc_device_add(dev, device_info);
+ if (IS_ERR(net_device)) {
kfree(rndis_device);
- return ret;
+ return net_device;
}
/* Initialize the rndis device */
- net_device = net_device_ctx->nvdev;
net_device->max_chn = 1;
net_device->num_chn = 1;
@@ -1094,35 +1095,29 @@ int rndis_filter_device_add(struct hv_device *dev,
rndis_device->ndev = net;
/* Send the rndis initialization message */
- ret = rndis_filter_init_device(rndis_device);
- if (ret != 0) {
- rndis_filter_device_remove(dev, net_device);
- return ret;
- }
+ ret = rndis_filter_init_device(rndis_device, net_device);
+ if (ret != 0)
+ goto err_dev_remv;
/* Get the MTU from the host */
size = sizeof(u32);
- ret = rndis_filter_query_device(rndis_device,
+ ret = rndis_filter_query_device(rndis_device, net_device,
RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE,
&mtu, &size);
if (ret == 0 && size == sizeof(u32) && mtu < net->mtu)
net->mtu = mtu;
/* Get the mac address */
- ret = rndis_filter_query_device_mac(rndis_device);
- if (ret != 0) {
- rndis_filter_device_remove(dev, net_device);
- return ret;
- }
+ ret = rndis_filter_query_device_mac(rndis_device, net_device);
+ if (ret != 0)
+ goto err_dev_remv;
memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
/* Find HW offload capabilities */
- ret = rndis_query_hwcaps(rndis_device, &hwcaps);
- if (ret != 0) {
- rndis_filter_device_remove(dev, net_device);
- return ret;
- }
+ ret = rndis_query_hwcaps(rndis_device, net_device, &hwcaps);
+ if (ret != 0)
+ goto err_dev_remv;
/* A value of zero means "no change"; now turn on what we want. */
memset(&offloads, 0, sizeof(struct ndis_offload_params));
@@ -1177,24 +1172,24 @@ int rndis_filter_device_add(struct hv_device *dev,
netif_set_gso_max_size(net, gso_max_size);
- ret = rndis_filter_set_offload_params(net, &offloads);
+ ret = rndis_filter_set_offload_params(net, net_device, &offloads);
if (ret)
goto err_dev_remv;
- rndis_filter_query_device_link_status(rndis_device);
+ rndis_filter_query_device_link_status(rndis_device, net_device);
netdev_dbg(net, "Device MAC %pM link state %s\n",
rndis_device->hw_mac_adr,
rndis_device->link_state ? "down" : "up");
if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
- return 0;
+ return net_device;
- rndis_filter_query_link_speed(rndis_device);
+ rndis_filter_query_link_speed(rndis_device, net_device);
/* vRSS setup */
memset(&rsscap, 0, rsscap_size);
- ret = rndis_filter_query_device(rndis_device,
+ ret = rndis_filter_query_device(rndis_device, net_device,
OID_GEN_RECEIVE_SCALE_CAPABILITIES,
&rsscap, &rsscap_size);
if (ret || rsscap.num_recv_que < 2)
@@ -1220,9 +1215,18 @@ int rndis_filter_device_add(struct hv_device *dev,
net_device->num_chn);
atomic_set(&net_device->open_chn, 1);
- num_rss_qs = net_device->num_chn - 1;
- if (num_rss_qs == 0)
- return 0;
+
+ if (net_device->num_chn == 1)
+ return net_device;
+
+ for (i = 1; i < net_device->num_chn; i++) {
+ ret = netvsc_alloc_recv_comp_ring(net_device, i);
+ if (ret) {
+ while (--i != 0)
+ vfree(net_device->chan_table[i].mrc.slots);
+ goto out;
+ }
+ }
vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
@@ -1254,19 +1258,18 @@ int rndis_filter_device_add(struct hv_device *dev,
atomic_read(&net_device->open_chn) == net_device->num_chn);
/* ignore failues from setting rss parameters, still have channels */
- rndis_filter_set_rss_param(rndis_device, netvsc_hash_key,
- net_device->num_chn);
+ rndis_filter_set_rss_param(rndis_device, netvsc_hash_key);
out:
if (ret) {
net_device->max_chn = 1;
net_device->num_chn = 1;
}
- return 0; /* return 0 because primary channel can be used alone */
+ return net_device;
err_dev_remv:
rndis_filter_device_remove(dev, net_device);
- return ret;
+ return ERR_PTR(ret);
}
void rndis_filter_device_remove(struct hv_device *dev,
@@ -1304,3 +1307,8 @@ int rndis_filter_close(struct netvsc_device *nvdev)
return rndis_filter_close_device(nvdev->extension);
}
+
+bool rndis_filter_opened(const struct netvsc_device *nvdev)
+{
+ return atomic_read(&nvdev->open_cnt) > 0;
+}