summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/netlink_mmap.txt332
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h4
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h27
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c138
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.c92
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/adapter.h8
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c116
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h20
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c88
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_l2.c23
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede.h10
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c277
-rw-r--r--drivers/net/geneve.c98
-rw-r--r--drivers/net/phy/dp83848.c2
-rw-r--r--drivers/net/vxlan.c30
-rw-r--r--include/linux/netfilter/nfnetlink.h2
-rw-r--r--include/linux/netlink.h10
-rw-r--r--include/linux/qed/qed_eth_if.h2
-rw-r--r--include/linux/skbuff.h17
-rw-r--r--include/net/genetlink.h4
-rw-r--r--include/net/ip_tunnels.h3
-rw-r--r--include/net/ipv6.h8
-rw-r--r--include/net/vxlan.h9
-rw-r--r--include/uapi/linux/netlink.h4
-rw-r--r--include/uapi/linux/netlink_diag.h2
-rw-r--r--net/bridge/br_netlink.c1
-rw-r--r--net/bridge/br_private.h1
-rw-r--r--net/bridge/br_stp.c14
-rw-r--r--net/bridge/br_stp_if.c2
-rw-r--r--net/bridge/br_stp_timer.c1
-rw-r--r--net/ipv4/ip_gre.c2
-rw-r--r--net/ipv4/ip_tunnel_core.c8
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/netfilter/nfnetlink.c7
-rw-r--r--net/netfilter/nfnetlink_log.c5
-rw-r--r--net/netfilter/nfnetlink_queue.c6
-rw-r--r--net/netlink/Kconfig9
-rw-r--r--net/netlink/af_netlink.c774
-rw-r--r--net/netlink/af_netlink.h15
-rw-r--r--net/netlink/diag.c39
-rw-r--r--net/netlink/genetlink.c21
-rw-r--r--net/openvswitch/datapath.c20
-rw-r--r--net/tipc/netlink_compat.c1
44 files changed, 803 insertions, 1453 deletions
diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt
deleted file mode 100644
index 54f10478e8e3..000000000000
--- a/Documentation/networking/netlink_mmap.txt
+++ /dev/null
@@ -1,332 +0,0 @@
-This file documents how to use memory mapped I/O with netlink.
-
-Author: Patrick McHardy <kaber@trash.net>
-
-Overview
---------
-
-Memory mapped netlink I/O can be used to increase throughput and decrease
-overhead of unicast receive and transmit operations. Some netlink subsystems
-require high throughput, these are mainly the netfilter subsystems
-nfnetlink_queue and nfnetlink_log, but it can also help speed up large
-dump operations of f.i. the routing database.
-
-Memory mapped netlink I/O used two circular ring buffers for RX and TX which
-are mapped into the processes address space.
-
-The RX ring is used by the kernel to directly construct netlink messages into
-user-space memory without copying them as done with regular socket I/O,
-additionally as long as the ring contains messages no recvmsg() or poll()
-syscalls have to be issued by user-space to get more message.
-
-The TX ring is used to process messages directly from user-space memory, the
-kernel processes all messages contained in the ring using a single sendmsg()
-call.
-
-Usage overview
---------------
-
-In order to use memory mapped netlink I/O, user-space needs three main changes:
-
-- ring setup
-- conversion of the RX path to get messages from the ring instead of recvmsg()
-- conversion of the TX path to construct messages into the ring
-
-Ring setup is done using setsockopt() to provide the ring parameters to the
-kernel, then a call to mmap() to map the ring into the processes address space:
-
-- setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &params, sizeof(params));
-- setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &params, sizeof(params));
-- ring = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)
-
-Usage of either ring is optional, but even if only the RX ring is used the
-mapping still needs to be writable in order to update the frame status after
-processing.
-
-Conversion of the reception path involves calling poll() on the file
-descriptor, once the socket is readable the frames from the ring are
-processed in order until no more messages are available, as indicated by
-a status word in the frame header.
-
-On kernel side, in order to make use of memory mapped I/O on receive, the
-originating netlink subsystem needs to support memory mapped I/O, otherwise
-it will use an allocated socket buffer as usual and the contents will be
- copied to the ring on transmission, nullifying most of the performance gains.
-Dumps of kernel databases automatically support memory mapped I/O.
-
-Conversion of the transmit path involves changing message construction to
-use memory from the TX ring instead of (usually) a buffer declared on the
-stack and setting up the frame header appropriately. Optionally poll() can
-be used to wait for free frames in the TX ring.
-
-Structured and definitions for using memory mapped I/O are contained in
-<linux/netlink.h>.
-
-RX and TX rings
-----------------
-
-Each ring contains a number of continuous memory blocks, containing frames of
-fixed size dependent on the parameters used for ring setup.
-
-Ring: [ block 0 ]
- [ frame 0 ]
- [ frame 1 ]
- [ block 1 ]
- [ frame 2 ]
- [ frame 3 ]
- ...
- [ block n ]
- [ frame 2 * n ]
- [ frame 2 * n + 1 ]
-
-The blocks are only visible to the kernel, from the point of view of user-space
-the ring just contains the frames in a continuous memory zone.
-
-The ring parameters used for setting up the ring are defined as follows:
-
-struct nl_mmap_req {
- unsigned int nm_block_size;
- unsigned int nm_block_nr;
- unsigned int nm_frame_size;
- unsigned int nm_frame_nr;
-};
-
-Frames are grouped into blocks, where each block is a continuous region of memory
-and holds nm_block_size / nm_frame_size frames. The total number of frames in
-the ring is nm_frame_nr. The following invariants hold:
-
-- frames_per_block = nm_block_size / nm_frame_size
-
-- nm_frame_nr = frames_per_block * nm_block_nr
-
-Some parameters are constrained, specifically:
-
-- nm_block_size must be a multiple of the architectures memory page size.
- The getpagesize() function can be used to get the page size.
-
-- nm_frame_size must be equal or larger to NL_MMAP_HDRLEN, IOW a frame must be
- able to hold at least the frame header
-
-- nm_frame_size must be smaller or equal to nm_block_size
-
-- nm_frame_size must be a multiple of NL_MMAP_MSG_ALIGNMENT
-
-- nm_frame_nr must equal the actual number of frames as specified above.
-
-When the kernel can't allocate physically continuous memory for a ring block,
-it will fall back to use physically discontinuous memory. This might affect
-performance negatively, in order to avoid this the nm_frame_size parameter
-should be chosen to be as small as possible for the required frame size and
-the number of blocks should be increased instead.
-
-Ring frames
-------------
-
-Each frames contain a frame header, consisting of a synchronization word and some
-meta-data, and the message itself.
-
-Frame: [ header message ]
-
-The frame header is defined as follows:
-
-struct nl_mmap_hdr {
- unsigned int nm_status;
- unsigned int nm_len;
- __u32 nm_group;
- /* credentials */
- __u32 nm_pid;
- __u32 nm_uid;
- __u32 nm_gid;
-};
-
-- nm_status is used for synchronizing processing between the kernel and user-
- space and specifies ownership of the frame as well as the operation to perform
-
-- nm_len contains the length of the message contained in the data area
-
-- nm_group specified the destination multicast group of message
-
-- nm_pid, nm_uid and nm_gid contain the netlink pid, UID and GID of the sending
- process. These values correspond to the data available using SOCK_PASSCRED in
- the SCM_CREDENTIALS cmsg.
-
-The possible values in the status word are:
-
-- NL_MMAP_STATUS_UNUSED:
- RX ring: frame belongs to the kernel and contains no message
- for user-space. Approriate action is to invoke poll()
- to wait for new messages.
-
- TX ring: frame belongs to user-space and can be used for
- message construction.
-
-- NL_MMAP_STATUS_RESERVED:
- RX ring only: frame is currently used by the kernel for message
- construction and contains no valid message yet.
- Appropriate action is to invoke poll() to wait for
- new messages.
-
-- NL_MMAP_STATUS_VALID:
- RX ring: frame contains a valid message. Approriate action is
- to process the message and release the frame back to
- the kernel by setting the status to
- NL_MMAP_STATUS_UNUSED or queue the frame by setting the
- status to NL_MMAP_STATUS_SKIP.
-
- TX ring: the frame contains a valid message from user-space to
- be processed by the kernel. After completing processing
- the kernel will release the frame back to user-space by
- setting the status to NL_MMAP_STATUS_UNUSED.
-
-- NL_MMAP_STATUS_COPY:
- RX ring only: a message is ready to be processed but could not be
- stored in the ring, either because it exceeded the
- frame size or because the originating subsystem does
- not support memory mapped I/O. Appropriate action is
- to invoke recvmsg() to receive the message and release
- the frame back to the kernel by setting the status to
- NL_MMAP_STATUS_UNUSED.
-
-- NL_MMAP_STATUS_SKIP:
- RX ring only: user-space queued the message for later processing, but
- processed some messages following it in the ring. The
- kernel should skip this frame when looking for unused
- frames.
-
-The data area of a frame begins at a offset of NL_MMAP_HDRLEN relative to the
-frame header.
-
-TX limitations
---------------
-
-As of Jan 2015 the message is always copied from the ring frame to an
-allocated buffer due to unresolved security concerns.
-See commit 4682a0358639b29cf ("netlink: Always copy on mmap TX.").
-
-Example
--------
-
-Ring setup:
-
- unsigned int block_size = 16 * getpagesize();
- struct nl_mmap_req req = {
- .nm_block_size = block_size,
- .nm_block_nr = 64,
- .nm_frame_size = 16384,
- .nm_frame_nr = 64 * block_size / 16384,
- };
- unsigned int ring_size;
- void *rx_ring, *tx_ring;
-
- /* Configure ring parameters */
- if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
- exit(1);
- if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0)
- exit(1)
-
- /* Calculate size of each individual ring */
- ring_size = req.nm_block_nr * req.nm_block_size;
-
- /* Map RX/TX rings. The TX ring is located after the RX ring */
- rx_ring = mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE,
- MAP_SHARED, fd, 0);
- if ((long)rx_ring == -1L)
- exit(1);
- tx_ring = rx_ring + ring_size:
-
-Message reception:
-
-This example assumes some ring parameters of the ring setup are available.
-
- unsigned int frame_offset = 0;
- struct nl_mmap_hdr *hdr;
- struct nlmsghdr *nlh;
- unsigned char buf[16384];
- ssize_t len;
-
- while (1) {
- struct pollfd pfds[1];
-
- pfds[0].fd = fd;
- pfds[0].events = POLLIN | POLLERR;
- pfds[0].revents = 0;
-
- if (poll(pfds, 1, -1) < 0 && errno != -EINTR)
- exit(1);
-
- /* Check for errors. Error handling omitted */
- if (pfds[0].revents & POLLERR)
- <handle error>
-
- /* If no new messages, poll again */
- if (!(pfds[0].revents & POLLIN))
- continue;
-
- /* Process all frames */
- while (1) {
- /* Get next frame header */
- hdr = rx_ring + frame_offset;
-
- if (hdr->nm_status == NL_MMAP_STATUS_VALID) {
- /* Regular memory mapped frame */
- nlh = (void *)hdr + NL_MMAP_HDRLEN;
- len = hdr->nm_len;
-
- /* Release empty message immediately. May happen
- * on error during message construction.
- */
- if (len == 0)
- goto release;
- } else if (hdr->nm_status == NL_MMAP_STATUS_COPY) {
- /* Frame queued to socket receive queue */
- len = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
- if (len <= 0)
- break;
- nlh = buf;
- } else
- /* No more messages to process, continue polling */
- break;
-
- process_msg(nlh);
-release:
- /* Release frame back to the kernel */
- hdr->nm_status = NL_MMAP_STATUS_UNUSED;
-
- /* Advance frame offset to next frame */
- frame_offset = (frame_offset + frame_size) % ring_size;
- }
- }
-
-Message transmission:
-
-This example assumes some ring parameters of the ring setup are available.
-A single message is constructed and transmitted, to send multiple messages
-at once they would be constructed in consecutive frames before a final call
-to sendto().
-
- unsigned int frame_offset = 0;
- struct nl_mmap_hdr *hdr;
- struct nlmsghdr *nlh;
- struct sockaddr_nl addr = {
- .nl_family = AF_NETLINK,
- };
-
- hdr = tx_ring + frame_offset;
- if (hdr->nm_status != NL_MMAP_STATUS_UNUSED)
- /* No frame available. Use poll() to avoid. */
- exit(1);
-
- nlh = (void *)hdr + NL_MMAP_HDRLEN;
-
- /* Build message */
- build_message(nlh);
-
- /* Fill frame header: length and status need to be set */
- hdr->nm_len = nlh->nlmsg_len;
- hdr->nm_status = NL_MMAP_STATUS_VALID;
-
- if (sendto(fd, NULL, 0, 0, &addr, sizeof(addr)) < 0)
- exit(1);
-
- /* Advance frame offset to next frame */
- frame_offset = (frame_offset + frame_size) % ring_size;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
index dd9d6e6100a5..04523d41b873 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
@@ -1824,10 +1824,12 @@ struct dcbx_app_priority_entry {
u8 pri_bitmap;
u8 appBitfield;
#define DCBX_APP_ENTRY_VALID 0x01
- #define DCBX_APP_ENTRY_SF_MASK 0x30
+ #define DCBX_APP_ENTRY_SF_MASK 0xF0
#define DCBX_APP_ENTRY_SF_SHIFT 4
#define DCBX_APP_SF_ETH_TYPE 0x10
#define DCBX_APP_SF_PORT 0x20
+ #define DCBX_APP_SF_UDP 0x40
+ #define DCBX_APP_SF_DEFAULT 0x80
#elif defined(__LITTLE_ENDIAN)
u8 appBitfield;
#define DCBX_APP_ENTRY_VALID 0x01
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index ec6e849676c1..1dac6c6111bf 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -702,6 +702,11 @@ struct doorbell_stats {
u32 db_full;
};
+struct hash_mac_addr {
+ struct list_head list;
+ u8 addr[ETH_ALEN];
+};
+
struct adapter {
void __iomem *regs;
void __iomem *bar2;
@@ -740,6 +745,7 @@ struct adapter {
void *uld_handle[CXGB4_ULD_MAX];
struct list_head list_node;
struct list_head rcu_node;
+ struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */
struct tid_info tids;
void **tid_release_head;
@@ -1207,6 +1213,24 @@ static inline int t4_wr_mbox_ns(struct adapter *adap, int mbox, const void *cmd,
return t4_wr_mbox_meat(adap, mbox, cmd, size, rpl, false);
}
+/**
+ * hash_mac_addr - return the hash value of a MAC address
+ * @addr: the 48-bit Ethernet MAC address
+ *
+ * Hashes a MAC address according to the hash function used by HW inexact
+ * (hash) address matching.
+ */
+static inline int hash_mac_addr(const u8 *addr)
+{
+ u32 a = ((u32)addr[0] << 16) | ((u32)addr[1] << 8) | addr[2];
+ u32 b = ((u32)addr[3] << 16) | ((u32)addr[4] << 8) | addr[5];
+
+ a ^= b;
+ a ^= (a >> 12);
+ a ^= (a >> 6);
+ return a & 0x3f;
+}
+
void t4_write_indirect(struct adapter *adap, unsigned int addr_reg,
unsigned int data_reg, const u32 *vals,
unsigned int nregs, unsigned int start_idx);
@@ -1389,6 +1413,9 @@ int t4_set_rxmode(struct adapter *adap, unsigned int mbox, unsigned int viid,
int t4_alloc_mac_filt(struct adapter *adap, unsigned int mbox,
unsigned int viid, bool free, unsigned int naddr,
const u8 **addr, u16 *idx, u64 *hash, bool sleep_ok);
+int t4_free_mac_filt(struct adapter *adap, unsigned int mbox,
+ unsigned int viid, unsigned int naddr,
+ const u8 **addr, bool sleep_ok);
int t4_change_mac(struct adapter *adap, unsigned int mbox, unsigned int viid,
int idx, const u8 *addr, bool persist, bool add_smt);
int t4_set_addr_hash(struct adapter *adap, unsigned int mbox, unsigned int viid,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index b8a5fb0c32d4..adad73f7c8cd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -338,84 +338,108 @@ void t4_os_portmod_changed(const struct adapter *adap, int port_id)
netdev_info(dev, "%s module inserted\n", mod_str[pi->mod_type]);
}
+int dbfifo_int_thresh = 10; /* 10 == 640 entry threshold */
+module_param(dbfifo_int_thresh, int, 0644);
+MODULE_PARM_DESC(dbfifo_int_thresh, "doorbell fifo interrupt threshold");
+
/*
- * Configure the exact and hash address filters to handle a port's multicast
- * and secondary unicast MAC addresses.
+ * usecs to sleep while draining the dbfifo
*/
-static int set_addr_filters(const struct net_device *dev, bool sleep)
+static int dbfifo_drain_delay = 1000;
+module_param(dbfifo_drain_delay, int, 0644);
+MODULE_PARM_DESC(dbfifo_drain_delay,
+ "usecs to sleep while draining the dbfifo");
+
+static inline int cxgb4_set_addr_hash(struct port_info *pi)
{
+ struct adapter *adap = pi->adapter;
+ u64 vec = 0;
+ bool ucast = false;
+ struct hash_mac_addr *entry;
+
+ /* Calculate the hash vector for the updated list and program it */
+ list_for_each_entry(entry, &adap->mac_hlist, list) {
+ ucast |= is_unicast_ether_addr(entry->addr);
+ vec |= (1ULL << hash_mac_addr(entry->addr));
+ }
+ return t4_set_addr_hash(adap, adap->mbox, pi->viid, ucast,
+ vec, false);
+}
+
+static int cxgb4_mac_sync(struct net_device *netdev, const u8 *mac_addr)
+{
+ struct port_info *pi = netdev_priv(netdev);
+ struct adapter *adap = pi->adapter;
+ int ret;
u64 mhash = 0;
u64 uhash = 0;
- bool free = true;
- u16 filt_idx[7];
- const u8 *addr[7];
- int ret, naddr = 0;
- const struct netdev_hw_addr *ha;
- int uc_cnt = netdev_uc_count(dev);
- int mc_cnt = netdev_mc_count(dev);
- const struct port_info *pi = netdev_priv(dev);
- unsigned int mb = pi->adapter->pf;
+ bool free = false;
+ bool ucast = is_unicast_ether_addr(mac_addr);
+ const u8 *maclist[1] = {mac_addr};
+ struct hash_mac_addr *new_entry;
- /* first do the secondary unicast addresses */
- netdev_for_each_uc_addr(ha, dev) {
- addr[naddr++] = ha->addr;
- if (--uc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
- ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
- naddr, addr, filt_idx, &uhash, sleep);
- if (ret < 0)
- return ret;
-
- free = false;
- naddr = 0;
- }
+ ret = t4_alloc_mac_filt(adap, adap->mbox, pi->viid, free, 1, maclist,
+ NULL, ucast ? &uhash : &mhash, false);
+ if (ret < 0)
+ goto out;
+ /* if hash != 0, then add the addr to hash addr list
+ * so on the end we will calculate the hash for the
+ * list and program it
+ */
+ if (uhash || mhash) {
+ new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC);
+ if (!new_entry)
+ return -ENOMEM;
+ ether_addr_copy(new_entry->addr, mac_addr);
+ list_add_tail(&new_entry->list, &adap->mac_hlist);
+ ret = cxgb4_set_addr_hash(pi);
}
+out:
+ return ret < 0 ? ret : 0;
+}
- /* next set up the multicast addresses */
- netdev_for_each_mc_addr(ha, dev) {
- addr[naddr++] = ha->addr;
- if (--mc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
- ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
- naddr, addr, filt_idx, &mhash, sleep);
- if (ret < 0)
- return ret;
+static int cxgb4_mac_unsync(struct net_device *netdev, const u8 *mac_addr)
+{
+ struct port_info *pi = netdev_priv(netdev);
+ struct adapter *adap = pi->adapter;
+ int ret;
+ const u8 *maclist[1] = {mac_addr};
+ struct hash_mac_addr *entry, *tmp;
- free = false;
- naddr = 0;
+ /* If the MAC address to be removed is in the hash addr
+ * list, delete it from the list and update hash vector
+ */
+ list_for_each_entry_safe(entry, tmp, &adap->mac_hlist, list) {
+ if (ether_addr_equal(entry->addr, mac_addr)) {
+ list_del(&entry->list);
+ kfree(entry);
+ return cxgb4_set_addr_hash(pi);
}
}
- return t4_set_addr_hash(pi->adapter, mb, pi->viid, uhash != 0,
- uhash | mhash, sleep);
+ ret = t4_free_mac_filt(adap, adap->mbox, pi->viid, 1, maclist, false);
+ return ret < 0 ? -EINVAL : 0;
}
-int dbfifo_int_thresh = 10; /* 10 == 640 entry threshold */
-module_param(dbfifo_int_thresh, int, 0644);
-MODULE_PARM_DESC(dbfifo_int_thresh, "doorbell fifo interrupt threshold");
-
-/*
- * usecs to sleep while draining the dbfifo
- */
-static int dbfifo_drain_delay = 1000;
-module_param(dbfifo_drain_delay, int, 0644);
-MODULE_PARM_DESC(dbfifo_drain_delay,
- "usecs to sleep while draining the dbfifo");
-
/*
* Set Rx properties of a port, such as promiscruity, address filters, and MTU.
* If @mtu is -1 it is left unchanged.
*/
static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
{
- int ret;
struct port_info *pi = netdev_priv(dev);
+ struct adapter *adapter = pi->adapter;
- ret = set_addr_filters(dev, sleep_ok);
- if (ret == 0)
- ret = t4_set_rxmode(pi->adapter, pi->adapter->pf, pi->viid, mtu,
- (dev->flags & IFF_PROMISC) ? 1 : 0,
- (dev->flags & IFF_ALLMULTI) ? 1 : 0, 1, -1,
- sleep_ok);
- return ret;
+ if (!(dev->flags & IFF_PROMISC)) {
+ __dev_uc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync);
+ if (!(dev->flags & IFF_ALLMULTI))
+ __dev_mc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync);
+ }
+
+ return t4_set_rxmode(adapter, adapter->mbox, pi->viid, mtu,
+ (dev->flags & IFF_PROMISC) ? 1 : 0,
+ (dev->flags & IFF_ALLMULTI) ? 1 : 0, 1, -1,
+ sleep_ok);
}
/**
@@ -2677,6 +2701,8 @@ static int cxgb_up(struct adapter *adap)
#if IS_ENABLED(CONFIG_IPV6)
update_clip(adap);
#endif
+ /* Initialize hash mac addr list*/
+ INIT_LIST_HEAD(&adap->mac_hlist);
out:
return err;
irq_err:
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 636b4691f252..cc1736bece0f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -4433,23 +4433,6 @@ void t4_intr_disable(struct adapter *adapter)
}
/**
- * hash_mac_addr - return the hash value of a MAC address
- * @addr: the 48-bit Ethernet MAC address
- *
- * Hashes a MAC address according to the hash function used by HW inexact
- * (hash) address matching.
- */
-static int hash_mac_addr(const u8 *addr)
-{
- u32 a = ((u32)addr[0] << 16) | ((u32)addr[1] << 8) | addr[2];
- u32 b = ((u32)addr[3] << 16) | ((u32)addr[4] << 8) | addr[5];
- a ^= b;
- a ^= (a >> 12);
- a ^= (a >> 6);
- return a & 0x3f;
-}
-
-/**
* t4_config_rss_range - configure a portion of the RSS mapping table
* @adapter: the adapter
* @mbox: mbox to use for the FW command
@@ -6738,6 +6721,81 @@ int t4_alloc_mac_filt(struct adapter *adap, unsigned int mbox,
}
/**
+ * t4_free_mac_filt - frees exact-match filters of given MAC addresses
+ * @adap: the adapter
+ * @mbox: mailbox to use for the FW command
+ * @viid: the VI id
+ * @naddr: the number of MAC addresses to allocate filters for (up to 7)
+ * @addr: the MAC address(es)
+ * @sleep_ok: call is allowed to sleep
+ *
+ * Frees the exact-match filter for each of the supplied addresses
+ *
+ * Returns a negative error number or the number of filters freed.
+ */
+int t4_free_mac_filt(struct adapter *adap, unsigned int mbox,
+ unsigned int viid, unsigned int naddr,
+ const u8 **addr, bool sleep_ok)
+{
+ int offset, ret = 0;
+ struct fw_vi_mac_cmd c;
+ unsigned int nfilters = 0;
+ unsigned int max_naddr = is_t4(adap->params.chip) ?
+ NUM_MPS_CLS_SRAM_L_INSTANCES :
+ NUM_MPS_T5_CLS_SRAM_L_INSTANCES;
+ unsigned int rem = naddr;
+
+ if (naddr > max_naddr)
+ return -EINVAL;
+
+ for (offset = 0; offset < (int)naddr ; /**/) {
+ unsigned int fw_naddr = (rem < ARRAY_SIZE(c.u.exact)
+ ? rem
+ : ARRAY_SIZE(c.u.exact));
+ size_t len16 = DIV_ROUND_UP(offsetof(struct fw_vi_mac_cmd,
+ u.exact[fw_naddr]), 16);
+ struct fw_vi_mac_exact *p;
+ int i;
+
+ memset(&c, 0, sizeof(c));
+ c.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_VI_MAC_CMD) |
+ FW_CMD_REQUEST_F |
+ FW_CMD_WRITE_F |
+ FW_CMD_EXEC_V(0) |
+ FW_VI_MAC_CMD_VIID_V(viid));
+ c.freemacs_to_len16 =
+ cpu_to_be32(FW_VI_MAC_CMD_FREEMACS_V(0) |
+ FW_CMD_LEN16_V(len16));
+
+ for (i = 0, p = c.u.exact; i < (int)fw_naddr; i++, p++) {
+ p->valid_to_idx = cpu_to_be16(
+ FW_VI_MAC_CMD_VALID_F |
+ FW_VI_MAC_CMD_IDX_V(FW_VI_MAC_MAC_BASED_FREE));
+ memcpy(p->macaddr, addr[offset+i], sizeof(p->macaddr));
+ }
+
+ ret = t4_wr_mbox_meat(adap, mbox, &c, sizeof(c), &c, sleep_ok);
+ if (ret)
+ break;
+
+ for (i = 0, p = c.u.exact; i < fw_naddr; i++, p++) {
+ u16 index = FW_VI_MAC_CMD_IDX_G(
+ be16_to_cpu(p->valid_to_idx));
+
+ if (index < max_naddr)
+ nfilters++;
+ }
+
+ offset += fw_naddr;
+ rem -= fw_naddr;
+ }
+
+ if (ret == 0)
+ ret = nfilters;
+ return ret;
+}
+
+/**
* t4_change_mac - modifies the exact-match filter for a MAC address
* @adap: the adapter
* @mbox: mailbox to use for the FW command
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
index 6049f70e110c..4a707c32d76f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
@@ -348,6 +348,11 @@ struct sge {
#define for_each_ethrxq(sge, iter) \
for (iter = 0; iter < (sge)->ethqsets; iter++)
+struct hash_mac_addr {
+ struct list_head list;
+ u8 addr[ETH_ALEN];
+};
+
/*
* Per-"adapter" (Virtual Function) information.
*/
@@ -381,6 +386,9 @@ struct adapter {
/* various locks */
spinlock_t stats_lock;
+
+ /* list of MAC addresses in MPS Hash */
+ struct list_head mac_hlist;
};
enum { /* adapter flags */
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 0cfa5d72cafd..8337514ababb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -741,6 +741,9 @@ static int adapter_up(struct adapter *adapter)
*/
enable_rx(adapter);
t4vf_sge_start(adapter);
+
+ /* Initialize hash mac addr list*/
+ INIT_LIST_HEAD(&adapter->mac_hlist);
return 0;
}
@@ -905,51 +908,74 @@ static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device
return naddr;
}
-/*
- * Configure the exact and hash address filters to handle a port's multicast
- * and secondary unicast MAC addresses.
- */
-static int set_addr_filters(const struct net_device *dev, bool sleep)
+static inline int cxgb4vf_set_addr_hash(struct port_info *pi)
{
- u64 mhash = 0;
- u64 uhash = 0;
- bool free = true;
- unsigned int offset, naddr;
- const u8 *addr[7];
- int ret;
- const struct port_info *pi = netdev_priv(dev);
+ struct adapter *adapter = pi->adapter;
+ u64 vec = 0;
+ bool ucast = false;
+ struct hash_mac_addr *entry;
- /* first do the secondary unicast addresses */
- for (offset = 0; ; offset += naddr) {
- naddr = collect_netdev_uc_list_addrs(dev, addr, offset,
- ARRAY_SIZE(addr));
- if (naddr == 0)
- break;
+ /* Calculate the hash vector for the updated list and program it */
+ list_for_each_entry(entry, &adapter->mac_hlist, list) {
+ ucast |= is_unicast_ether_addr(entry->addr);
+ vec |= (1ULL << hash_mac_addr(entry->addr));
+ }
+ return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false);
+}
- ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
- naddr, addr, NULL, &uhash, sleep);
- if (ret < 0)
- return ret;
+static int cxgb4vf_mac_sync(struct net_device *netdev, const u8 *mac_addr)
+{
+ struct port_info *pi = netdev_priv(netdev);
+ struct adapter *adapter = pi->adapter;
+ int ret;
+ u64 mhash = 0;
+ u64 uhash = 0;
+ bool free = false;
+ bool ucast = is_unicast_ether_addr(mac_addr);
+ const u8 *maclist[1] = {mac_addr};
+ struct hash_mac_addr *new_entry;
- free = false;
+ ret = t4vf_alloc_mac_filt(adapter, pi->viid, free, 1, maclist,
+ NULL, ucast ? &uhash : &mhash, false);
+ if (ret < 0)
+ goto out;
+ /* if hash != 0, then add the addr to hash addr list
+ * so on the end we will calculate the hash for the
+ * list and program it
+ */
+ if (uhash || mhash) {
+ new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC);
+ if (!new_entry)
+ return -ENOMEM;
+ ether_addr_copy(new_entry->addr, mac_addr);
+ list_add_tail(&new_entry->list, &adapter->mac_hlist);
+ ret = cxgb4vf_set_addr_hash(pi);
}
+out:
+ return ret < 0 ? ret : 0;
+}
- /* next set up the multicast addresses */
- for (offset = 0; ; offset += naddr) {
- naddr = collect_netdev_mc_list_addrs(dev, addr, offset,
- ARRAY_SIZE(addr));
- if (naddr == 0)
- break;
+static int cxgb4vf_mac_unsync(struct net_device *netdev, const u8 *mac_addr)
+{
+ struct port_info *pi = netdev_priv(netdev);
+ struct adapter *adapter = pi->adapter;
+ int ret;
+ const u8 *maclist[1] = {mac_addr};
+ struct hash_mac_addr *entry, *tmp;
- ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
- naddr, addr, NULL, &mhash, sleep);
- if (ret < 0)
- return ret;
- free = false;
+ /* If the MAC address to be removed is in the hash addr
+ * list, delete it from the list and update hash vector
+ */
+ list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, list) {
+ if (ether_addr_equal(entry->addr, mac_addr)) {
+ list_del(&entry->list);
+ kfree(entry);
+ return cxgb4vf_set_addr_hash(pi);
+ }
}
- return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0,
- uhash | mhash, sleep);
+ ret = t4vf_free_mac_filt(adapter, pi->viid, 1, maclist, false);
+ return ret < 0 ? -EINVAL : 0;
}
/*
@@ -958,16 +984,18 @@ static int set_addr_filters(const struct net_device *dev, bool sleep)
*/
static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
{
- int ret;
struct port_info *pi = netdev_priv(dev);
- ret = set_addr_filters(dev, sleep_ok);
- if (ret == 0)
- ret = t4vf_set_rxmode(pi->adapter, pi->viid, -1,
- (dev->flags & IFF_PROMISC) != 0,
- (dev->flags & IFF_ALLMULTI) != 0,
- 1, -1, sleep_ok);
- return ret;
+ if (!(dev->flags & IFF_PROMISC)) {
+ __dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
+ if (!(dev->flags & IFF_ALLMULTI))
+ __dev_mc_sync(dev, cxgb4vf_mac_sync,
+ cxgb4vf_mac_unsync);
+ }
+ return t4vf_set_rxmode(pi->adapter, pi->viid, -1,
+ (dev->flags & IFF_PROMISC) != 0,
+ (dev->flags & IFF_ALLMULTI) != 0,
+ 1, -1, sleep_ok);
}
/*
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
index 88b8981b4751..6ce302fe1a61 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
@@ -285,6 +285,24 @@ static inline int is_t4(enum chip_type chip)
return CHELSIO_CHIP_VERSION(chip) == CHELSIO_T4;
}
+/**
+ * hash_mac_addr - return the hash value of a MAC address
+ * @addr: the 48-bit Ethernet MAC address
+ *
+ * Hashes a MAC address according to the hash function used by hardware
+ * inexact (hash) address matching.
+ */
+static inline int hash_mac_addr(const u8 *addr)
+{
+ u32 a = ((u32)addr[0] << 16) | ((u32)addr[1] << 8) | addr[2];
+ u32 b = ((u32)addr[3] << 16) | ((u32)addr[4] << 8) | addr[5];
+
+ a ^= b;
+ a ^= (a >> 12);
+ a ^= (a >> 6);
+ return a & 0x3f;
+}
+
int t4vf_wait_dev_ready(struct adapter *);
int t4vf_port_init(struct adapter *, int);
@@ -320,6 +338,8 @@ int t4vf_set_rxmode(struct adapter *, unsigned int, int, int, int, int, int,
bool);
int t4vf_alloc_mac_filt(struct adapter *, unsigned int, bool, unsigned int,
const u8 **, u16 *, u64 *, bool);
+int t4vf_free_mac_filt(struct adapter *, unsigned int, unsigned int naddr,
+ const u8 **, bool);
int t4vf_change_mac(struct adapter *, unsigned int, int, const u8 *, bool);
int t4vf_set_addr_hash(struct adapter *, unsigned int, bool, u64, bool);
int t4vf_get_port_stats(struct adapter *, int, struct t4vf_port_stats *);
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index b6fa74aafe47..54220117dcba 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@ -236,23 +236,6 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size,
return -ETIMEDOUT;
}
-/**
- * hash_mac_addr - return the hash value of a MAC address
- * @addr: the 48-bit Ethernet MAC address
- *
- * Hashes a MAC address according to the hash function used by hardware
- * inexact (hash) address matching.
- */
-static int hash_mac_addr(const u8 *addr)
-{
- u32 a = ((u32)addr[0] << 16) | ((u32)addr[1] << 8) | addr[2];
- u32 b = ((u32)addr[3] << 16) | ((u32)addr[4] << 8) | addr[5];
- a ^= b;
- a ^= (a >> 12);
- a ^= (a >> 6);
- return a & 0x3f;
-}
-
#define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\
FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \
FW_PORT_CAP_SPEED_100G | FW_PORT_CAP_ANEG)
@@ -1266,6 +1249,77 @@ int t4vf_alloc_mac_filt(struct adapter *adapter, unsigned int viid, bool free,
}
/**
+ * t4vf_free_mac_filt - frees exact-match filters of given MAC addresses
+ * @adapter: the adapter
+ * @viid: the VI id
+ * @naddr: the number of MAC addresses to allocate filters for (up to 7)
+ * @addr: the MAC address(es)
+ * @sleep_ok: call is allowed to sleep
+ *
+ * Frees the exact-match filter for each of the supplied addresses
+ *
+ * Returns a negative error number or the number of filters freed.
+ */
+int t4vf_free_mac_filt(struct adapter *adapter, unsigned int viid,
+ unsigned int naddr, const u8 **addr, bool sleep_ok)
+{
+ int offset, ret = 0;
+ struct fw_vi_mac_cmd cmd;
+ unsigned int nfilters = 0;
+ unsigned int max_naddr = adapter->params.arch.mps_tcam_size;
+ unsigned int rem = naddr;
+
+ if (naddr > max_naddr)
+ return -EINVAL;
+
+ for (offset = 0; offset < (int)naddr ; /**/) {
+ unsigned int fw_naddr = (rem < ARRAY_SIZE(cmd.u.exact) ?
+ rem : ARRAY_SIZE(cmd.u.exact));
+ size_t len16 = DIV_ROUND_UP(offsetof(struct fw_vi_mac_cmd,
+ u.exact[fw_naddr]), 16);
+ struct fw_vi_mac_exact *p;
+ int i;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_VI_MAC_CMD) |
+ FW_CMD_REQUEST_F |
+ FW_CMD_WRITE_F |
+ FW_CMD_EXEC_V(0) |
+ FW_VI_MAC_CMD_VIID_V(viid));
+ cmd.freemacs_to_len16 =
+ cpu_to_be32(FW_VI_MAC_CMD_FREEMACS_V(0) |
+ FW_CMD_LEN16_V(len16));
+
+ for (i = 0, p = cmd.u.exact; i < (int)fw_naddr; i++, p++) {
+ p->valid_to_idx = cpu_to_be16(
+ FW_VI_MAC_CMD_VALID_F |
+ FW_VI_MAC_CMD_IDX_V(FW_VI_MAC_MAC_BASED_FREE));
+ memcpy(p->macaddr, addr[offset+i], sizeof(p->macaddr));
+ }
+
+ ret = t4vf_wr_mbox_core(adapter, &cmd, sizeof(cmd), &cmd,
+ sleep_ok);
+ if (ret)
+ break;
+
+ for (i = 0, p = cmd.u.exact; i < fw_naddr; i++, p++) {
+ u16 index = FW_VI_MAC_CMD_IDX_G(
+ be16_to_cpu(p->valid_to_idx));
+
+ if (index < max_naddr)
+ nfilters++;
+ }
+
+ offset += fw_naddr;
+ rem -= fw_naddr;
+ }
+
+ if (ret == 0)
+ ret = nfilters;
+ return ret;
+}
+
+/**
* t4vf_change_mac - modifies the exact-match filter for a MAC address
* @adapter: the adapter
* @viid: the Virtual Interface ID
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 978d07a61bbf..73feaf7eedb8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -124,6 +124,8 @@ struct qed_sp_vport_update_params {
u8 update_vport_active_tx_flg;
u8 vport_active_tx_flg;
u8 update_approx_mcast_flg;
+ u8 update_accept_any_vlan_flg;
+ u8 accept_any_vlan;
unsigned long bins[8];
struct qed_rss_params *rss_params;
struct qed_filter_accept_flags accept_flags;
@@ -393,7 +395,9 @@ qed_sp_vport_update(struct qed_hwfn *p_hwfn,
p_cmn->update_rx_active_flg = p_params->update_vport_active_rx_flg;
p_cmn->tx_active_flg = p_params->vport_active_tx_flg;
p_cmn->update_tx_active_flg = p_params->update_vport_active_tx_flg;
-
+ p_cmn->accept_any_vlan = p_params->accept_any_vlan;
+ p_cmn->update_accept_any_vlan_flg =
+ p_params->update_accept_any_vlan_flg;
rc = qed_sp_vport_update_rss(p_hwfn, p_ramrod, p_rss_params);
if (rc) {
/* Return spq entry which is taken in qed_sp_init_request()*/
@@ -444,8 +448,10 @@ static int qed_sp_vport_stop(struct qed_hwfn *p_hwfn,
static int qed_filter_accept_cmd(struct qed_dev *cdev,
u8 vport,
struct qed_filter_accept_flags accept_flags,
- enum spq_mode comp_mode,
- struct qed_spq_comp_cb *p_comp_data)
+ u8 update_accept_any_vlan,
+ u8 accept_any_vlan,
+ enum spq_mode comp_mode,
+ struct qed_spq_comp_cb *p_comp_data)
{
struct qed_sp_vport_update_params vport_update_params;
int i, rc;
@@ -454,6 +460,8 @@ static int qed_filter_accept_cmd(struct qed_dev *cdev,
memset(&vport_update_params, 0, sizeof(vport_update_params));
vport_update_params.vport_id = vport;
vport_update_params.accept_flags = accept_flags;
+ vport_update_params.update_accept_any_vlan_flg = update_accept_any_vlan;
+ vport_update_params.accept_any_vlan = accept_any_vlan;
for_each_hwfn(cdev, i) {
struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
@@ -471,6 +479,10 @@ static int qed_filter_accept_cmd(struct qed_dev *cdev,
"Accept filter configured, flags = [Rx]%x [Tx]%x\n",
accept_flags.rx_accept_filter,
accept_flags.tx_accept_filter);
+ if (update_accept_any_vlan)
+ DP_VERBOSE(p_hwfn, QED_MSG_SP,
+ "accept_any_vlan=%d configured\n",
+ accept_any_vlan);
}
return 0;
@@ -1347,6 +1359,9 @@ static int qed_update_vport(struct qed_dev *cdev,
params->update_vport_active_flg;
sp_params.vport_active_rx_flg = params->vport_active_flg;
sp_params.vport_active_tx_flg = params->vport_active_flg;
+ sp_params.accept_any_vlan = params->accept_any_vlan;
+ sp_params.update_accept_any_vlan_flg =
+ params->update_accept_any_vlan_flg;
/* RSS - is a bit tricky, since upper-layer isn't familiar with hwfns.
* We need to re-fix the rss values per engine for CMT.
@@ -1566,7 +1581,7 @@ static int qed_configure_filter_rx_mode(struct qed_dev *cdev,
else if (type == QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC)
accept_flags.rx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED;
- return qed_filter_accept_cmd(cdev, 0, accept_flags,
+ return qed_filter_accept_cmd(cdev, 0, accept_flags, false, false,
QED_SPQ_MODE_CB, NULL);
}
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index f75d9e0676ce..15c5528b4f39 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -100,6 +100,12 @@ struct qede_stats {
u64 tx_mac_ctrl_frames;
};
+struct qede_vlan {
+ struct list_head list;
+ u16 vid;
+ bool configured;
+};
+
struct qede_dev {
struct qed_dev *cdev;
struct net_device *ndev;
@@ -154,6 +160,10 @@ struct qede_dev {
u16 q_num_rx_buffers; /* Must be a power of two */
u16 q_num_tx_buffers; /* Must be a power of two */
+ struct list_head vlan_list;
+ u16 configured_vlans;
+ u16 non_configured_vlans;
+ bool accept_any_vlan;
struct delayed_work sp_task;
unsigned long sp_flags;
};
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index f50e0bd7fb2c..5f15e23a0f7d 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1140,6 +1140,21 @@ static int qede_set_ucast_rx_mac(struct qede_dev *edev,
return edev->ops->filter_config(edev->cdev, &filter_cmd);
}
+static int qede_set_ucast_rx_vlan(struct qede_dev *edev,
+ enum qed_filter_xcast_params_type opcode,
+ u16 vid)
+{
+ struct qed_filter_params filter_cmd;
+
+ memset(&filter_cmd, 0, sizeof(filter_cmd));
+ filter_cmd.type = QED_FILTER_TYPE_UCAST;
+ filter_cmd.filter.ucast.type = opcode;
+ filter_cmd.filter.ucast.vlan_valid = 1;
+ filter_cmd.filter.ucast.vlan = vid;
+
+ return edev->ops->filter_config(edev->cdev, &filter_cmd);
+}
+
void qede_fill_by_demand_stats(struct qede_dev *edev)
{
struct qed_eth_stats stats;
@@ -1252,6 +1267,247 @@ static struct rtnl_link_stats64 *qede_get_stats64(
return stats;
}
+static void qede_config_accept_any_vlan(struct qede_dev *edev, bool action)
+{
+ struct qed_update_vport_params params;
+ int rc;
+
+ /* Proceed only if action actually needs to be performed */
+ if (edev->accept_any_vlan == action)
+ return;
+
+ memset(&params, 0, sizeof(params));
+
+ params.vport_id = 0;
+ params.accept_any_vlan = action;
+ params.update_accept_any_vlan_flg = 1;
+
+ rc = edev->ops->vport_update(edev->cdev, &params);
+ if (rc) {
+ DP_ERR(edev, "Failed to %s accept-any-vlan\n",
+ action ? "enable" : "disable");
+ } else {
+ DP_INFO(edev, "%s accept-any-vlan\n",
+ action ? "enabled" : "disabled");
+ edev->accept_any_vlan = action;
+ }
+}
+
+static int qede_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct qede_dev *edev = netdev_priv(dev);
+ struct qede_vlan *vlan, *tmp;
+ int rc;
+
+ DP_VERBOSE(edev, NETIF_MSG_IFUP, "Adding vlan 0x%04x\n", vid);
+
+ vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
+ if (!vlan) {
+ DP_INFO(edev, "Failed to allocate struct for vlan\n");
+ return -ENOMEM;
+ }
+ INIT_LIST_HEAD(&vlan->list);
+ vlan->vid = vid;
+ vlan->configured = false;
+
+ /* Verify vlan isn't already configured */
+ list_for_each_entry(tmp, &edev->vlan_list, list) {
+ if (tmp->vid == vlan->vid) {
+ DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
+ "vlan already configured\n");
+ kfree(vlan);
+ return -EEXIST;
+ }
+ }
+
+ /* If interface is down, cache this VLAN ID and return */
+ if (edev->state != QEDE_STATE_OPEN) {
+ DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
+ "Interface is down, VLAN %d will be configured when interface is up\n",
+ vid);
+ if (vid != 0)
+ edev->non_configured_vlans++;
+ list_add(&vlan->list, &edev->vlan_list);
+
+ return 0;
+ }
+
+ /* Check for the filter limit.
+ * Note - vlan0 has a reserved filter and can be added without
+ * worrying about quota
+ */
+ if ((edev->configured_vlans < edev->dev_info.num_vlan_filters) ||
+ (vlan->vid == 0)) {
+ rc = qede_set_ucast_rx_vlan(edev,
+ QED_FILTER_XCAST_TYPE_ADD,
+ vlan->vid);
+ if (rc) {
+ DP_ERR(edev, "Failed to configure VLAN %d\n",
+ vlan->vid);
+ kfree(vlan);
+ return -EINVAL;
+ }
+ vlan->configured = true;
+
+ /* vlan0 filter isn't consuming out of our quota */
+ if (vlan->vid != 0)
+ edev->configured_vlans++;
+ } else {
+ /* Out of quota; Activate accept-any-VLAN mode */
+ if (!edev->non_configured_vlans)
+ qede_config_accept_any_vlan(edev, true);
+
+ edev->non_configured_vlans++;
+ }
+
+ list_add(&vlan->list, &edev->vlan_list);
+
+ return 0;
+}
+
+static void qede_del_vlan_from_list(struct qede_dev *edev,
+ struct qede_vlan *vlan)
+{
+ /* vlan0 filter isn't consuming out of our quota */
+ if (vlan->vid != 0) {
+ if (vlan->configured)
+ edev->configured_vlans--;
+ else
+ edev->non_configured_vlans--;
+ }
+
+ list_del(&vlan->list);
+ kfree(vlan);
+}
+
+static int qede_configure_vlan_filters(struct qede_dev *edev)
+{
+ int rc = 0, real_rc = 0, accept_any_vlan = 0;
+ struct qed_dev_eth_info *dev_info;
+ struct qede_vlan *vlan = NULL;
+
+ if (list_empty(&edev->vlan_list))
+ return 0;
+
+ dev_info = &edev->dev_info;
+
+ /* Configure non-configured vlans */
+ list_for_each_entry(vlan, &edev->vlan_list, list) {
+ if (vlan->configured)
+ continue;
+
+ /* We have used all our credits, now enable accept_any_vlan */
+ if ((vlan->vid != 0) &&
+ (edev->configured_vlans == dev_info->num_vlan_filters)) {
+ accept_any_vlan = 1;
+ continue;
+ }
+
+ DP_VERBOSE(edev, NETIF_MSG_IFUP, "Adding vlan %d\n", vlan->vid);
+
+ rc = qede_set_ucast_rx_vlan(edev, QED_FILTER_XCAST_TYPE_ADD,
+ vlan->vid);
+ if (rc) {
+ DP_ERR(edev, "Failed to configure VLAN %u\n",
+ vlan->vid);
+ real_rc = rc;
+ continue;
+ }
+
+ vlan->configured = true;
+ /* vlan0 filter doesn't consume our VLAN filter's quota */
+ if (vlan->vid != 0) {
+ edev->non_configured_vlans--;
+ edev->configured_vlans++;
+ }
+ }
+
+ /* enable accept_any_vlan mode if we have more VLANs than credits,
+ * or remove accept_any_vlan mode if we've actually removed
+ * a non-configured vlan, and all remaining vlans are truly configured.
+ */
+
+ if (accept_any_vlan)
+ qede_config_accept_any_vlan(edev, true);
+ else if (!edev->non_configured_vlans)
+ qede_config_accept_any_vlan(edev, false);
+
+ return real_rc;
+}
+
+static int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct qede_dev *edev = netdev_priv(dev);
+ struct qede_vlan *vlan = NULL;
+ int rc;
+
+ DP_VERBOSE(edev, NETIF_MSG_IFDOWN, "Removing vlan 0x%04x\n", vid);
+
+ /* Find whether entry exists */
+ list_for_each_entry(vlan, &edev->vlan_list, list)
+ if (vlan->vid == vid)
+ break;
+
+ if (!vlan || (vlan->vid != vid)) {
+ DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
+ "Vlan isn't configured\n");
+ return 0;
+ }
+
+ if (edev->state != QEDE_STATE_OPEN) {
+ /* As interface is already down, we don't have a VPORT
+ * instance to remove vlan filter. So just update vlan list
+ */
+ DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
+ "Interface is down, removing VLAN from list only\n");
+ qede_del_vlan_from_list(edev, vlan);
+ return 0;
+ }
+
+ /* Remove vlan */
+ rc = qede_set_ucast_rx_vlan(edev, QED_FILTER_XCAST_TYPE_DEL, vid);
+ if (rc) {
+ DP_ERR(edev, "Failed to remove VLAN %d\n", vid);
+ return -EINVAL;
+ }
+
+ qede_del_vlan_from_list(edev, vlan);
+
+ /* We have removed a VLAN - try to see if we can
+ * configure non-configured VLAN from the list.
+ */
+ rc = qede_configure_vlan_filters(edev);
+
+ return rc;
+}
+
+static void qede_vlan_mark_nonconfigured(struct qede_dev *edev)
+{
+ struct qede_vlan *vlan = NULL;
+
+ if (list_empty(&edev->vlan_list))
+ return;
+
+ list_for_each_entry(vlan, &edev->vlan_list, list) {
+ if (!vlan->configured)
+ continue;
+
+ vlan->configured = false;
+
+ /* vlan0 filter isn't consuming out of our quota */
+ if (vlan->vid != 0) {
+ edev->non_configured_vlans++;
+ edev->configured_vlans--;
+ }
+
+ DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
+ "marked vlan %d as non-configured\n",
+ vlan->vid);
+ }
+
+ edev->accept_any_vlan = false;
+}
+
static const struct net_device_ops qede_netdev_ops = {
.ndo_open = qede_open,
.ndo_stop = qede_close,
@@ -1260,6 +1516,8 @@ static const struct net_device_ops qede_netdev_ops = {
.ndo_set_mac_address = qede_set_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = qede_change_mtu,
+ .ndo_vlan_rx_add_vid = qede_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = qede_vlan_rx_kill_vid,
.ndo_get_stats64 = qede_get_stats64,
};
@@ -1304,6 +1562,8 @@ static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev,
edev->num_tc = edev->dev_info.num_tc;
+ INIT_LIST_HEAD(&edev->vlan_list);
+
return edev;
}
@@ -1335,7 +1595,7 @@ static void qede_init_ndev(struct qede_dev *edev)
NETIF_F_HIGHDMA;
ndev->features = hw_features | NETIF_F_RXHASH | NETIF_F_RXCSUM |
NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HIGHDMA |
- NETIF_F_HW_VLAN_CTAG_TX;
+ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_TX;
ndev->hw_features = hw_features;
@@ -2342,6 +2602,7 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode)
DP_INFO(edev, "Stopped Queues\n");
+ qede_vlan_mark_nonconfigured(edev);
edev->ops->fastpath_stop(edev->cdev);
/* Release the interrupts */
@@ -2410,6 +2671,9 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
edev->state = QEDE_STATE_OPEN;
mutex_unlock(&edev->qede_lock);
+ /* Program un-configured VLANs */
+ qede_configure_vlan_filters(edev);
+
/* Ask for link-up using current configuration */
memset(&link_params, 0, sizeof(link_params));
link_params.link_up = true;
@@ -2670,6 +2934,17 @@ static void qede_config_rx_mode(struct net_device *ndev)
goto out;
}
+ /* take care of VLAN mode */
+ if (ndev->flags & IFF_PROMISC) {
+ qede_config_accept_any_vlan(edev, true);
+ } else if (!edev->non_configured_vlans) {
+ /* It's possible that accept_any_vlan mode is set due to a
+ * previous setting of IFF_PROMISC. If vlan credits are
+ * sufficient, disable accept_any_vlan.
+ */
+ qede_config_accept_any_vlan(edev, false);
+ }
+
rx_mode.filter.accept_flags = accept_flags;
edev->ops->filter_config(edev->cdev, &rx_mode);
out:
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 6f208132a574..dfbe3ca687f7 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -110,6 +110,11 @@ static __be64 vni_to_tunnel_id(const __u8 *vni)
#endif
}
+static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
+{
+ return gs->sock->sk->sk_family;
+}
+
static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
__be32 addr, u8 vni[])
{
@@ -153,58 +158,60 @@ static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
return (struct genevehdr *)(udp_hdr(skb) + 1);
}
-/* geneve receive/decap routine */
-static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb)
+static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
+ struct sk_buff *skb)
{
- struct genevehdr *gnvh = geneve_hdr(skb);
- struct metadata_dst *tun_dst = NULL;
- struct geneve_dev *geneve = NULL;
- struct pcpu_sw_netstats *stats;
- struct iphdr *iph = NULL;
+ u8 *vni;
__be32 addr;
static u8 zero_vni[3];
- u8 *vni;
- int err = 0;
- sa_family_t sa_family;
#if IS_ENABLED(CONFIG_IPV6)
- struct ipv6hdr *ip6h = NULL;
- struct in6_addr addr6;
static struct in6_addr zero_addr6;
#endif
- sa_family = gs->sock->sk->sk_family;
+ if (geneve_get_sk_family(gs) == AF_INET) {
+ struct iphdr *iph;
- if (sa_family == AF_INET) {
iph = ip_hdr(skb); /* outer IP header... */
if (gs->collect_md) {
vni = zero_vni;
addr = 0;
} else {
- vni = gnvh->vni;
-
+ vni = geneve_hdr(skb)->vni;
addr = iph->saddr;
}
- geneve = geneve_lookup(gs, addr, vni);
+ return geneve_lookup(gs, addr, vni);
#if IS_ENABLED(CONFIG_IPV6)
- } else if (sa_family == AF_INET6) {
+ } else if (geneve_get_sk_family(gs) == AF_INET6) {
+ struct ipv6hdr *ip6h;
+ struct in6_addr addr6;
+
ip6h = ipv6_hdr(skb); /* outer IPv6 header... */
if (gs->collect_md) {
vni = zero_vni;
addr6 = zero_addr6;
} else {
- vni = gnvh->vni;
-
+ vni = geneve_hdr(skb)->vni;
addr6 = ip6h->saddr;
}
- geneve = geneve6_lookup(gs, addr6, vni);
+ return geneve6_lookup(gs, addr6, vni);
#endif
}
- if (!geneve)
- goto drop;
+ return NULL;
+}
+
+/* geneve receive/decap routine */
+static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
+ struct sk_buff *skb)
+{
+ struct genevehdr *gnvh = geneve_hdr(skb);
+ struct metadata_dst *tun_dst = NULL;
+ struct pcpu_sw_netstats *stats;
+ int err = 0;
+ void *oiph;
if (ip_tunnel_collect_metadata() || gs->collect_md) {
__be16 flags;
@@ -213,7 +220,7 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb)
(gnvh->oam ? TUNNEL_OAM : 0) |
(gnvh->critical ? TUNNEL_CRIT_OPT : 0);
- tun_dst = udp_tun_rx_dst(skb, sa_family, flags,
+ tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
vni_to_tunnel_id(gnvh->vni),
gnvh->opt_len * 4);
if (!tun_dst)
@@ -230,7 +237,6 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb)
}
skb_reset_mac_header(skb);
- skb_scrub_packet(skb, !net_eq(geneve->net, dev_net(geneve->dev)));
skb->protocol = eth_type_trans(skb, geneve->dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
@@ -241,25 +247,27 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb)
if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr))
goto drop;
+ oiph = skb_network_header(skb);
skb_reset_network_header(skb);
- if (iph)
- err = IP_ECN_decapsulate(iph, skb);
+ if (geneve_get_sk_family(gs) == AF_INET)
+ err = IP_ECN_decapsulate(oiph, skb);
#if IS_ENABLED(CONFIG_IPV6)
- if (ip6h)
- err = IP6_ECN_decapsulate(ip6h, skb);
+ else
+ err = IP6_ECN_decapsulate(oiph, skb);
#endif
if (unlikely(err)) {
if (log_ecn_error) {
- if (iph)
+ if (geneve_get_sk_family(gs) == AF_INET)
net_info_ratelimited("non-ECT from %pI4 "
"with TOS=%#x\n",
- &iph->saddr, iph->tos);
+ &((struct iphdr *)oiph)->saddr,
+ ((struct iphdr *)oiph)->tos);
#if IS_ENABLED(CONFIG_IPV6)
- if (ip6h)
+ else
net_info_ratelimited("non-ECT from %pI6\n",
- &ip6h->saddr);
+ &((struct ipv6hdr *)oiph)->saddr);
#endif
}
if (err > 1) {
@@ -321,6 +329,7 @@ static void geneve_uninit(struct net_device *dev)
static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct genevehdr *geneveh;
+ struct geneve_dev *geneve;
struct geneve_sock *gs;
int opts_len;
@@ -336,16 +345,21 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
goto error;
- opts_len = geneveh->opt_len * 4;
- if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
- htons(ETH_P_TEB)))
- goto drop;
-
gs = rcu_dereference_sk_user_data(sk);
if (!gs)
goto drop;
- geneve_rx(gs, skb);
+ geneve = geneve_lookup_skb(gs, skb);
+ if (!geneve)
+ goto drop;
+
+ opts_len = geneveh->opt_len * 4;
+ if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
+ htons(ETH_P_TEB),
+ !net_eq(geneve->net, dev_net(geneve->dev))))
+ goto drop;
+
+ geneve_rx(geneve, gs, skb);
return 0;
drop:
@@ -392,7 +406,7 @@ static void geneve_notify_add_rx_port(struct geneve_sock *gs)
struct net_device *dev;
struct sock *sk = gs->sock->sk;
struct net *net = sock_net(sk);
- sa_family_t sa_family = sk->sk_family;
+ sa_family_t sa_family = geneve_get_sk_family(gs);
__be16 port = inet_sk(sk)->inet_sport;
int err;
@@ -553,7 +567,7 @@ static void geneve_notify_del_rx_port(struct geneve_sock *gs)
struct net_device *dev;
struct sock *sk = gs->sock->sk;
struct net *net = sock_net(sk);
- sa_family_t sa_family = sk->sk_family;
+ sa_family_t sa_family = geneve_get_sk_family(gs);
__be16 port = inet_sk(sk)->inet_sport;
rcu_read_lock();
@@ -596,7 +610,7 @@ static struct geneve_sock *geneve_find_sock(struct geneve_net *gn,
list_for_each_entry(gs, &gn->sock_list, list) {
if (inet_sk(gs->sock->sk)->inet_sport == dst_port &&
- inet_sk(gs->sock->sk)->sk.sk_family == family) {
+ geneve_get_sk_family(gs) == family) {
return gs;
}
}
diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c
index 556904f572d6..03d54c4adc88 100644
--- a/drivers/net/phy/dp83848.c
+++ b/drivers/net/phy/dp83848.c
@@ -103,7 +103,7 @@ MODULE_DEVICE_TABLE(mdio, dp83848_tbl);
static struct phy_driver dp83848_driver[] = {
DP83848_PHY_DRIVER(TI_DP83848C_PHY_ID, "TI DP83848C 10/100 Mbps PHY"),
- DP83848_PHY_DRIVER(NS_DP83848C_PHY_ID, "TI DP83848C 10/100 Mbps PHY"),
+ DP83848_PHY_DRIVER(NS_DP83848C_PHY_ID, "NS DP83848C 10/100 Mbps PHY"),
DP83848_PHY_DRIVER(TLK10X_PHY_ID, "TI TLK10X 10/100 Mbps PHY"),
};
module_phy_driver(dp83848_driver);
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 3a84680b5117..c963897e713d 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1187,24 +1187,17 @@ out:
unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
}
-static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
- struct vxlan_metadata *md, __be32 vni,
+static void vxlan_rcv(struct vxlan_dev *vxlan, struct vxlan_sock *vs,
+ struct sk_buff *skb, struct vxlan_metadata *md,
struct metadata_dst *tun_dst)
{
struct iphdr *oip = NULL;
struct ipv6hdr *oip6 = NULL;
- struct vxlan_dev *vxlan;
struct pcpu_sw_netstats *stats;
union vxlan_addr saddr;
int err = 0;
- /* Is this VNI defined? */
- vxlan = vxlan_vs_find_vni(vs, vni);
- if (!vxlan)
- goto drop;
-
skb_reset_mac_header(skb);
- skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
skb->protocol = eth_type_trans(skb, vxlan->dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
@@ -1281,6 +1274,7 @@ drop:
static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct metadata_dst *tun_dst = NULL;
+ struct vxlan_dev *vxlan;
struct vxlan_sock *vs;
struct vxlanhdr unparsed;
struct vxlan_metadata _md;
@@ -1302,17 +1296,23 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
unparsed.vx_flags &= ~VXLAN_HF_VNI;
unparsed.vx_vni &= ~VXLAN_VNI_MASK;
- if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
- goto drop;
-
vs = rcu_dereference_sk_user_data(sk);
if (!vs)
goto drop;
+ vxlan = vxlan_vs_find_vni(vs, vxlan_vni(vxlan_hdr(skb)->vx_vni));
+ if (!vxlan)
+ goto drop;
+
+ if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB),
+ !net_eq(vxlan->net, dev_net(vxlan->dev))))
+ goto drop;
+
if (vxlan_collect_metadata(vs)) {
+ __be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
+
tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY,
- vxlan_vni(vxlan_hdr(skb)->vx_vni),
- sizeof(*md));
+ vxlan_vni_to_tun_id(vni), sizeof(*md));
if (!tun_dst)
goto drop;
@@ -1343,7 +1343,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
goto drop;
}
- vxlan_rcv(vs, skb, md, vxlan_vni(vxlan_hdr(skb)->vx_vni), tun_dst);
+ vxlan_rcv(vxlan, vs, skb, md, tun_dst);
return 0;
drop:
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index ba0d9789eb6e..1d82dd5e9a08 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -34,8 +34,6 @@ int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n);
int nfnetlink_has_listeners(struct net *net, unsigned int group);
-struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
- u32 dst_portid, gfp_t gfp_mask);
int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
unsigned int group, int echo, gfp_t flags);
int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error);
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 0b41959aab9f..da14ab61f363 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -69,16 +69,6 @@ extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group)
extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
extern int netlink_has_listeners(struct sock *sk, unsigned int group);
-extern struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
- unsigned int ldiff, u32 dst_portid,
- gfp_t gfp_mask);
-static inline struct sk_buff *
-netlink_alloc_skb(struct sock *ssk, unsigned int size, u32 dst_portid,
- gfp_t gfp_mask)
-{
- return __netlink_alloc_skb(ssk, size, 0, dst_portid, gfp_mask);
-}
-
extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock);
extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid,
__u32 group, gfp_t allocation);
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 81ab178e31c1..e53b0ca49e41 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -33,6 +33,8 @@ struct qed_update_vport_params {
u8 vport_id;
u8 update_vport_active_flg;
u8 vport_active_flg;
+ u8 update_accept_any_vlan_flg;
+ u8 accept_any_vlan;
u8 update_rss_flg;
struct qed_update_vport_rss_params rss_params;
};
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 39206751463e..89b536796e53 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3713,19 +3713,18 @@ static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
*/
static inline __wsum lco_csum(struct sk_buff *skb)
{
- char *inner_csum_field;
- __wsum csum;
+ unsigned char *csum_start = skb_checksum_start(skb);
+ unsigned char *l4_hdr = skb_transport_header(skb);
+ __wsum partial;
/* Start with complement of inner checksum adjustment */
- inner_csum_field = skb->data + skb_checksum_start_offset(skb) +
- skb->csum_offset;
- csum = ~csum_unfold(*(__force __sum16 *)inner_csum_field);
+ partial = ~csum_unfold(*(__force __sum16 *)(csum_start +
+ skb->csum_offset));
+
/* Add in checksum of our headers (incl. outer checksum
- * adjustment filled in by caller)
+ * adjustment filled in by caller) and return result.
*/
- csum = skb_checksum(skb, 0, skb_checksum_start_offset(skb), csum);
- /* The result is the checksum from skb->data to end of packet */
- return csum;
+ return csum_partial(l4_hdr, csum_start - l4_hdr, partial);
}
#endif /* __KERNEL__ */
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 43c0e771f417..8d4608ce8716 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -83,7 +83,6 @@ struct genl_family {
* @attrs: netlink attributes
* @_net: network namespace
* @user_ptr: user pointers
- * @dst_sk: destination socket
*/
struct genl_info {
u32 snd_seq;
@@ -94,7 +93,6 @@ struct genl_info {
struct nlattr ** attrs;
possible_net_t _net;
void * user_ptr[2];
- struct sock * dst_sk;
};
static inline struct net *genl_info_net(struct genl_info *info)
@@ -188,8 +186,6 @@ int genl_unregister_family(struct genl_family *family);
void genl_notify(struct genl_family *family, struct sk_buff *skb,
struct genl_info *info, u32 group, gfp_t flags);
-struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info,
- gfp_t flags);
void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
struct genl_family *family, int flags, u8 cmd);
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 87408ab80856..4dd616376fec 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -270,7 +270,8 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
return INET_ECN_encapsulate(tos, inner);
}
-int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
+int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto,
+ bool xnet);
void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, u8 proto,
u8 tos, u8 ttl, __be16 df, bool xnet);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 6570f379aba2..f3c9857c645d 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -259,8 +259,12 @@ static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
rcu_read_lock();
opt = rcu_dereference(np->opt);
- if (opt && !atomic_inc_not_zero(&opt->refcnt))
- opt = NULL;
+ if (opt) {
+ if (!atomic_inc_not_zero(&opt->refcnt))
+ opt = NULL;
+ else
+ opt = rcu_pointer_handoff(opt);
+ }
rcu_read_unlock();
return opt;
}
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 1b85a3b40c5a..748083de367a 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -294,6 +294,15 @@ static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id)
#endif
}
+static inline __be64 vxlan_vni_to_tun_id(__be32 vni)
+{
+#if defined(__BIG_ENDIAN)
+ return (__be64)vni;
+#else
+ return (__be64)vni << 32;
+#endif
+}
+
static inline size_t vxlan_rco_start(__be32 vni_field)
{
return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index f095155d8749..0dba4e4ed2be 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -107,8 +107,10 @@ struct nlmsgerr {
#define NETLINK_PKTINFO 3
#define NETLINK_BROADCAST_ERROR 4
#define NETLINK_NO_ENOBUFS 5
+#ifndef __KERNEL__
#define NETLINK_RX_RING 6
#define NETLINK_TX_RING 7
+#endif
#define NETLINK_LISTEN_ALL_NSID 8
#define NETLINK_LIST_MEMBERSHIPS 9
#define NETLINK_CAP_ACK 10
@@ -134,6 +136,7 @@ struct nl_mmap_hdr {
__u32 nm_gid;
};
+#ifndef __KERNEL__
enum nl_mmap_status {
NL_MMAP_STATUS_UNUSED,
NL_MMAP_STATUS_RESERVED,
@@ -145,6 +148,7 @@ enum nl_mmap_status {
#define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO
#define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT)
#define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr))
+#endif
#define NET_MAJOR 36 /* Major 36 is reserved for networking */
diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h
index f2159d30d1f5..d79399394b46 100644
--- a/include/uapi/linux/netlink_diag.h
+++ b/include/uapi/linux/netlink_diag.h
@@ -48,6 +48,8 @@ enum {
#define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */
#define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */
+#ifndef __KERNEL__
#define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */
+#endif
#endif
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 40197ff8918a..e9c635eae24d 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -598,7 +598,6 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state)
return -ENETDOWN;
br_set_state(p, state);
- br_log_state(p);
br_port_state_selection(p->br);
return 0;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 302ab0a43725..1b5d145dfcbf 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -900,7 +900,6 @@ static inline void br_nf_core_fini(void) {}
#endif
/* br_stp.c */
-void br_log_state(const struct net_bridge_port *p);
void br_set_state(struct net_bridge_port *p, unsigned int state);
struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no);
void br_init_port(struct net_bridge_port *p);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index b3cca126b103..c22816a0b1b1 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -30,13 +30,6 @@ static const char *const br_port_state_names[] = {
[BR_STATE_BLOCKING] = "blocking",
};
-void br_log_state(const struct net_bridge_port *p)
-{
- br_info(p->br, "port %u(%s) entered %s state\n",
- (unsigned int) p->port_no, p->dev->name,
- br_port_state_names[p->state]);
-}
-
void br_set_state(struct net_bridge_port *p, unsigned int state)
{
struct switchdev_attr attr = {
@@ -52,6 +45,10 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
if (err && err != -EOPNOTSUPP)
br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
(unsigned int) p->port_no, p->dev->name);
+ else
+ br_info(p->br, "port %u(%s) entered %s state\n",
+ (unsigned int) p->port_no, p->dev->name,
+ br_port_state_names[p->state]);
}
/* called under bridge lock */
@@ -126,7 +123,6 @@ static void br_root_port_block(const struct net_bridge *br,
(unsigned int) p->port_no, p->dev->name);
br_set_state(p, BR_STATE_LISTENING);
- br_log_state(p);
br_ifinfo_notify(RTM_NEWLINK, p);
if (br->forward_delay > 0)
@@ -407,7 +403,6 @@ static void br_make_blocking(struct net_bridge_port *p)
br_topology_change_detection(p->br);
br_set_state(p, BR_STATE_BLOCKING);
- br_log_state(p);
br_ifinfo_notify(RTM_NEWLINK, p);
del_timer(&p->forward_delay_timer);
@@ -431,7 +426,6 @@ static void br_make_forwarding(struct net_bridge_port *p)
else
br_set_state(p, BR_STATE_LEARNING);
- br_log_state(p);
br_ifinfo_notify(RTM_NEWLINK, p);
if (br->forward_delay != 0)
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index a31ac6ad76a2..984d46263007 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -102,7 +102,6 @@ void br_stp_enable_port(struct net_bridge_port *p)
{
br_init_port(p);
br_port_state_selection(p->br);
- br_log_state(p);
br_ifinfo_notify(RTM_NEWLINK, p);
}
@@ -118,7 +117,6 @@ void br_stp_disable_port(struct net_bridge_port *p)
p->topology_change_ack = 0;
p->config_pending = 0;
- br_log_state(p);
br_ifinfo_notify(RTM_NEWLINK, p);
del_timer(&p->message_age_timer);
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 5f0f5af0ec35..da058b85aa22 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -98,7 +98,6 @@ static void br_forward_delay_timer_expired(unsigned long arg)
br_topology_change_detection(br);
netif_carrier_on(br->dev);
}
- br_log_state(p);
rcu_read_lock();
br_ifinfo_notify(RTM_NEWLINK, p);
rcu_read_unlock();
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 917c2c1bfadd..12071e28d958 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -238,7 +238,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
return -EINVAL;
}
}
- return iptunnel_pull_header(skb, hdr_len, tpi->proto);
+ return iptunnel_pull_header(skb, hdr_len, tpi->proto, false);
}
static void ipgre_err(struct sk_buff *skb, u32 info,
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index a6e58b6141cd..eaca2449a09a 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -86,7 +86,8 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(iptunnel_xmit);
-int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
+int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto,
+ bool xnet)
{
if (unlikely(!pskb_may_pull(skb, hdr_len)))
return -ENOMEM;
@@ -109,13 +110,10 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
skb->protocol = inner_proto;
}
- nf_reset(skb);
- secpath_reset(skb);
skb_clear_hash_if_not_l4(skb);
- skb_dst_drop(skb);
skb->vlan_tci = 0;
skb_set_queue_mapping(skb, 0);
- skb->pkt_type = PACKET_HOST;
+ skb_scrub_packet(skb, xnet);
return 0;
}
EXPORT_SYMBOL_GPL(iptunnel_pull_header);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 6ec5b42fd172..ec51d02166de 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -195,7 +195,7 @@ static int ipip_rcv(struct sk_buff *skb)
if (tunnel) {
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
- if (iptunnel_pull_header(skb, 0, tpi.proto))
+ if (iptunnel_pull_header(skb, 0, tpi.proto, false))
goto drop;
return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 0625ac6356b5..f45b8ffc2840 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -740,7 +740,7 @@ static int ipip_rcv(struct sk_buff *skb)
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
- if (iptunnel_pull_header(skb, 0, tpi.proto))
+ if (iptunnel_pull_header(skb, 0, tpi.proto, false))
goto drop;
return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index a7ba23353dab..9a99f686d06f 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -127,13 +127,6 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group)
}
EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
-struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
- u32 dst_portid, gfp_t gfp_mask)
-{
- return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask);
-}
-EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb);
-
int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
unsigned int group, int echo, gfp_t flags)
{
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 8ca932057c13..11f81c8385fc 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -330,14 +330,13 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size,
* message. WARNING: has to be <= 128k due to slab restrictions */
n = max(inst_size, pkt_size);
- skb = nfnetlink_alloc_skb(net, n, peer_portid, GFP_ATOMIC);
+ skb = alloc_skb(n, GFP_ATOMIC);
if (!skb) {
if (n > pkt_size) {
/* try to allocate only as much as we need for current
* packet */
- skb = nfnetlink_alloc_skb(net, pkt_size,
- peer_portid, GFP_ATOMIC);
+ skb = alloc_skb(pkt_size, GFP_ATOMIC);
}
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 1d3936587ace..75429997ed41 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -301,7 +301,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
__be32 **packet_id_ptr)
{
size_t size;
- size_t data_len = 0, cap_len = 0, rem_len = 0;
+ size_t data_len = 0, cap_len = 0;
unsigned int hlen = 0;
struct sk_buff *skb;
struct nlattr *nla;
@@ -361,7 +361,6 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
hlen = min_t(unsigned int, hlen, data_len);
size += sizeof(struct nlattr) + hlen;
cap_len = entskb->len;
- rem_len = data_len - hlen;
break;
}
@@ -386,8 +385,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
size += nla_total_size(seclen);
}
- skb = __netlink_alloc_skb(net->nfnl, size, rem_len, queue->peer_portid,
- GFP_ATOMIC);
+ skb = alloc_skb(size, GFP_ATOMIC);
if (!skb) {
skb_tx_error(entskb);
return NULL;
diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig
index 2c5e95e9bfbd..5d6e8c05b3d4 100644
--- a/net/netlink/Kconfig
+++ b/net/netlink/Kconfig
@@ -2,15 +2,6 @@
# Netlink Sockets
#
-config NETLINK_MMAP
- bool "NETLINK: mmaped IO"
- ---help---
- This option enables support for memory mapped netlink IO. This
- reduces overhead by avoiding copying data between kernel- and
- userspace.
-
- If unsure, say N.
-
config NETLINK_DIAG
tristate "NETLINK: socket monitoring interface"
default n
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index f1ffb34e253f..c8416792cce0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -225,7 +225,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
dev_hold(dev);
- if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head))
+ if (is_vmalloc_addr(skb->head))
nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
else
nskb = skb_clone(skb, GFP_ATOMIC);
@@ -300,610 +300,8 @@ static void netlink_rcv_wake(struct sock *sk)
wake_up_interruptible(&nlk->wait);
}
-#ifdef CONFIG_NETLINK_MMAP
-static bool netlink_rx_is_mmaped(struct sock *sk)
-{
- return nlk_sk(sk)->rx_ring.pg_vec != NULL;
-}
-
-static bool netlink_tx_is_mmaped(struct sock *sk)
-{
- return nlk_sk(sk)->tx_ring.pg_vec != NULL;
-}
-
-static __pure struct page *pgvec_to_page(const void *addr)
-{
- if (is_vmalloc_addr(addr))
- return vmalloc_to_page(addr);
- else
- return virt_to_page(addr);
-}
-
-static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
-{
- unsigned int i;
-
- for (i = 0; i < len; i++) {
- if (pg_vec[i] != NULL) {
- if (is_vmalloc_addr(pg_vec[i]))
- vfree(pg_vec[i]);
- else
- free_pages((unsigned long)pg_vec[i], order);
- }
- }
- kfree(pg_vec);
-}
-
-static void *alloc_one_pg_vec_page(unsigned long order)
-{
- void *buffer;
- gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
- __GFP_NOWARN | __GFP_NORETRY;
-
- buffer = (void *)__get_free_pages(gfp_flags, order);
- if (buffer != NULL)
- return buffer;
-
- buffer = vzalloc((1 << order) * PAGE_SIZE);
- if (buffer != NULL)
- return buffer;
-
- gfp_flags &= ~__GFP_NORETRY;
- return (void *)__get_free_pages(gfp_flags, order);
-}
-
-static void **alloc_pg_vec(struct netlink_sock *nlk,
- struct nl_mmap_req *req, unsigned int order)
-{
- unsigned int block_nr = req->nm_block_nr;
- unsigned int i;
- void **pg_vec;
-
- pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
- if (pg_vec == NULL)
- return NULL;
-
- for (i = 0; i < block_nr; i++) {
- pg_vec[i] = alloc_one_pg_vec_page(order);
- if (pg_vec[i] == NULL)
- goto err1;
- }
-
- return pg_vec;
-err1:
- free_pg_vec(pg_vec, order, block_nr);
- return NULL;
-}
-
-
-static void
-__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
- unsigned int order)
-{
- struct netlink_sock *nlk = nlk_sk(sk);
- struct sk_buff_head *queue;
- struct netlink_ring *ring;
-
- queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
- ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
-
- spin_lock_bh(&queue->lock);
-
- ring->frame_max = req->nm_frame_nr - 1;
- ring->head = 0;
- ring->frame_size = req->nm_frame_size;
- ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
-
- swap(ring->pg_vec_len, req->nm_block_nr);
- swap(ring->pg_vec_order, order);
- swap(ring->pg_vec, pg_vec);
-
- __skb_queue_purge(queue);
- spin_unlock_bh(&queue->lock);
-
- WARN_ON(atomic_read(&nlk->mapped));
-
- if (pg_vec)
- free_pg_vec(pg_vec, order, req->nm_block_nr);
-}
-
-static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
- bool tx_ring)
-{
- struct netlink_sock *nlk = nlk_sk(sk);
- struct netlink_ring *ring;
- void **pg_vec = NULL;
- unsigned int order = 0;
-
- ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
-
- if (atomic_read(&nlk->mapped))
- return -EBUSY;
- if (atomic_read(&ring->pending))
- return -EBUSY;
-
- if (req->nm_block_nr) {
- if (ring->pg_vec != NULL)
- return -EBUSY;
-
- if ((int)req->nm_block_size <= 0)
- return -EINVAL;
- if (!PAGE_ALIGNED(req->nm_block_size))
- return -EINVAL;
- if (req->nm_frame_size < NL_MMAP_HDRLEN)
- return -EINVAL;
- if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
- return -EINVAL;
-
- ring->frames_per_block = req->nm_block_size /
- req->nm_frame_size;
- if (ring->frames_per_block == 0)
- return -EINVAL;
- if (ring->frames_per_block * req->nm_block_nr !=
- req->nm_frame_nr)
- return -EINVAL;
-
- order = get_order(req->nm_block_size);
- pg_vec = alloc_pg_vec(nlk, req, order);
- if (pg_vec == NULL)
- return -ENOMEM;
- } else {
- if (req->nm_frame_nr)
- return -EINVAL;
- }
-
- mutex_lock(&nlk->pg_vec_lock);
- if (atomic_read(&nlk->mapped) == 0) {
- __netlink_set_ring(sk, req, tx_ring, pg_vec, order);
- mutex_unlock(&nlk->pg_vec_lock);
- return 0;
- }
-
- mutex_unlock(&nlk->pg_vec_lock);
-
- if (pg_vec)
- free_pg_vec(pg_vec, order, req->nm_block_nr);
-
- return -EBUSY;
-}
-
-static void netlink_mm_open(struct vm_area_struct *vma)
-{
- struct file *file = vma->vm_file;
- struct socket *sock = file->private_data;
- struct sock *sk = sock->sk;
-
- if (sk)
- atomic_inc(&nlk_sk(sk)->mapped);
-}
-
-static void netlink_mm_close(struct vm_area_struct *vma)
-{
- struct file *file = vma->vm_file;
- struct socket *sock = file->private_data;
- struct sock *sk = sock->sk;
-
- if (sk)
- atomic_dec(&nlk_sk(sk)->mapped);
-}
-
-static const struct vm_operations_struct netlink_mmap_ops = {
- .open = netlink_mm_open,
- .close = netlink_mm_close,
-};
-
-static int netlink_mmap(struct file *file, struct socket *sock,
- struct vm_area_struct *vma)
-{
- struct sock *sk = sock->sk;
- struct netlink_sock *nlk = nlk_sk(sk);
- struct netlink_ring *ring;
- unsigned long start, size, expected;
- unsigned int i;
- int err = -EINVAL;
-
- if (vma->vm_pgoff)
- return -EINVAL;
-
- mutex_lock(&nlk->pg_vec_lock);
-
- expected = 0;
- for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
- if (ring->pg_vec == NULL)
- continue;
- expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
- }
-
- if (expected == 0)
- goto out;
-
- size = vma->vm_end - vma->vm_start;
- if (size != expected)
- goto out;
-
- start = vma->vm_start;
- for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
- if (ring->pg_vec == NULL)
- continue;
-
- for (i = 0; i < ring->pg_vec_len; i++) {
- struct page *page;
- void *kaddr = ring->pg_vec[i];
- unsigned int pg_num;
-
- for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
- page = pgvec_to_page(kaddr);
- err = vm_insert_page(vma, start, page);
- if (err < 0)
- goto out;
- start += PAGE_SIZE;
- kaddr += PAGE_SIZE;
- }
- }
- }
-
- atomic_inc(&nlk->mapped);
- vma->vm_ops = &netlink_mmap_ops;
- err = 0;
-out:
- mutex_unlock(&nlk->pg_vec_lock);
- return err;
-}
-
-static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
-{
-#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
- struct page *p_start, *p_end;
-
- /* First page is flushed through netlink_{get,set}_status */
- p_start = pgvec_to_page(hdr + PAGE_SIZE);
- p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
- while (p_start <= p_end) {
- flush_dcache_page(p_start);
- p_start++;
- }
-#endif
-}
-
-static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
-{
- smp_rmb();
- flush_dcache_page(pgvec_to_page(hdr));
- return hdr->nm_status;
-}
-
-static void netlink_set_status(struct nl_mmap_hdr *hdr,
- enum nl_mmap_status status)
-{
- smp_mb();
- hdr->nm_status = status;
- flush_dcache_page(pgvec_to_page(hdr));
-}
-
-static struct nl_mmap_hdr *
-__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
-{
- unsigned int pg_vec_pos, frame_off;
-
- pg_vec_pos = pos / ring->frames_per_block;
- frame_off = pos % ring->frames_per_block;
-
- return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
-}
-
-static struct nl_mmap_hdr *
-netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
- enum nl_mmap_status status)
-{
- struct nl_mmap_hdr *hdr;
-
- hdr = __netlink_lookup_frame(ring, pos);
- if (netlink_get_status(hdr) != status)
- return NULL;
-
- return hdr;
-}
-
-static struct nl_mmap_hdr *
-netlink_current_frame(const struct netlink_ring *ring,
- enum nl_mmap_status status)
-{
- return netlink_lookup_frame(ring, ring->head, status);
-}
-
-static void netlink_increment_head(struct netlink_ring *ring)
-{
- ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
-}
-
-static void netlink_forward_ring(struct netlink_ring *ring)
-{
- unsigned int head = ring->head;
- const struct nl_mmap_hdr *hdr;
-
- do {
- hdr = __netlink_lookup_frame(ring, ring->head);
- if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
- break;
- if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
- break;
- netlink_increment_head(ring);
- } while (ring->head != head);
-}
-
-static bool netlink_has_valid_frame(struct netlink_ring *ring)
-{
- unsigned int head = ring->head, pos = head;
- const struct nl_mmap_hdr *hdr;
-
- do {
- hdr = __netlink_lookup_frame(ring, pos);
- if (hdr->nm_status == NL_MMAP_STATUS_VALID)
- return true;
- pos = pos != 0 ? pos - 1 : ring->frame_max;
- } while (pos != head);
-
- return false;
-}
-
-static bool netlink_dump_space(struct netlink_sock *nlk)
-{
- struct netlink_ring *ring = &nlk->rx_ring;
- struct nl_mmap_hdr *hdr;
- unsigned int n;
-
- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
- if (hdr == NULL)
- return false;
-
- n = ring->head + ring->frame_max / 2;
- if (n > ring->frame_max)
- n -= ring->frame_max;
-
- hdr = __netlink_lookup_frame(ring, n);
-
- return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
-}
-
-static unsigned int netlink_poll(struct file *file, struct socket *sock,
- poll_table *wait)
-{
- struct sock *sk = sock->sk;
- struct netlink_sock *nlk = nlk_sk(sk);
- unsigned int mask;
- int err;
-
- if (nlk->rx_ring.pg_vec != NULL) {
- /* Memory mapped sockets don't call recvmsg(), so flow control
- * for dumps is performed here. A dump is allowed to continue
- * if at least half the ring is unused.
- */
- while (nlk->cb_running && netlink_dump_space(nlk)) {
- err = netlink_dump(sk);
- if (err < 0) {
- sk->sk_err = -err;
- sk->sk_error_report(sk);
- break;
- }
- }
- netlink_rcv_wake(sk);
- }
-
- mask = datagram_poll(file, sock, wait);
-
- /* We could already have received frames in the normal receive
- * queue, that will show up as NL_MMAP_STATUS_COPY in the ring,
- * so if mask contains pollin/etc already, there's no point
- * walking the ring.
- */
- if ((mask & (POLLIN | POLLRDNORM)) != (POLLIN | POLLRDNORM)) {
- spin_lock_bh(&sk->sk_receive_queue.lock);
- if (nlk->rx_ring.pg_vec) {
- if (netlink_has_valid_frame(&nlk->rx_ring))
- mask |= POLLIN | POLLRDNORM;
- }
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- }
-
- spin_lock_bh(&sk->sk_write_queue.lock);
- if (nlk->tx_ring.pg_vec) {
- if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
- mask |= POLLOUT | POLLWRNORM;
- }
- spin_unlock_bh(&sk->sk_write_queue.lock);
-
- return mask;
-}
-
-static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
-{
- return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
-}
-
-static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
- struct netlink_ring *ring,
- struct nl_mmap_hdr *hdr)
-{
- unsigned int size;
- void *data;
-
- size = ring->frame_size - NL_MMAP_HDRLEN;
- data = (void *)hdr + NL_MMAP_HDRLEN;
-
- skb->head = data;
- skb->data = data;
- skb_reset_tail_pointer(skb);
- skb->end = skb->tail + size;
- skb->len = 0;
-
- skb->destructor = netlink_skb_destructor;
- NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
- NETLINK_CB(skb).sk = sk;
-}
-
-static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
- u32 dst_portid, u32 dst_group,
- struct scm_cookie *scm)
-{
- struct netlink_sock *nlk = nlk_sk(sk);
- struct netlink_ring *ring;
- struct nl_mmap_hdr *hdr;
- struct sk_buff *skb;
- unsigned int maxlen;
- int err = 0, len = 0;
-
- mutex_lock(&nlk->pg_vec_lock);
-
- ring = &nlk->tx_ring;
- maxlen = ring->frame_size - NL_MMAP_HDRLEN;
-
- do {
- unsigned int nm_len;
-
- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
- if (hdr == NULL) {
- if (!(msg->msg_flags & MSG_DONTWAIT) &&
- atomic_read(&nlk->tx_ring.pending))
- schedule();
- continue;
- }
-
- nm_len = ACCESS_ONCE(hdr->nm_len);
- if (nm_len > maxlen) {
- err = -EINVAL;
- goto out;
- }
-
- netlink_frame_flush_dcache(hdr, nm_len);
-
- skb = alloc_skb(nm_len, GFP_KERNEL);
- if (skb == NULL) {
- err = -ENOBUFS;
- goto out;
- }
- __skb_put(skb, nm_len);
- memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
-
- netlink_increment_head(ring);
-
- NETLINK_CB(skb).portid = nlk->portid;
- NETLINK_CB(skb).dst_group = dst_group;
- NETLINK_CB(skb).creds = scm->creds;
-
- err = security_netlink_send(sk, skb);
- if (err) {
- kfree_skb(skb);
- goto out;
- }
-
- if (unlikely(dst_group)) {
- atomic_inc(&skb->users);
- netlink_broadcast(sk, skb, dst_portid, dst_group,
- GFP_KERNEL);
- }
- err = netlink_unicast(sk, skb, dst_portid,
- msg->msg_flags & MSG_DONTWAIT);
- if (err < 0)
- goto out;
- len += err;
-
- } while (hdr != NULL ||
- (!(msg->msg_flags & MSG_DONTWAIT) &&
- atomic_read(&nlk->tx_ring.pending)));
-
- if (len > 0)
- err = len;
-out:
- mutex_unlock(&nlk->pg_vec_lock);
- return err;
-}
-
-static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
-{
- struct nl_mmap_hdr *hdr;
-
- hdr = netlink_mmap_hdr(skb);
- hdr->nm_len = skb->len;
- hdr->nm_group = NETLINK_CB(skb).dst_group;
- hdr->nm_pid = NETLINK_CB(skb).creds.pid;
- hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
- hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
- netlink_frame_flush_dcache(hdr, hdr->nm_len);
- netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
-
- NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
- kfree_skb(skb);
-}
-
-static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
-{
- struct netlink_sock *nlk = nlk_sk(sk);
- struct netlink_ring *ring = &nlk->rx_ring;
- struct nl_mmap_hdr *hdr;
-
- spin_lock_bh(&sk->sk_receive_queue.lock);
- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
- if (hdr == NULL) {
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- kfree_skb(skb);
- netlink_overrun(sk);
- return;
- }
- netlink_increment_head(ring);
- __skb_queue_tail(&sk->sk_receive_queue, skb);
- spin_unlock_bh(&sk->sk_receive_queue.lock);
-
- hdr->nm_len = skb->len;
- hdr->nm_group = NETLINK_CB(skb).dst_group;
- hdr->nm_pid = NETLINK_CB(skb).creds.pid;
- hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
- hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
- netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
-}
-
-#else /* CONFIG_NETLINK_MMAP */
-#define netlink_rx_is_mmaped(sk) false
-#define netlink_tx_is_mmaped(sk) false
-#define netlink_mmap sock_no_mmap
-#define netlink_poll datagram_poll
-#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0
-#endif /* CONFIG_NETLINK_MMAP */
-
static void netlink_skb_destructor(struct sk_buff *skb)
{
-#ifdef CONFIG_NETLINK_MMAP
- struct nl_mmap_hdr *hdr;
- struct netlink_ring *ring;
- struct sock *sk;
-
- /* If a packet from the kernel to userspace was freed because of an
- * error without being delivered to userspace, the kernel must reset
- * the status. In the direction userspace to kernel, the status is
- * always reset here after the packet was processed and freed.
- */
- if (netlink_skb_is_mmaped(skb)) {
- hdr = netlink_mmap_hdr(skb);
- sk = NETLINK_CB(skb).sk;
-
- if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
- ring = &nlk_sk(sk)->tx_ring;
- } else {
- if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
- hdr->nm_len = 0;
- netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
- }
- ring = &nlk_sk(sk)->rx_ring;
- }
-
- WARN_ON(atomic_read(&ring->pending) == 0);
- atomic_dec(&ring->pending);
- sock_put(sk);
-
- skb->head = NULL;
- }
-#endif
if (is_vmalloc_addr(skb->head)) {
if (!skb->cloned ||
!atomic_dec_return(&(skb_shinfo(skb)->dataref)))
@@ -937,18 +335,6 @@ static void netlink_sock_destruct(struct sock *sk)
}
skb_queue_purge(&sk->sk_receive_queue);
-#ifdef CONFIG_NETLINK_MMAP
- if (1) {
- struct nl_mmap_req req;
-
- memset(&req, 0, sizeof(req));
- if (nlk->rx_ring.pg_vec)
- __netlink_set_ring(sk, &req, false, NULL, 0);
- memset(&req, 0, sizeof(req));
- if (nlk->tx_ring.pg_vec)
- __netlink_set_ring(sk, &req, true, NULL, 0);
- }
-#endif /* CONFIG_NETLINK_MMAP */
if (!sock_flag(sk, SOCK_DEAD)) {
printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
@@ -1194,9 +580,6 @@ static int __netlink_create(struct net *net, struct socket *sock,
mutex_init(nlk->cb_mutex);
}
init_waitqueue_head(&nlk->wait);
-#ifdef CONFIG_NETLINK_MMAP
- mutex_init(&nlk->pg_vec_lock);
-#endif
sk->sk_destruct = netlink_sock_destruct;
sk->sk_protocol = protocol;
@@ -1728,8 +1111,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
nlk = nlk_sk(sk);
if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
- !netlink_skb_is_mmaped(skb)) {
+ test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
DECLARE_WAITQUEUE(wait, current);
if (!*timeo) {
if (!ssk || netlink_is_kernel(ssk))
@@ -1767,14 +1149,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
netlink_deliver_tap(skb);
-#ifdef CONFIG_NETLINK_MMAP
- if (netlink_skb_is_mmaped(skb))
- netlink_queue_mmaped_skb(sk, skb);
- else if (netlink_rx_is_mmaped(sk))
- netlink_ring_set_copied(sk, skb);
- else
-#endif /* CONFIG_NETLINK_MMAP */
- skb_queue_tail(&sk->sk_receive_queue, skb);
+ skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk);
return len;
}
@@ -1798,9 +1173,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
int delta;
WARN_ON(skb->sk != NULL);
- if (netlink_skb_is_mmaped(skb))
- return skb;
-
delta = skb->end - skb->tail;
if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
return skb;
@@ -1876,79 +1248,6 @@ retry:
}
EXPORT_SYMBOL(netlink_unicast);
-struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
- unsigned int ldiff, u32 dst_portid,
- gfp_t gfp_mask)
-{
-#ifdef CONFIG_NETLINK_MMAP
- unsigned int maxlen, linear_size;
- struct sock *sk = NULL;
- struct sk_buff *skb;
- struct netlink_ring *ring;
- struct nl_mmap_hdr *hdr;
-
- sk = netlink_getsockbyportid(ssk, dst_portid);
- if (IS_ERR(sk))
- goto out;
-
- ring = &nlk_sk(sk)->rx_ring;
- /* fast-path without atomic ops for common case: non-mmaped receiver */
- if (ring->pg_vec == NULL)
- goto out_put;
-
- /* We need to account the full linear size needed as a ring
- * slot cannot have non-linear parts.
- */
- linear_size = size + ldiff;
- if (ring->frame_size - NL_MMAP_HDRLEN < linear_size)
- goto out_put;
-
- skb = alloc_skb_head(gfp_mask);
- if (skb == NULL)
- goto err1;
-
- spin_lock_bh(&sk->sk_receive_queue.lock);
- /* check again under lock */
- if (ring->pg_vec == NULL)
- goto out_free;
-
- /* check again under lock */
- maxlen = ring->frame_size - NL_MMAP_HDRLEN;
- if (maxlen < linear_size)
- goto out_free;
-
- netlink_forward_ring(ring);
- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
- if (hdr == NULL)
- goto err2;
-
- netlink_ring_setup_skb(skb, sk, ring, hdr);
- netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
- atomic_inc(&ring->pending);
- netlink_increment_head(ring);
-
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- return skb;
-
-err2:
- kfree_skb(skb);
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- netlink_overrun(sk);
-err1:
- sock_put(sk);
- return NULL;
-
-out_free:
- kfree_skb(skb);
- spin_unlock_bh(&sk->sk_receive_queue.lock);
-out_put:
- sock_put(sk);
-out:
-#endif
- return alloc_skb(size, gfp_mask);
-}
-EXPORT_SYMBOL_GPL(__netlink_alloc_skb);
-
int netlink_has_listeners(struct sock *sk, unsigned int group)
{
int res = 0;
@@ -2225,8 +1524,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
if (level != SOL_NETLINK)
return -ENOPROTOOPT;
- if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
- optlen >= sizeof(int) &&
+ if (optlen >= sizeof(int) &&
get_user(val, (unsigned int __user *)optval))
return -EFAULT;
@@ -2279,25 +1577,6 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
}
err = 0;
break;
-#ifdef CONFIG_NETLINK_MMAP
- case NETLINK_RX_RING:
- case NETLINK_TX_RING: {
- struct nl_mmap_req req;
-
- /* Rings might consume more memory than queue limits, require
- * CAP_NET_ADMIN.
- */
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
- if (optlen < sizeof(req))
- return -EINVAL;
- if (copy_from_user(&req, optval, sizeof(req)))
- return -EFAULT;
- err = netlink_set_ring(sk, &req,
- optname == NETLINK_TX_RING);
- break;
- }
-#endif /* CONFIG_NETLINK_MMAP */
case NETLINK_LISTEN_ALL_NSID:
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
return -EPERM;
@@ -2467,18 +1746,6 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
smp_rmb();
}
- /* It's a really convoluted way for userland to ask for mmaped
- * sendmsg(), but that's what we've got...
- */
- if (netlink_tx_is_mmaped(sk) &&
- iter_is_iovec(&msg->msg_iter) &&
- msg->msg_iter.nr_segs == 1 &&
- msg->msg_iter.iov->iov_base == NULL) {
- err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
- &scm);
- goto out;
- }
-
err = -EMSGSIZE;
if (len > sk->sk_sndbuf - 32)
goto out;
@@ -2794,8 +2061,7 @@ static int netlink_dump(struct sock *sk)
goto errout_skb;
}
- if (!netlink_rx_is_mmaped(sk) &&
- atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+ if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
goto errout_skb;
/* NLMSG_GOODSIZE is small to avoid high order allocations being
@@ -2808,15 +2074,12 @@ static int netlink_dump(struct sock *sk)
if (alloc_min_size < nlk->max_recvmsg_len) {
alloc_size = nlk->max_recvmsg_len;
- skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
- GFP_KERNEL |
- __GFP_NOWARN |
- __GFP_NORETRY);
+ skb = alloc_skb(alloc_size, GFP_KERNEL |
+ __GFP_NOWARN | __GFP_NORETRY);
}
if (!skb) {
alloc_size = alloc_min_size;
- skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
- GFP_KERNEL);
+ skb = alloc_skb(alloc_size, GFP_KERNEL);
}
if (!skb)
goto errout_skb;
@@ -2831,8 +2094,7 @@ static int netlink_dump(struct sock *sk)
* reasonable static buffer based on the expected largest dump of a
* single netdev. The outcome is MSG_TRUNC error.
*/
- if (!netlink_rx_is_mmaped(sk))
- skb_reserve(skb, skb_tailroom(skb) - alloc_size);
+ skb_reserve(skb, skb_tailroom(skb) - alloc_size);
netlink_skb_set_owner_r(skb, sk);
len = cb->dump(skb, cb);
@@ -2884,16 +2146,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
struct netlink_sock *nlk;
int ret;
- /* Memory mapped dump requests need to be copied to avoid looping
- * on the pending state in netlink_mmap_sendmsg() while the CB hold
- * a reference to the skb.
- */
- if (netlink_skb_is_mmaped(skb)) {
- skb = skb_copy(skb, GFP_KERNEL);
- if (skb == NULL)
- return -ENOBUFS;
- } else
- atomic_inc(&skb->users);
+ atomic_inc(&skb->users);
sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
if (sk == NULL) {
@@ -2966,8 +2219,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
if (!(nlk->flags & NETLINK_F_CAP_ACK) && err)
payload += nlmsg_len(nlh);
- skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload),
- NETLINK_CB(in_skb).portid, GFP_KERNEL);
+ skb = nlmsg_new(payload, GFP_KERNEL);
if (!skb) {
struct sock *sk;
@@ -3241,7 +2493,7 @@ static const struct proto_ops netlink_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = netlink_getname,
- .poll = netlink_poll,
+ .poll = datagram_poll,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -3249,7 +2501,7 @@ static const struct proto_ops netlink_ops = {
.getsockopt = netlink_getsockopt,
.sendmsg = netlink_sendmsg,
.recvmsg = netlink_recvmsg,
- .mmap = netlink_mmap,
+ .mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 14437d9b1965..e68ef9ccd703 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -44,12 +44,6 @@ struct netlink_sock {
int (*netlink_bind)(struct net *net, int group);
void (*netlink_unbind)(struct net *net, int group);
struct module *module;
-#ifdef CONFIG_NETLINK_MMAP
- struct mutex pg_vec_lock;
- struct netlink_ring rx_ring;
- struct netlink_ring tx_ring;
- atomic_t mapped;
-#endif /* CONFIG_NETLINK_MMAP */
struct rhash_head node;
struct rcu_head rcu;
@@ -60,15 +54,6 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk)
return container_of(sk, struct netlink_sock, sk);
}
-static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb)
-{
-#ifdef CONFIG_NETLINK_MMAP
- return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
-#else
- return false;
-#endif /* CONFIG_NETLINK_MMAP */
-}
-
struct netlink_table {
struct rhashtable hash;
struct hlist_head mc_list;
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 3ee63a3cff30..8dd836a8dd60 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -8,41 +8,6 @@
#include "af_netlink.h"
-#ifdef CONFIG_NETLINK_MMAP
-static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type,
- struct sk_buff *nlskb)
-{
- struct netlink_diag_ring ndr;
-
- ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT;
- ndr.ndr_block_nr = ring->pg_vec_len;
- ndr.ndr_frame_size = ring->frame_size;
- ndr.ndr_frame_nr = ring->frame_max + 1;
-
- return nla_put(nlskb, nl_type, sizeof(ndr), &ndr);
-}
-
-static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
-{
- struct netlink_sock *nlk = nlk_sk(sk);
- int ret;
-
- mutex_lock(&nlk->pg_vec_lock);
- ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb);
- if (!ret)
- ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING,
- nlskb);
- mutex_unlock(&nlk->pg_vec_lock);
-
- return ret;
-}
-#else
-static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
-{
- return 0;
-}
-#endif
-
static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb)
{
struct netlink_sock *nlk = nlk_sk(sk);
@@ -87,10 +52,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO))
goto out_nlmsg_trim;
- if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) &&
- sk_diag_put_rings_cfg(sk, skb))
- goto out_nlmsg_trim;
-
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 0ffd721126e7..a09132a69869 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -463,26 +463,6 @@ int genl_unregister_family(struct genl_family *family)
EXPORT_SYMBOL(genl_unregister_family);
/**
- * genlmsg_new_unicast - Allocate generic netlink message for unicast
- * @payload: size of the message payload
- * @info: information on destination
- * @flags: the type of memory to allocate
- *
- * Allocates a new sk_buff large enough to cover the specified payload
- * plus required Netlink headers. Will check receiving socket for
- * memory mapped i/o capability and use it if enabled. Will fall back
- * to non-mapped skb if message size exceeds the frame size of the ring.
- */
-struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info,
- gfp_t flags)
-{
- size_t len = nlmsg_total_size(genlmsg_total_size(payload));
-
- return netlink_alloc_skb(info->dst_sk, len, info->snd_portid, flags);
-}
-EXPORT_SYMBOL_GPL(genlmsg_new_unicast);
-
-/**
* genlmsg_put - Add generic netlink header to netlink message
* @skb: socket buffer holding the message
* @portid: netlink portid the message is addressed to
@@ -642,7 +622,6 @@ static int genl_family_rcv_msg(struct genl_family *family,
info.genlhdr = nlmsg_data(nlh);
info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
info.attrs = attrbuf;
- info.dst_sk = skb->sk;
genl_info_net_set(&info, net);
memset(&info.user_ptr, 0, sizeof(info.user_ptr));
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d6f7fe92744a..c4e8455d5d56 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -422,10 +422,6 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
struct sk_buff *nskb = NULL;
struct sk_buff *user_skb = NULL; /* to be queued to userspace */
struct nlattr *nla;
- struct genl_info info = {
- .dst_sk = ovs_dp_get_net(dp)->genl_sock,
- .snd_portid = upcall_info->portid,
- };
size_t len;
unsigned int hlen;
int err, dp_ifindex;
@@ -466,7 +462,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
hlen = skb->len;
len = upcall_msg_size(upcall_info, hlen);
- user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
+ user_skb = genlmsg_new(len, GFP_ATOMIC);
if (!user_skb) {
err = -ENOMEM;
goto out;
@@ -876,7 +872,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act
return NULL;
len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
- skb = genlmsg_new_unicast(len, info, GFP_KERNEL);
+ skb = genlmsg_new(len, GFP_KERNEL);
if (!skb)
return ERR_PTR(-ENOMEM);
@@ -1481,9 +1477,9 @@ error:
return -EMSGSIZE;
}
-static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
+static struct sk_buff *ovs_dp_cmd_alloc_info(void)
{
- return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
+ return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
}
/* Called with rcu_read_lock or ovs_mutex. */
@@ -1536,7 +1532,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
goto err;
- reply = ovs_dp_cmd_alloc_info(info);
+ reply = ovs_dp_cmd_alloc_info();
if (!reply)
return -ENOMEM;
@@ -1657,7 +1653,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
- reply = ovs_dp_cmd_alloc_info(info);
+ reply = ovs_dp_cmd_alloc_info();
if (!reply)
return -ENOMEM;
@@ -1690,7 +1686,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
- reply = ovs_dp_cmd_alloc_info(info);
+ reply = ovs_dp_cmd_alloc_info();
if (!reply)
return -ENOMEM;
@@ -1723,7 +1719,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
- reply = ovs_dp_cmd_alloc_info(info);
+ reply = ovs_dp_cmd_alloc_info();
if (!reply)
return -ENOMEM;
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 2c016fdefe97..de66d8f945ed 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -1104,7 +1104,6 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
req_nlh = (struct nlmsghdr *)skb->data;
msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN;
msg.cmd = req_userhdr->cmd;
- msg.dst_sk = info->dst_sk;
msg.net = genl_info_net(info);
if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) {