 44 files changed, 803 insertions(+), 1453 deletions(-)
diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt
deleted file mode 100644
index 54f10478e8e3..000000000000
--- a/Documentation/networking/netlink_mmap.txt
+++ /dev/null
@@ -1,332 +0,0 @@
-This file documents how to use memory mapped I/O with netlink.
-
-Author: Patrick McHardy <kaber@trash.net>
-
-Overview
---------
-
-Memory mapped netlink I/O can be used to increase throughput and decrease
-overhead of unicast receive and transmit operations. Some netlink subsystems
-require high throughput; these are mainly the netfilter subsystems
-nfnetlink_queue and nfnetlink_log, but it can also help speed up large
-dump operations of, for instance, the routing database.
-
-Memory mapped netlink I/O uses two circular ring buffers for RX and TX which
-are mapped into the process's address space.
-
-The RX ring is used by the kernel to directly construct netlink messages into
-user-space memory without copying them as done with regular socket I/O;
-additionally, as long as the ring contains messages, no recvmsg() or poll()
-syscalls have to be issued by user-space to get more messages.
-
-The TX ring is used to process messages directly from user-space memory; the
-kernel processes all messages contained in the ring using a single sendmsg()
-call.
-
-Usage overview
---------------
-
-In order to use memory mapped netlink I/O, user-space needs three main changes:
-
-- ring setup
-- conversion of the RX path to get messages from the ring instead of recvmsg()
-- conversion of the TX path to construct messages into the ring
-
-Ring setup is done using setsockopt() to provide the ring parameters to the
-kernel, then a call to mmap() to map the ring into the process's address space:
-
-- setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &params, sizeof(params));
-- setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &params, sizeof(params));
-- ring = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)
-
-Usage of either ring is optional, but even if only the RX ring is used the
-mapping still needs to be writable in order to update the frame status after
-processing.
-
-Conversion of the reception path involves calling poll() on the file
-descriptor; once the socket is readable, the frames from the ring are
-processed in order until no more messages are available, as indicated by
-a status word in the frame header.
-
-On the kernel side, in order to make use of memory mapped I/O on receive, the
-originating netlink subsystem needs to support memory mapped I/O; otherwise
-it will use an allocated socket buffer as usual and the contents will be
-copied to the ring on transmission, nullifying most of the performance gains.
-Dumps of kernel databases automatically support memory mapped I/O.
-
-Conversion of the transmit path involves changing message construction to
-use memory from the TX ring instead of (usually) a buffer declared on the
-stack and setting up the frame header appropriately. Optionally, poll() can
-be used to wait for free frames in the TX ring.
-
-Structures and definitions for using memory mapped I/O are contained in
-<linux/netlink.h>.
-
-RX and TX rings
----------------
-
-Each ring contains a number of contiguous memory blocks, containing frames of
-fixed size dependent on the parameters used for ring setup.
-
-Ring:	[ block 0 ]
-		[ frame 0 ]
-		[ frame 1 ]
-	[ block 1 ]
-		[ frame 2 ]
-		[ frame 3 ]
-	...
-	[ block n ]
-		[ frame 2 * n ]
-		[ frame 2 * n + 1 ]
-
-The blocks are only visible to the kernel; from the point of view of user-space,
-the ring just contains the frames in a contiguous memory zone.
-
-The ring parameters used for setting up the ring are defined as follows:
-
-struct nl_mmap_req {
-	unsigned int	nm_block_size;
-	unsigned int	nm_block_nr;
-	unsigned int	nm_frame_size;
-	unsigned int	nm_frame_nr;
-};
-
-Frames are grouped into blocks, where each block is a contiguous region of
-memory and holds nm_block_size / nm_frame_size frames. The total number of
-frames in the ring is nm_frame_nr. The following invariants hold:
-
-- frames_per_block = nm_block_size / nm_frame_size
-
-- nm_frame_nr = frames_per_block * nm_block_nr
-
-Some parameters are constrained, specifically:
-
-- nm_block_size must be a multiple of the architecture's memory page size.
-  The getpagesize() function can be used to get the page size.
-
-- nm_frame_size must be equal to or larger than NL_MMAP_HDRLEN; IOW, a frame
-  must be able to hold at least the frame header
-
-- nm_frame_size must be smaller than or equal to nm_block_size
-
-- nm_frame_size must be a multiple of NL_MMAP_MSG_ALIGNMENT
-
-- nm_frame_nr must equal the actual number of frames as specified above.
-
-When the kernel can't allocate physically contiguous memory for a ring block,
-it will fall back to using physically discontiguous memory. This might affect
-performance negatively; to avoid this, the nm_block_size parameter should be
-chosen to be as small as possible for the required frame size, and the number
-of blocks should be increased instead.
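
For reference, the invariants above can be derived mechanically. The following is a small user-space sketch (illustrative only, not part of the patch or the removed document; the struct layout mirrors the one shown above) that builds a consistent nl_mmap_req from a desired frame size and block count:

	/* Illustrative sketch: derive ring parameters satisfying the
	 * invariants listed above. frame_size is assumed to already be
	 * a multiple of NL_MMAP_MSG_ALIGNMENT and >= NL_MMAP_HDRLEN.
	 */
	#include <unistd.h>

	struct nl_mmap_req {
		unsigned int	nm_block_size;
		unsigned int	nm_block_nr;
		unsigned int	nm_frame_size;
		unsigned int	nm_frame_nr;
	};

	static struct nl_mmap_req make_ring_req(unsigned int frame_size,
						unsigned int nr_blocks)
	{
		unsigned int page_size = (unsigned int)sysconf(_SC_PAGESIZE);
		struct nl_mmap_req req;

		/* nm_block_size: a multiple of the page size, large enough
		 * for one frame; kept small so the kernel can allocate
		 * physically contiguous blocks.
		 */
		req.nm_block_size = page_size;
		while (req.nm_block_size < frame_size)
			req.nm_block_size += page_size;

		req.nm_frame_size = frame_size;
		req.nm_block_nr = nr_blocks;

		/* nm_frame_nr must equal frames_per_block * nm_block_nr */
		req.nm_frame_nr = (req.nm_block_size / req.nm_frame_size) *
				  req.nm_block_nr;
		return req;
	}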
-
-Ring frames
------------
-
-Each frame contains a frame header, consisting of a synchronization word and
-some meta-data, and the message itself.
-
-Frame:	[ header message ]
-
-The frame header is defined as follows:
-
-struct nl_mmap_hdr {
-	unsigned int	nm_status;
-	unsigned int	nm_len;
-	__u32		nm_group;
-	/* credentials */
-	__u32		nm_pid;
-	__u32		nm_uid;
-	__u32		nm_gid;
-};
-
-- nm_status is used for synchronizing processing between the kernel and user-
-  space and specifies ownership of the frame as well as the operation to perform
-
-- nm_len contains the length of the message contained in the data area
-
-- nm_group specifies the destination multicast group of the message
-
-- nm_pid, nm_uid and nm_gid contain the netlink pid, UID and GID of the sending
-  process. These values correspond to the data available using SOCK_PASSCRED in
-  the SCM_CREDENTIALS cmsg.
-
-The possible values in the status word are:
-
-- NL_MMAP_STATUS_UNUSED:
-	RX ring:	frame belongs to the kernel and contains no message
-			for user-space. Appropriate action is to invoke poll()
-			to wait for new messages.
-
-	TX ring:	frame belongs to user-space and can be used for
-			message construction.
-
-- NL_MMAP_STATUS_RESERVED:
-	RX ring only:	frame is currently used by the kernel for message
-			construction and contains no valid message yet.
-			Appropriate action is to invoke poll() to wait for
-			new messages.
-
-- NL_MMAP_STATUS_VALID:
-	RX ring:	frame contains a valid message. Appropriate action is
-			to process the message and release the frame back to
-			the kernel by setting the status to
-			NL_MMAP_STATUS_UNUSED or queue the frame by setting the
-			status to NL_MMAP_STATUS_SKIP.
-
-	TX ring:	the frame contains a valid message from user-space to
-			be processed by the kernel. After completing processing
-			the kernel will release the frame back to user-space by
-			setting the status to NL_MMAP_STATUS_UNUSED.
-
-- NL_MMAP_STATUS_COPY:
-	RX ring only:	a message is ready to be processed but could not be
-			stored in the ring, either because it exceeded the
-			frame size or because the originating subsystem does
-			not support memory mapped I/O. Appropriate action is
-			to invoke recvmsg() to receive the message and release
-			the frame back to the kernel by setting the status to
-			NL_MMAP_STATUS_UNUSED.
-
-- NL_MMAP_STATUS_SKIP:
-	RX ring only:	user-space queued the message for later processing, but
-			processed some messages following it in the ring. The
-			kernel should skip this frame when looking for unused
-			frames.
-
-The data area of a frame begins at an offset of NL_MMAP_HDRLEN relative to the
-frame header.
-
-TX limitations
---------------
-
-As of Jan 2015 the message is always copied from the ring frame to an
-allocated buffer due to unresolved security concerns.
-See commit 4682a0358639b29cf ("netlink: Always copy on mmap TX.").
-
-Example
--------
-
-Ring setup:
-
-	unsigned int block_size = 16 * getpagesize();
-	struct nl_mmap_req req = {
-		.nm_block_size		= block_size,
-		.nm_block_nr		= 64,
-		.nm_frame_size		= 16384,
-		.nm_frame_nr		= 64 * block_size / 16384,
-	};
-	unsigned int ring_size;
-	void *rx_ring, *tx_ring;
-
-	/* Configure ring parameters */
-	if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
-		exit(1);
-	if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0)
-		exit(1);
-
-	/* Calculate size of each individual ring */
-	ring_size = req.nm_block_nr * req.nm_block_size;
-
-	/* Map RX/TX rings. The TX ring is located after the RX ring */
-	rx_ring = mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE,
-		       MAP_SHARED, fd, 0);
-	if ((long)rx_ring == -1L)
-		exit(1);
-	tx_ring = rx_ring + ring_size;
-
-Message reception:
-
-This example assumes some ring parameters of the ring setup are available.
-
-	unsigned int frame_offset = 0;
-	struct nl_mmap_hdr *hdr;
-	struct nlmsghdr *nlh;
-	unsigned char buf[16384];
-	ssize_t len;
-
-	while (1) {
-		struct pollfd pfds[1];
-
-		pfds[0].fd = fd;
-		pfds[0].events = POLLIN | POLLERR;
-		pfds[0].revents = 0;
-
-		if (poll(pfds, 1, -1) < 0 && errno != EINTR)
-			exit(1);
-
-		/* Check for errors. Error handling omitted */
-		if (pfds[0].revents & POLLERR)
-			<handle error>
-
-		/* If no new messages, poll again */
-		if (!(pfds[0].revents & POLLIN))
-			continue;
-
-		/* Process all frames */
-		while (1) {
-			/* Get next frame header */
-			hdr = rx_ring + frame_offset;
-
-			if (hdr->nm_status == NL_MMAP_STATUS_VALID) {
-				/* Regular memory mapped frame */
-				nlh = (void *)hdr + NL_MMAP_HDRLEN;
-				len = hdr->nm_len;
-
-				/* Release empty message immediately. May happen
-				 * on error during message construction.
-				 */
-				if (len == 0)
-					goto release;
-			} else if (hdr->nm_status == NL_MMAP_STATUS_COPY) {
-				/* Frame queued to socket receive queue */
-				len = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
-				if (len <= 0)
-					break;
-				nlh = (struct nlmsghdr *)buf;
-			} else
-				/* No more messages to process, continue polling */
-				break;
-
-			process_msg(nlh);
-release:
-			/* Release frame back to the kernel */
-			hdr->nm_status = NL_MMAP_STATUS_UNUSED;
-
-			/* Advance frame offset to next frame */
-			frame_offset = (frame_offset + frame_size) % ring_size;
-		}
-	}
-
-Message transmission:
-
-This example assumes some ring parameters of the ring setup are available.
-A single message is constructed and transmitted; to send multiple messages
-at once they would be constructed in consecutive frames before a final call
-to sendto().
-
-	unsigned int frame_offset = 0;
-	struct nl_mmap_hdr *hdr;
-	struct nlmsghdr *nlh;
-	struct sockaddr_nl addr = {
-		.nl_family	= AF_NETLINK,
-	};
-
-	hdr = tx_ring + frame_offset;
-	if (hdr->nm_status != NL_MMAP_STATUS_UNUSED)
-		/* No frame available. Use poll() to avoid. */
-		exit(1);
-
-	nlh = (void *)hdr + NL_MMAP_HDRLEN;
-
-	/* Build message */
-	build_message(nlh);
-
-	/* Fill frame header: length and status need to be set */
-	hdr->nm_len	= nlh->nlmsg_len;
-	hdr->nm_status	= NL_MMAP_STATUS_VALID;
-
-	if (sendto(fd, NULL, 0, 0, &addr, sizeof(addr)) < 0)
-		exit(1);
-
-	/* Advance frame offset to next frame */
-	frame_offset = (frame_offset + frame_size) % ring_size;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
index dd9d6e6100a5..04523d41b873 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
@@ -1824,10 +1824,12 @@ struct dcbx_app_priority_entry {
 	u8 pri_bitmap;
 	u8 appBitfield;
 	#define DCBX_APP_ENTRY_VALID 0x01
-	#define DCBX_APP_ENTRY_SF_MASK 0x30
+	#define DCBX_APP_ENTRY_SF_MASK 0xF0
 	#define DCBX_APP_ENTRY_SF_SHIFT 4
 	#define DCBX_APP_SF_ETH_TYPE 0x10
 	#define DCBX_APP_SF_PORT 0x20
+	#define DCBX_APP_SF_UDP 0x40
+	#define DCBX_APP_SF_DEFAULT 0x80
 #elif defined(__LITTLE_ENDIAN)
 	u8 appBitfield;
 	#define DCBX_APP_ENTRY_VALID 0x01
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index ec6e849676c1..1dac6c6111bf 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -702,6 +702,11 @@ struct doorbell_stats {
 	u32 db_full;
 };
 
+struct hash_mac_addr {
+	struct list_head list;
+	u8 addr[ETH_ALEN];
+};
+
 struct adapter {
 	void __iomem *regs;
 	void __iomem *bar2;
@@ -740,6 +745,7 @@ struct adapter {
 	void *uld_handle[CXGB4_ULD_MAX];
 	struct list_head list_node;
 	struct list_head rcu_node;
+	struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */
 
 	struct tid_info tids;
 	void **tid_release_head;
@@ -1207,6 +1213,24 @@ static inline int t4_wr_mbox_ns(struct adapter *adap, int mbox, const void *cmd,
 	return t4_wr_mbox_meat(adap, mbox, cmd, size, rpl, false);
 }
 
+/**
+ *	hash_mac_addr - return the hash value of a MAC address
+ *	@addr: the 48-bit Ethernet MAC address
+ *
+ *	Hashes a MAC address according to the hash function used by HW inexact
+ *	(hash) address matching.
+ */
+static inline int hash_mac_addr(const u8 *addr)
+{
+	u32 a = ((u32)addr[0] << 16) | ((u32)addr[1] << 8) | addr[2];
+	u32 b = ((u32)addr[3] << 16) | ((u32)addr[4] << 8) | addr[5];
+
+	a ^= b;
+	a ^= (a >> 12);
+	a ^= (a >> 6);
+	return a & 0x3f;
+}
+
 void t4_write_indirect(struct adapter *adap, unsigned int addr_reg,
 		       unsigned int data_reg, const u32 *vals,
 		       unsigned int nregs, unsigned int start_idx);
@@ -1389,6 +1413,9 @@ int t4_set_rxmode(struct adapter *adap, unsigned int mbox, unsigned int viid,
 int t4_alloc_mac_filt(struct adapter *adap, unsigned int mbox,
 		      unsigned int viid, bool free, unsigned int naddr,
 		      const u8 **addr, u16 *idx, u64 *hash, bool sleep_ok);
+int t4_free_mac_filt(struct adapter *adap, unsigned int mbox,
+		     unsigned int viid, unsigned int naddr,
+		     const u8 **addr, bool sleep_ok);
int t4_change_mac(struct adapter *adap, unsigned int mbox, unsigned int viid,
 		  int idx, const u8 *addr, bool persist, bool add_smt);
 int t4_set_addr_hash(struct adapter *adap, unsigned int mbox, unsigned int viid,
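
The 6-bit hash above XOR-folds the two 24-bit halves of the MAC address into one of 64 buckets; each bucket becomes one bit of the u64 hash vector programmed into the MPS. A quick stand-alone harness to sanity-check it (a sketch, with u32/u8 spelled as stdint types so it builds outside the kernel tree; the sample MAC is arbitrary):

	#include <stdint.h>
	#include <stdio.h>

	/* Same folding as the driver's hash_mac_addr() */
	static int hash_mac_addr(const uint8_t *addr)
	{
		uint32_t a = ((uint32_t)addr[0] << 16) |
			     ((uint32_t)addr[1] << 8) | addr[2];
		uint32_t b = ((uint32_t)addr[3] << 16) |
			     ((uint32_t)addr[4] << 8) | addr[5];

		a ^= b;
		a ^= (a >> 12);
		a ^= (a >> 6);
		return a & 0x3f;	/* 64 buckets -> one bit of a u64 */
	}

	int main(void)
	{
		const uint8_t mac[6] = { 0x00, 0x07, 0x43, 0x12, 0x34, 0x56 };
		uint64_t vec = 1ULL << hash_mac_addr(mac);

		/* the drivers OR such bits together to build the hash vector */
		printf("bucket=%d vec=%#llx\n", hash_mac_addr(mac),
		       (unsigned long long)vec);
		return 0;
	}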
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index b8a5fb0c32d4..adad73f7c8cd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -338,84 +338,108 @@ void t4_os_portmod_changed(const struct adapter *adap, int port_id)
 		netdev_info(dev, "%s module inserted\n", mod_str[pi->mod_type]);
 }
 
+int dbfifo_int_thresh = 10; /* 10 == 640 entry threshold */
+module_param(dbfifo_int_thresh, int, 0644);
+MODULE_PARM_DESC(dbfifo_int_thresh, "doorbell fifo interrupt threshold");
+
 /*
- * Configure the exact and hash address filters to handle a port's multicast
- * and secondary unicast MAC addresses.
+ * usecs to sleep while draining the dbfifo
  */
-static int set_addr_filters(const struct net_device *dev, bool sleep)
+static int dbfifo_drain_delay = 1000;
+module_param(dbfifo_drain_delay, int, 0644);
+MODULE_PARM_DESC(dbfifo_drain_delay,
+		 "usecs to sleep while draining the dbfifo");
+
+static inline int cxgb4_set_addr_hash(struct port_info *pi)
 {
+	struct adapter *adap = pi->adapter;
+	u64 vec = 0;
+	bool ucast = false;
+	struct hash_mac_addr *entry;
+
+	/* Calculate the hash vector for the updated list and program it */
+	list_for_each_entry(entry, &adap->mac_hlist, list) {
+		ucast |= is_unicast_ether_addr(entry->addr);
+		vec |= (1ULL << hash_mac_addr(entry->addr));
+	}
+	return t4_set_addr_hash(adap, adap->mbox, pi->viid, ucast,
+				vec, false);
+}
+
+static int cxgb4_mac_sync(struct net_device *netdev, const u8 *mac_addr)
+{
+	struct port_info *pi = netdev_priv(netdev);
+	struct adapter *adap = pi->adapter;
+	int ret;
 	u64 mhash = 0;
 	u64 uhash = 0;
-	bool free = true;
-	u16 filt_idx[7];
-	const u8 *addr[7];
-	int ret, naddr = 0;
-	const struct netdev_hw_addr *ha;
-	int uc_cnt = netdev_uc_count(dev);
-	int mc_cnt = netdev_mc_count(dev);
-	const struct port_info *pi = netdev_priv(dev);
-	unsigned int mb = pi->adapter->pf;
+	bool free = false;
+	bool ucast = is_unicast_ether_addr(mac_addr);
+	const u8 *maclist[1] = {mac_addr};
+	struct hash_mac_addr *new_entry;
 
-	/* first do the secondary unicast addresses */
-	netdev_for_each_uc_addr(ha, dev) {
-		addr[naddr++] = ha->addr;
-		if (--uc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
-			ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
-					naddr, addr, filt_idx, &uhash, sleep);
-			if (ret < 0)
-				return ret;
-
-			free = false;
-			naddr = 0;
-		}
+	ret = t4_alloc_mac_filt(adap, adap->mbox, pi->viid, free, 1, maclist,
+				NULL, ucast ? &uhash : &mhash, false);
+	if (ret < 0)
+		goto out;
+	/* if hash != 0, then add the addr to hash addr list
+	 * so on the end we will calculate the hash for the
+	 * list and program it
+	 */
+	if (uhash || mhash) {
+		new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC);
+		if (!new_entry)
+			return -ENOMEM;
+		ether_addr_copy(new_entry->addr, mac_addr);
+		list_add_tail(&new_entry->list, &adap->mac_hlist);
+		ret = cxgb4_set_addr_hash(pi);
 	}
+out:
+	return ret < 0 ? ret : 0;
+}
 
-	/* next set up the multicast addresses */
-	netdev_for_each_mc_addr(ha, dev) {
-		addr[naddr++] = ha->addr;
-		if (--mc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
-			ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
-					naddr, addr, filt_idx, &mhash, sleep);
-			if (ret < 0)
-				return ret;
+static int cxgb4_mac_unsync(struct net_device *netdev, const u8 *mac_addr)
+{
+	struct port_info *pi = netdev_priv(netdev);
+	struct adapter *adap = pi->adapter;
+	int ret;
+	const u8 *maclist[1] = {mac_addr};
+	struct hash_mac_addr *entry, *tmp;
 
-			free = false;
-			naddr = 0;
+	/* If the MAC address to be removed is in the hash addr
+	 * list, delete it from the list and update hash vector
+	 */
+	list_for_each_entry_safe(entry, tmp, &adap->mac_hlist, list) {
+		if (ether_addr_equal(entry->addr, mac_addr)) {
+			list_del(&entry->list);
+			kfree(entry);
+			return cxgb4_set_addr_hash(pi);
 		}
 	}
 
-	return t4_set_addr_hash(pi->adapter, mb, pi->viid, uhash != 0,
-				uhash | mhash, sleep);
+	ret = t4_free_mac_filt(adap, adap->mbox, pi->viid, 1, maclist, false);
+	return ret < 0 ? -EINVAL : 0;
 }
 
-int dbfifo_int_thresh = 10; /* 10 == 640 entry threshold */
-module_param(dbfifo_int_thresh, int, 0644);
-MODULE_PARM_DESC(dbfifo_int_thresh, "doorbell fifo interrupt threshold");
-
-/*
- * usecs to sleep while draining the dbfifo
- */
-static int dbfifo_drain_delay = 1000;
-module_param(dbfifo_drain_delay, int, 0644);
-MODULE_PARM_DESC(dbfifo_drain_delay,
-		 "usecs to sleep while draining the dbfifo");
-
 /*
  * Set Rx properties of a port, such as promiscruity, address filters, and MTU.
  * If @mtu is -1 it is left unchanged.
 */
 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
 {
-	int ret;
 	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 
-	ret = set_addr_filters(dev, sleep_ok);
-	if (ret == 0)
-		ret = t4_set_rxmode(pi->adapter, pi->adapter->pf, pi->viid, mtu,
-				    (dev->flags & IFF_PROMISC) ? 1 : 0,
-				    (dev->flags & IFF_ALLMULTI) ? 1 : 0,
-				    1, -1, sleep_ok);
-	return ret;
+	if (!(dev->flags & IFF_PROMISC)) {
+		__dev_uc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync);
+		if (!(dev->flags & IFF_ALLMULTI))
+			__dev_mc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync);
+	}
+
+	return t4_set_rxmode(adapter, adapter->mbox, pi->viid, mtu,
+			     (dev->flags & IFF_PROMISC) ? 1 : 0,
+			     (dev->flags & IFF_ALLMULTI) ? 1 : 0, 1, -1,
+			     sleep_ok);
 }
 
 /**
@@ -2677,6 +2701,8 @@ static int cxgb_up(struct adapter *adap)
 #if IS_ENABLED(CONFIG_IPV6)
 	update_clip(adap);
 #endif
+	/* Initialize hash mac addr list*/
+	INIT_LIST_HEAD(&adap->mac_hlist);
  out:
 	return err;
 irq_err:
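
The conversion above replaces a rewalk of the whole uc/mc lists with the core's incremental sync machinery. The shape of that contract, sketched for a hypothetical driver "foo" (illustrative only; __dev_uc_sync()/__dev_mc_sync() are the real core helpers, everything named foo_* is invented):

	#include <linux/netdevice.h>
	#include <linux/etherdevice.h>

	/* Called once per address added to the netdev's list since the
	 * last sync; return 0 to mark it synced, non-zero to retry later.
	 */
	static int foo_mac_sync(struct net_device *dev, const unsigned char *addr)
	{
		/* program one exact-match (or hash) filter here */
		return 0;
	}

	/* Called once per address removed since the last sync */
	static int foo_mac_unsync(struct net_device *dev, const unsigned char *addr)
	{
		/* remove the filter programmed by foo_mac_sync() */
		return 0;
	}

	static void foo_set_rx_mode(struct net_device *dev)
	{
		if (!(dev->flags & IFF_PROMISC)) {
			__dev_uc_sync(dev, foo_mac_sync, foo_mac_unsync);
			if (!(dev->flags & IFF_ALLMULTI))
				__dev_mc_sync(dev, foo_mac_sync, foo_mac_unsync);
		}
		/* promisc/allmulti themselves go through the usual
		 * device-specific rx-mode command afterwards
		 */
	}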
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 636b4691f252..cc1736bece0f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -4433,23 +4433,6 @@ void t4_intr_disable(struct adapter *adapter)
 }
 
 /**
- *	hash_mac_addr - return the hash value of a MAC address
- *	@addr: the 48-bit Ethernet MAC address
- *
- *	Hashes a MAC address according to the hash function used by HW inexact
- *	(hash) address matching.
- */
-static int hash_mac_addr(const u8 *addr)
-{
-	u32 a = ((u32)addr[0] << 16) | ((u32)addr[1] << 8) | addr[2];
-	u32 b = ((u32)addr[3] << 16) | ((u32)addr[4] << 8) | addr[5];
-	a ^= b;
-	a ^= (a >> 12);
-	a ^= (a >> 6);
-	return a & 0x3f;
-}
-
-/**
 *	t4_config_rss_range - configure a portion of the RSS mapping table
 *	@adapter: the adapter
 *	@mbox: mbox to use for the FW command
@@ -6738,6 +6721,81 @@ int t4_alloc_mac_filt(struct adapter *adap, unsigned int mbox,
 }
 
 /**
+ *	t4_free_mac_filt - frees exact-match filters of given MAC addresses
+ *	@adap: the adapter
+ *	@mbox: mailbox to use for the FW command
+ *	@viid: the VI id
+ *	@naddr: the number of MAC addresses to allocate filters for (up to 7)
+ *	@addr: the MAC address(es)
+ *	@sleep_ok: call is allowed to sleep
+ *
+ *	Frees the exact-match filter for each of the supplied addresses
+ *
+ *	Returns a negative error number or the number of filters freed.
+ */
+int t4_free_mac_filt(struct adapter *adap, unsigned int mbox,
+		     unsigned int viid, unsigned int naddr,
+		     const u8 **addr, bool sleep_ok)
+{
+	int offset, ret = 0;
+	struct fw_vi_mac_cmd c;
+	unsigned int nfilters = 0;
+	unsigned int max_naddr = is_t4(adap->params.chip) ?
+				       NUM_MPS_CLS_SRAM_L_INSTANCES :
+				       NUM_MPS_T5_CLS_SRAM_L_INSTANCES;
+	unsigned int rem = naddr;
+
+	if (naddr > max_naddr)
+		return -EINVAL;
+
+	for (offset = 0; offset < (int)naddr ; /**/) {
+		unsigned int fw_naddr = (rem < ARRAY_SIZE(c.u.exact)
+					 ? rem
+					 : ARRAY_SIZE(c.u.exact));
+		size_t len16 = DIV_ROUND_UP(offsetof(struct fw_vi_mac_cmd,
+						     u.exact[fw_naddr]), 16);
+		struct fw_vi_mac_exact *p;
+		int i;
+
+		memset(&c, 0, sizeof(c));
+		c.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_VI_MAC_CMD) |
+					   FW_CMD_REQUEST_F |
+					   FW_CMD_WRITE_F |
+					   FW_CMD_EXEC_V(0) |
+					   FW_VI_MAC_CMD_VIID_V(viid));
+		c.freemacs_to_len16 =
+				cpu_to_be32(FW_VI_MAC_CMD_FREEMACS_V(0) |
+					    FW_CMD_LEN16_V(len16));
+
+		for (i = 0, p = c.u.exact; i < (int)fw_naddr; i++, p++) {
+			p->valid_to_idx = cpu_to_be16(
+				FW_VI_MAC_CMD_VALID_F |
+				FW_VI_MAC_CMD_IDX_V(FW_VI_MAC_MAC_BASED_FREE));
+			memcpy(p->macaddr, addr[offset+i], sizeof(p->macaddr));
+		}
+
+		ret = t4_wr_mbox_meat(adap, mbox, &c, sizeof(c), &c, sleep_ok);
+		if (ret)
+			break;
+
+		for (i = 0, p = c.u.exact; i < fw_naddr; i++, p++) {
+			u16 index = FW_VI_MAC_CMD_IDX_G(
+						be16_to_cpu(p->valid_to_idx));
+
+			if (index < max_naddr)
+				nfilters++;
+		}
+
+		offset += fw_naddr;
+		rem -= fw_naddr;
+	}
+
+	if (ret == 0)
+		ret = nfilters;
+	return ret;
+}
+
+/**
 *	t4_change_mac - modifies the exact-match filter for a MAC address
 *	@adap: the adapter
 *	@mbox: mailbox to use for the FW command
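
A hypothetical caller of the new API (not part of the patch; MAC values are made up) would look like this — the function chunks the batch into firmware commands of ARRAY_SIZE(c.u.exact) entries internally and returns the number of filters actually freed:

	/* Illustrative only: free the exact-match filters for two addresses
	 * on VI "viid" and require that both were actually freed.
	 */
	static int demo_free_two_filters(struct adapter *adap, unsigned int viid)
	{
		static const u8 mac0[ETH_ALEN] = { 0x00, 0x07, 0x43, 0x00, 0x00, 0x01 };
		static const u8 mac1[ETH_ALEN] = { 0x00, 0x07, 0x43, 0x00, 0x00, 0x02 };
		const u8 *addrs[2] = { mac0, mac1 };
		int freed;

		freed = t4_free_mac_filt(adap, adap->mbox, viid, 2, addrs, true);
		if (freed < 0)
			return freed;		/* mailbox or parameter error */
		return freed == 2 ? 0 : -EIO;	/* expect both freed */
	}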
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
index 6049f70e110c..4a707c32d76f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
@@ -348,6 +348,11 @@ struct sge {
 #define for_each_ethrxq(sge, iter) \
 	for (iter = 0; iter < (sge)->ethqsets; iter++)
 
+struct hash_mac_addr {
+	struct list_head list;
+	u8 addr[ETH_ALEN];
+};
+
 /*
  * Per-"adapter" (Virtual Function) information.
 */
@@ -381,6 +386,9 @@ struct adapter {
 
 	/* various locks */
 	spinlock_t stats_lock;
+
+	/* list of MAC addresses in MPS Hash */
+	struct list_head mac_hlist;
 };
 
 enum { /* adapter flags */
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 0cfa5d72cafd..8337514ababb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -741,6 +741,9 @@ static int adapter_up(struct adapter *adapter)
 	 */
 	enable_rx(adapter);
 	t4vf_sge_start(adapter);
+
+	/* Initialize hash mac addr list*/
+	INIT_LIST_HEAD(&adapter->mac_hlist);
 	return 0;
 }
 
@@ -905,51 +908,74 @@ static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device
 	return naddr;
 }
 
-/*
- * Configure the exact and hash address filters to handle a port's multicast
- * and secondary unicast MAC addresses.
- */
-static int set_addr_filters(const struct net_device *dev, bool sleep)
+static inline int cxgb4vf_set_addr_hash(struct port_info *pi)
 {
-	u64 mhash = 0;
-	u64 uhash = 0;
-	bool free = true;
-	unsigned int offset, naddr;
-	const u8 *addr[7];
-	int ret;
-	const struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
+	u64 vec = 0;
+	bool ucast = false;
+	struct hash_mac_addr *entry;
 
-	/* first do the secondary unicast addresses */
-	for (offset = 0; ; offset += naddr) {
-		naddr = collect_netdev_uc_list_addrs(dev, addr, offset,
-						     ARRAY_SIZE(addr));
-		if (naddr == 0)
-			break;
+	/* Calculate the hash vector for the updated list and program it */
+	list_for_each_entry(entry, &adapter->mac_hlist, list) {
+		ucast |= is_unicast_ether_addr(entry->addr);
+		vec |= (1ULL << hash_mac_addr(entry->addr));
+	}
+	return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false);
+}
 
-		ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
-					  naddr, addr, NULL, &uhash, sleep);
-		if (ret < 0)
-			return ret;
+static int cxgb4vf_mac_sync(struct net_device *netdev, const u8 *mac_addr)
+{
+	struct port_info *pi = netdev_priv(netdev);
+	struct adapter *adapter = pi->adapter;
+	int ret;
+	u64 mhash = 0;
+	u64 uhash = 0;
+	bool free = false;
+	bool ucast = is_unicast_ether_addr(mac_addr);
+	const u8 *maclist[1] = {mac_addr};
+	struct hash_mac_addr *new_entry;
 
-		free = false;
+	ret = t4vf_alloc_mac_filt(adapter, pi->viid, free, 1, maclist,
+				  NULL, ucast ? &uhash : &mhash, false);
+	if (ret < 0)
+		goto out;
+	/* if hash != 0, then add the addr to hash addr list
+	 * so on the end we will calculate the hash for the
+	 * list and program it
+	 */
+	if (uhash || mhash) {
+		new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC);
+		if (!new_entry)
+			return -ENOMEM;
+		ether_addr_copy(new_entry->addr, mac_addr);
+		list_add_tail(&new_entry->list, &adapter->mac_hlist);
+		ret = cxgb4vf_set_addr_hash(pi);
 	}
+out:
+	return ret < 0 ? ret : 0;
+}
 
-	/* next set up the multicast addresses */
-	for (offset = 0; ; offset += naddr) {
-		naddr = collect_netdev_mc_list_addrs(dev, addr, offset,
-						     ARRAY_SIZE(addr));
-		if (naddr == 0)
-			break;
+static int cxgb4vf_mac_unsync(struct net_device *netdev, const u8 *mac_addr)
+{
+	struct port_info *pi = netdev_priv(netdev);
+	struct adapter *adapter = pi->adapter;
+	int ret;
+	const u8 *maclist[1] = {mac_addr};
+	struct hash_mac_addr *entry, *tmp;
 
-		ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
-					  naddr, addr, NULL, &mhash, sleep);
-		if (ret < 0)
-			return ret;
-		free = false;
+	/* If the MAC address to be removed is in the hash addr
+	 * list, delete it from the list and update hash vector
+	 */
+	list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, list) {
+		if (ether_addr_equal(entry->addr, mac_addr)) {
+			list_del(&entry->list);
+			kfree(entry);
+			return cxgb4vf_set_addr_hash(pi);
+		}
 	}
 
-	return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0,
-				  uhash | mhash, sleep);
+	ret = t4vf_free_mac_filt(adapter, pi->viid, 1, maclist, false);
+	return ret < 0 ? -EINVAL : 0;
 }
 
 /*
@@ -958,16 +984,18 @@ static int set_addr_filters(const struct net_device *dev, bool sleep)
 */
 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
 {
-	int ret;
 	struct port_info *pi = netdev_priv(dev);
 
-	ret = set_addr_filters(dev, sleep_ok);
-	if (ret == 0)
-		ret = t4vf_set_rxmode(pi->adapter, pi->viid, -1,
-				      (dev->flags & IFF_PROMISC) != 0,
-				      (dev->flags & IFF_ALLMULTI) != 0,
-				      1, -1, sleep_ok);
-	return ret;
+	if (!(dev->flags & IFF_PROMISC)) {
+		__dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
+		if (!(dev->flags & IFF_ALLMULTI))
+			__dev_mc_sync(dev, cxgb4vf_mac_sync,
+				      cxgb4vf_mac_unsync);
+	}
+	return t4vf_set_rxmode(pi->adapter, pi->viid, -1,
+			       (dev->flags & IFF_PROMISC) != 0,
+			       (dev->flags & IFF_ALLMULTI) != 0,
+			       1, -1, sleep_ok);
 }
 
 /*
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
index 88b8981b4751..6ce302fe1a61 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
@@ -285,6 +285,24 @@ static inline int is_t4(enum chip_type chip)
 	return CHELSIO_CHIP_VERSION(chip) == CHELSIO_T4;
 }
 
+/**
+ *	hash_mac_addr - return the hash value of a MAC address
+ *	@addr: the 48-bit Ethernet MAC address
+ *
+ *	Hashes a MAC address according to the hash function used by hardware
+ *	inexact (hash) address matching.
+ */
+static inline int hash_mac_addr(const u8 *addr)
+{
+	u32 a = ((u32)addr[0] << 16) | ((u32)addr[1] << 8) | addr[2];
+	u32 b = ((u32)addr[3] << 16) | ((u32)addr[4] << 8) | addr[5];
+
+	a ^= b;
+	a ^= (a >> 12);
+	a ^= (a >> 6);
+	return a & 0x3f;
+}
+
 int t4vf_wait_dev_ready(struct adapter *);
 int t4vf_port_init(struct adapter *, int);
 
@@ -320,6 +338,8 @@ int t4vf_set_rxmode(struct adapter *, unsigned int, int, int, int, int, int,
 		    bool);
 int t4vf_alloc_mac_filt(struct adapter *, unsigned int, bool, unsigned int,
 			const u8 **, u16 *, u64 *, bool);
+int t4vf_free_mac_filt(struct adapter *, unsigned int, unsigned int naddr,
+		       const u8 **, bool);
 int t4vf_change_mac(struct adapter *, unsigned int, int, const u8 *, bool);
 int t4vf_set_addr_hash(struct adapter *, unsigned int, bool, u64, bool);
 int t4vf_get_port_stats(struct adapter *, int, struct t4vf_port_stats *);
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index b6fa74aafe47..54220117dcba 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@ -236,23 +236,6 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size,
 	return -ETIMEDOUT;
 }
 
-/**
- *	hash_mac_addr - return the hash value of a MAC address
- *	@addr: the 48-bit Ethernet MAC address
- *
- *	Hashes a MAC address according to the hash function used by hardware
- *	inexact (hash) address matching.
- */
-static int hash_mac_addr(const u8 *addr)
-{
-	u32 a = ((u32)addr[0] << 16) | ((u32)addr[1] << 8) | addr[2];
-	u32 b = ((u32)addr[3] << 16) | ((u32)addr[4] << 8) | addr[5];
-	a ^= b;
-	a ^= (a >> 12);
-	a ^= (a >> 6);
-	return a & 0x3f;
-}
-
 #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\
 		     FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \
 		     FW_PORT_CAP_SPEED_100G | FW_PORT_CAP_ANEG)
@@ -1266,6 +1249,77 @@ int t4vf_alloc_mac_filt(struct adapter *adapter, unsigned int viid, bool free,
 }
 
 /**
+ *	t4vf_free_mac_filt - frees exact-match filters of given MAC addresses
+ *	@adapter: the adapter
+ *	@viid: the VI id
+ *	@naddr: the number of MAC addresses to allocate filters for (up to 7)
+ *	@addr: the MAC address(es)
+ *	@sleep_ok: call is allowed to sleep
+ *
+ *	Frees the exact-match filter for each of the supplied addresses
+ *
+ *	Returns a negative error number or the number of filters freed.
+ */
+int t4vf_free_mac_filt(struct adapter *adapter, unsigned int viid,
+		       unsigned int naddr, const u8 **addr, bool sleep_ok)
+{
+	int offset, ret = 0;
+	struct fw_vi_mac_cmd cmd;
+	unsigned int nfilters = 0;
+	unsigned int max_naddr = adapter->params.arch.mps_tcam_size;
+	unsigned int rem = naddr;
+
+	if (naddr > max_naddr)
+		return -EINVAL;
+
+	for (offset = 0; offset < (int)naddr ; /**/) {
+		unsigned int fw_naddr = (rem < ARRAY_SIZE(cmd.u.exact) ?
+					 rem : ARRAY_SIZE(cmd.u.exact));
+		size_t len16 = DIV_ROUND_UP(offsetof(struct fw_vi_mac_cmd,
+						     u.exact[fw_naddr]), 16);
+		struct fw_vi_mac_exact *p;
+		int i;
+
+		memset(&cmd, 0, sizeof(cmd));
+		cmd.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_VI_MAC_CMD) |
+				     FW_CMD_REQUEST_F |
+				     FW_CMD_WRITE_F |
+				     FW_CMD_EXEC_V(0) |
+				     FW_VI_MAC_CMD_VIID_V(viid));
+		cmd.freemacs_to_len16 =
+				cpu_to_be32(FW_VI_MAC_CMD_FREEMACS_V(0) |
+					    FW_CMD_LEN16_V(len16));
+
+		for (i = 0, p = cmd.u.exact; i < (int)fw_naddr; i++, p++) {
+			p->valid_to_idx = cpu_to_be16(
+				FW_VI_MAC_CMD_VALID_F |
+				FW_VI_MAC_CMD_IDX_V(FW_VI_MAC_MAC_BASED_FREE));
+			memcpy(p->macaddr, addr[offset+i], sizeof(p->macaddr));
+		}
+
+		ret = t4vf_wr_mbox_core(adapter, &cmd, sizeof(cmd), &cmd,
+					sleep_ok);
+		if (ret)
+			break;
+
+		for (i = 0, p = cmd.u.exact; i < fw_naddr; i++, p++) {
+			u16 index = FW_VI_MAC_CMD_IDX_G(
+						be16_to_cpu(p->valid_to_idx));
+
+			if (index < max_naddr)
+				nfilters++;
+		}
+
+		offset += fw_naddr;
+		rem -= fw_naddr;
+	}
+
+	if (ret == 0)
+		ret = nfilters;
+	return ret;
+}
+
+/**
 *	t4vf_change_mac - modifies the exact-match filter for a MAC address
 *	@adapter: the adapter
 *	@viid: the Virtual Interface ID
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 978d07a61bbf..73feaf7eedb8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -124,6 +124,8 @@ struct qed_sp_vport_update_params {
 	u8				update_vport_active_tx_flg;
 	u8				vport_active_tx_flg;
 	u8				update_approx_mcast_flg;
+	u8				update_accept_any_vlan_flg;
+	u8				accept_any_vlan;
 	unsigned long			bins[8];
 	struct qed_rss_params		*rss_params;
 	struct qed_filter_accept_flags	accept_flags;
@@ -393,7 +395,9 @@ qed_sp_vport_update(struct qed_hwfn *p_hwfn,
 	p_cmn->update_rx_active_flg = p_params->update_vport_active_rx_flg;
 	p_cmn->tx_active_flg = p_params->vport_active_tx_flg;
 	p_cmn->update_tx_active_flg = p_params->update_vport_active_tx_flg;
-
+	p_cmn->accept_any_vlan = p_params->accept_any_vlan;
+	p_cmn->update_accept_any_vlan_flg =
+			p_params->update_accept_any_vlan_flg;
 	rc = qed_sp_vport_update_rss(p_hwfn, p_ramrod, p_rss_params);
 	if (rc) {
 		/* Return spq entry which is taken in qed_sp_init_request()*/
@@ -444,8 +448,10 @@ static int qed_sp_vport_stop(struct qed_hwfn *p_hwfn,
 static int qed_filter_accept_cmd(struct qed_dev *cdev,
 				 u8 vport,
 				 struct qed_filter_accept_flags accept_flags,
-				 enum spq_mode comp_mode,
-				 struct qed_spq_comp_cb *p_comp_data)
+				 u8 update_accept_any_vlan,
+				 u8 accept_any_vlan,
+				 enum spq_mode comp_mode,
+				 struct qed_spq_comp_cb *p_comp_data)
 {
 	struct qed_sp_vport_update_params vport_update_params;
 	int i, rc;
@@ -454,6 +460,8 @@ static int qed_filter_accept_cmd(struct qed_dev *cdev,
 	memset(&vport_update_params, 0, sizeof(vport_update_params));
 	vport_update_params.vport_id = vport;
 	vport_update_params.accept_flags = accept_flags;
+	vport_update_params.update_accept_any_vlan_flg = update_accept_any_vlan;
+	vport_update_params.accept_any_vlan = accept_any_vlan;
 
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
@@ -471,6 +479,10 @@ static int qed_filter_accept_cmd(struct qed_dev *cdev,
 			   "Accept filter configured, flags = [Rx]%x [Tx]%x\n",
 			   accept_flags.rx_accept_filter,
 			   accept_flags.tx_accept_filter);
+		if (update_accept_any_vlan)
+			DP_VERBOSE(p_hwfn, QED_MSG_SP,
+				   "accept_any_vlan=%d configured\n",
+				   accept_any_vlan);
 	}
 
 	return 0;
@@ -1347,6 +1359,9 @@ static int qed_update_vport(struct qed_dev *cdev,
 		params->update_vport_active_flg;
 	sp_params.vport_active_rx_flg = params->vport_active_flg;
 	sp_params.vport_active_tx_flg = params->vport_active_flg;
+	sp_params.accept_any_vlan = params->accept_any_vlan;
+	sp_params.update_accept_any_vlan_flg =
+		params->update_accept_any_vlan_flg;
 
 	/* RSS - is a bit tricky, since upper-layer isn't familiar with hwfns.
 	 * We need to re-fix the rss values per engine for CMT.
@@ -1566,7 +1581,7 @@ static int qed_configure_filter_rx_mode(struct qed_dev *cdev,
 	else if (type == QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC)
 		accept_flags.rx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED;
 
-	return qed_filter_accept_cmd(cdev, 0, accept_flags,
+	return qed_filter_accept_cmd(cdev, 0, accept_flags, false, false,
 				     QED_SPQ_MODE_CB, NULL);
 }
 
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index f75d9e0676ce..15c5528b4f39 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -100,6 +100,12 @@ struct qede_stats {
 	u64 tx_mac_ctrl_frames;
 };
 
+struct qede_vlan {
+	struct list_head list;
+	u16 vid;
+	bool configured;
+};
+
 struct qede_dev {
 	struct qed_dev			*cdev;
 	struct net_device		*ndev;
@@ -154,6 +160,10 @@ struct qede_dev {
 	u16			q_num_rx_buffers; /* Must be a power of two */
 	u16			q_num_tx_buffers; /* Must be a power of two */
 
+	struct list_head vlan_list;
+	u16 configured_vlans;
+	u16 non_configured_vlans;
+	bool accept_any_vlan;
 	struct delayed_work sp_task;
 	unsigned long sp_flags;
 };
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index f50e0bd7fb2c..5f15e23a0f7d 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1140,6 +1140,21 @@ static int qede_set_ucast_rx_mac(struct qede_dev *edev,
 	return edev->ops->filter_config(edev->cdev, &filter_cmd);
 }
 
+static int qede_set_ucast_rx_vlan(struct qede_dev *edev,
+				  enum qed_filter_xcast_params_type opcode,
+				  u16 vid)
+{
+	struct qed_filter_params filter_cmd;
+
+	memset(&filter_cmd, 0, sizeof(filter_cmd));
+	filter_cmd.type = QED_FILTER_TYPE_UCAST;
+	filter_cmd.filter.ucast.type = opcode;
+	filter_cmd.filter.ucast.vlan_valid = 1;
+	filter_cmd.filter.ucast.vlan = vid;
+
+	return edev->ops->filter_config(edev->cdev, &filter_cmd);
+}
+
 void qede_fill_by_demand_stats(struct qede_dev *edev)
 {
 	struct qed_eth_stats stats;
@@ -1252,6 +1267,247 @@ static struct rtnl_link_stats64 *qede_get_stats64(
 	return stats;
 }
 
+static void qede_config_accept_any_vlan(struct qede_dev *edev, bool action)
+{
+	struct qed_update_vport_params params;
+	int rc;
+
+	/* Proceed only if action actually needs to be performed */
+	if (edev->accept_any_vlan == action)
+		return;
+
+	memset(&params, 0, sizeof(params));
+
+	params.vport_id = 0;
+	params.accept_any_vlan = action;
+	params.update_accept_any_vlan_flg = 1;
+
+	rc = edev->ops->vport_update(edev->cdev, &params);
+	if (rc) {
+		DP_ERR(edev, "Failed to %s accept-any-vlan\n",
+		       action ? "enable" : "disable");
+	} else {
+		DP_INFO(edev, "%s accept-any-vlan\n",
+			action ? "enabled" : "disabled");
+		edev->accept_any_vlan = action;
+	}
+}
+
+static int qede_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+	struct qede_vlan *vlan, *tmp;
+	int rc;
+
+	DP_VERBOSE(edev, NETIF_MSG_IFUP, "Adding vlan 0x%04x\n", vid);
+
+	vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
+	if (!vlan) {
+		DP_INFO(edev, "Failed to allocate struct for vlan\n");
+		return -ENOMEM;
+	}
+	INIT_LIST_HEAD(&vlan->list);
+	vlan->vid = vid;
+	vlan->configured = false;
+
+	/* Verify vlan isn't already configured */
+	list_for_each_entry(tmp, &edev->vlan_list, list) {
+		if (tmp->vid == vlan->vid) {
+			DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
+				   "vlan already configured\n");
+			kfree(vlan);
+			return -EEXIST;
+		}
+	}
+
+	/* If interface is down, cache this VLAN ID and return */
+	if (edev->state != QEDE_STATE_OPEN) {
+		DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
+			   "Interface is down, VLAN %d will be configured when interface is up\n",
+			   vid);
+		if (vid != 0)
+			edev->non_configured_vlans++;
+		list_add(&vlan->list, &edev->vlan_list);
+
+		return 0;
+	}
+
+	/* Check for the filter limit.
+	 * Note - vlan0 has a reserved filter and can be added without
+	 * worrying about quota
+	 */
+	if ((edev->configured_vlans < edev->dev_info.num_vlan_filters) ||
+	    (vlan->vid == 0)) {
+		rc = qede_set_ucast_rx_vlan(edev,
+					    QED_FILTER_XCAST_TYPE_ADD,
+					    vlan->vid);
+		if (rc) {
+			DP_ERR(edev, "Failed to configure VLAN %d\n",
+			       vlan->vid);
+			kfree(vlan);
+			return -EINVAL;
+		}
+		vlan->configured = true;
+
+		/* vlan0 filter isn't consuming out of our quota */
+		if (vlan->vid != 0)
+			edev->configured_vlans++;
+	} else {
+		/* Out of quota; Activate accept-any-VLAN mode */
+		if (!edev->non_configured_vlans)
+			qede_config_accept_any_vlan(edev, true);
+
+		edev->non_configured_vlans++;
+	}
+
+	list_add(&vlan->list, &edev->vlan_list);
+
+	return 0;
+}
+
+static void qede_del_vlan_from_list(struct qede_dev *edev,
+				    struct qede_vlan *vlan)
+{
+	/* vlan0 filter isn't consuming out of our quota */
+	if (vlan->vid != 0) {
+		if (vlan->configured)
+			edev->configured_vlans--;
+		else
+			edev->non_configured_vlans--;
+	}
+
+	list_del(&vlan->list);
+	kfree(vlan);
+}
+
+static int qede_configure_vlan_filters(struct qede_dev *edev)
+{
+	int rc = 0, real_rc = 0, accept_any_vlan = 0;
+	struct qed_dev_eth_info *dev_info;
+	struct qede_vlan *vlan = NULL;
+
+	if (list_empty(&edev->vlan_list))
+		return 0;
+
+	dev_info = &edev->dev_info;
+
+	/* Configure non-configured vlans */
+	list_for_each_entry(vlan, &edev->vlan_list, list) {
+		if (vlan->configured)
+			continue;
+
+		/* We have used all our credits, now enable accept_any_vlan */
+		if ((vlan->vid != 0) &&
+		    (edev->configured_vlans == dev_info->num_vlan_filters)) {
+			accept_any_vlan = 1;
+			continue;
+		}
+
+		DP_VERBOSE(edev, NETIF_MSG_IFUP, "Adding vlan %d\n", vlan->vid);
+
+		rc = qede_set_ucast_rx_vlan(edev, QED_FILTER_XCAST_TYPE_ADD,
+					    vlan->vid);
+		if (rc) {
+			DP_ERR(edev, "Failed to configure VLAN %u\n",
+			       vlan->vid);
+			real_rc = rc;
+			continue;
+		}
+
+		vlan->configured = true;
+		/* vlan0 filter doesn't consume our VLAN filter's quota */
+		if (vlan->vid != 0) {
+			edev->non_configured_vlans--;
+			edev->configured_vlans++;
+		}
+	}
+
+	/* enable accept_any_vlan mode if we have more VLANs than credits,
+	 * or remove accept_any_vlan mode if we've actually removed
+	 * a non-configured vlan, and all remaining vlans are truly configured.
+	 */
+
+	if (accept_any_vlan)
+		qede_config_accept_any_vlan(edev, true);
+	else if (!edev->non_configured_vlans)
+		qede_config_accept_any_vlan(edev, false);
+
+	return real_rc;
+}
+
+static int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+	struct qede_vlan *vlan = NULL;
+	int rc;
+
+	DP_VERBOSE(edev, NETIF_MSG_IFDOWN, "Removing vlan 0x%04x\n", vid);
+
+	/* Find whether entry exists */
+	list_for_each_entry(vlan, &edev->vlan_list, list)
+		if (vlan->vid == vid)
+			break;
+
+	if (!vlan || (vlan->vid != vid)) {
+		DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
+			   "Vlan isn't configured\n");
+		return 0;
+	}
+
+	if (edev->state != QEDE_STATE_OPEN) {
+		/* As interface is already down, we don't have a VPORT
+		 * instance to remove vlan filter. So just update vlan list
+		 */
+		DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
+			   "Interface is down, removing VLAN from list only\n");
+		qede_del_vlan_from_list(edev, vlan);
+		return 0;
+	}
+
+	/* Remove vlan */
+	rc = qede_set_ucast_rx_vlan(edev, QED_FILTER_XCAST_TYPE_DEL, vid);
+	if (rc) {
+		DP_ERR(edev, "Failed to remove VLAN %d\n", vid);
+		return -EINVAL;
+	}
+
+	qede_del_vlan_from_list(edev, vlan);
+
+	/* We have removed a VLAN - try to see if we can
+	 * configure non-configured VLAN from the list.
+	 */
+	rc = qede_configure_vlan_filters(edev);
+
+	return rc;
+}
+
+static void qede_vlan_mark_nonconfigured(struct qede_dev *edev)
+{
+	struct qede_vlan *vlan = NULL;
+
+	if (list_empty(&edev->vlan_list))
+		return;
+
+	list_for_each_entry(vlan, &edev->vlan_list, list) {
+		if (!vlan->configured)
+			continue;
+
+		vlan->configured = false;
+
+		/* vlan0 filter isn't consuming out of our quota */
+		if (vlan->vid != 0) {
+			edev->non_configured_vlans++;
+			edev->configured_vlans--;
+		}
+
+		DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
+			   "marked vlan %d as non-configured\n",
+			   vlan->vid);
+	}
+
+	edev->accept_any_vlan = false;
+}
+
 static const struct net_device_ops qede_netdev_ops = {
 	.ndo_open = qede_open,
 	.ndo_stop = qede_close,
@@ -1260,6 +1516,8 @@ static const struct net_device_ops qede_netdev_ops = {
 	.ndo_set_mac_address = qede_set_mac_addr,
 	.ndo_validate_addr = eth_validate_addr,
 	.ndo_change_mtu = qede_change_mtu,
+	.ndo_vlan_rx_add_vid = qede_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid = qede_vlan_rx_kill_vid,
 	.ndo_get_stats64 = qede_get_stats64,
 };
 
@@ -1304,6 +1562,8 @@ static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev,
 
 	edev->num_tc = edev->dev_info.num_tc;
 
+	INIT_LIST_HEAD(&edev->vlan_list);
+
 	return edev;
 }
 
@@ -1335,7 +1595,7 @@ static void qede_init_ndev(struct qede_dev *edev)
 		      NETIF_F_HIGHDMA;
 	ndev->features = hw_features | NETIF_F_RXHASH | NETIF_F_RXCSUM |
 			 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HIGHDMA |
-			 NETIF_F_HW_VLAN_CTAG_TX;
+			 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_TX;
 
 	ndev->hw_features = hw_features;
 
@@ -2342,6 +2602,7 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode)
 
 	DP_INFO(edev, "Stopped Queues\n");
 
+	qede_vlan_mark_nonconfigured(edev);
 	edev->ops->fastpath_stop(edev->cdev);
 
 	/* Release the interrupts */
@@ -2410,6 +2671,9 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
 	edev->state = QEDE_STATE_OPEN;
 	mutex_unlock(&edev->qede_lock);
 
+	/* Program un-configured VLANs */
+	qede_configure_vlan_filters(edev);
+
 	/* Ask for link-up using current configuration */
 	memset(&link_params, 0, sizeof(link_params));
 	link_params.link_up = true;
@@ -2670,6 +2934,17 @@ static void qede_config_rx_mode(struct net_device *ndev)
 		goto out;
 	}
 
+	/* take care of VLAN mode */
+	if (ndev->flags & IFF_PROMISC) {
+		qede_config_accept_any_vlan(edev, true);
+	} else if (!edev->non_configured_vlans) {
+		/* It's possible that accept_any_vlan mode is set due to a
+		 * previous setting of IFF_PROMISC. If vlan credits are
+		 * sufficient, disable accept_any_vlan.
+		 */
+		qede_config_accept_any_vlan(edev, false);
+	}
+
 	rx_mode.filter.accept_flags = accept_flags;
 	edev->ops->filter_config(edev->cdev, &rx_mode);
 out:
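
The qede code above tracks a quota ("credits") of hardware VLAN filters: VLAN 0 has a reserved filter and is free, and once the quota is exhausted the driver falls back to accept-any-VLAN until enough filters are released. A reduced stand-alone model of that accounting (illustrative only, not part of the patch):

	#include <stdbool.h>

	/* Reduced model: "credits" = hardware VLAN filters */
	struct vlan_quota {
		unsigned int num_filters;	/* hardware quota */
		unsigned int configured;	/* filters in use (vid != 0) */
		unsigned int non_configured;	/* VLANs we couldn't program */
		bool accept_any_vlan;
	};

	static void vlan_quota_add(struct vlan_quota *q, unsigned short vid)
	{
		if (vid == 0 || q->configured < q->num_filters) {
			/* vid 0 has a reserved filter; others consume quota */
			if (vid != 0)
				q->configured++;
		} else {
			/* out of credits: first overflow enables accept-any-vlan */
			if (!q->non_configured)
				q->accept_any_vlan = true;
			q->non_configured++;
		}
	}

	static void vlan_quota_reclaim(struct vlan_quota *q)
	{
		/* once every remaining VLAN has a real filter again,
		 * accept-any-vlan can be switched back off
		 */
		while (q->non_configured && q->configured < q->num_filters) {
			q->non_configured--;
			q->configured++;
		}
		if (!q->non_configured)
			q->accept_any_vlan = false;
	}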
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 6f208132a574..dfbe3ca687f7 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -110,6 +110,11 @@ static __be64 vni_to_tunnel_id(const __u8 *vni)
 #endif
 }
 
+static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
+{
+	return gs->sock->sk->sk_family;
+}
+
 static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
 					__be32 addr, u8 vni[])
 {
@@ -153,58 +158,60 @@ static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
 	return (struct genevehdr *)(udp_hdr(skb) + 1);
 }
 
-/* geneve receive/decap routine */
-static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb)
+static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
+					    struct sk_buff *skb)
 {
-	struct genevehdr *gnvh = geneve_hdr(skb);
-	struct metadata_dst *tun_dst = NULL;
-	struct geneve_dev *geneve = NULL;
-	struct pcpu_sw_netstats *stats;
-	struct iphdr *iph = NULL;
+	u8 *vni;
 	__be32 addr;
 	static u8 zero_vni[3];
-	u8 *vni;
-	int err = 0;
-	sa_family_t sa_family;
 #if IS_ENABLED(CONFIG_IPV6)
-	struct ipv6hdr *ip6h = NULL;
-	struct in6_addr addr6;
 	static struct in6_addr zero_addr6;
 #endif
 
-	sa_family = gs->sock->sk->sk_family;
+	if (geneve_get_sk_family(gs) == AF_INET) {
+		struct iphdr *iph;
 
-	if (sa_family == AF_INET) {
 		iph = ip_hdr(skb); /* outer IP header... */
 
 		if (gs->collect_md) {
 			vni = zero_vni;
 			addr = 0;
 		} else {
-			vni = gnvh->vni;
-
+			vni = geneve_hdr(skb)->vni;
 			addr = iph->saddr;
 		}
 
-		geneve = geneve_lookup(gs, addr, vni);
+		return geneve_lookup(gs, addr, vni);
 #if IS_ENABLED(CONFIG_IPV6)
-	} else if (sa_family == AF_INET6) {
+	} else if (geneve_get_sk_family(gs) == AF_INET6) {
+		struct ipv6hdr *ip6h;
+		struct in6_addr addr6;
+
 		ip6h = ipv6_hdr(skb); /* outer IPv6 header... */
 
 		if (gs->collect_md) {
 			vni = zero_vni;
 			addr6 = zero_addr6;
 		} else {
-			vni = gnvh->vni;
-
+			vni = geneve_hdr(skb)->vni;
 			addr6 = ip6h->saddr;
 		}
 
-		geneve = geneve6_lookup(gs, addr6, vni);
+		return geneve6_lookup(gs, addr6, vni);
 #endif
 	}
 
-	if (!geneve)
-		goto drop;
+	return NULL;
+}
+
+/* geneve receive/decap routine */
+static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
+		      struct sk_buff *skb)
+{
+	struct genevehdr *gnvh = geneve_hdr(skb);
+	struct metadata_dst *tun_dst = NULL;
+	struct pcpu_sw_netstats *stats;
+	int err = 0;
+	void *oiph;
 
 	if (ip_tunnel_collect_metadata() || gs->collect_md) {
 		__be16 flags;
@@ -213,7 +220,7 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb)
 		flags = TUNNEL_KEY | (gnvh->oam ? TUNNEL_OAM : 0) |
 			(gnvh->critical ? TUNNEL_CRIT_OPT : 0);
 
-		tun_dst = udp_tun_rx_dst(skb, sa_family, flags,
+		tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
 					 vni_to_tunnel_id(gnvh->vni),
 					 gnvh->opt_len * 4);
 		if (!tun_dst)
@@ -230,7 +237,6 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb)
 	}
 
 	skb_reset_mac_header(skb);
-	skb_scrub_packet(skb, !net_eq(geneve->net, dev_net(geneve->dev)));
 	skb->protocol = eth_type_trans(skb, geneve->dev);
 	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 
@@ -241,25 +247,27 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb)
 	if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr))
 		goto drop;
 
+	oiph = skb_network_header(skb);
 	skb_reset_network_header(skb);
 
-	if (iph)
-		err = IP_ECN_decapsulate(iph, skb);
+	if (geneve_get_sk_family(gs) == AF_INET)
+		err = IP_ECN_decapsulate(oiph, skb);
 #if IS_ENABLED(CONFIG_IPV6)
-	if (ip6h)
-		err = IP6_ECN_decapsulate(ip6h, skb);
+	else
+		err = IP6_ECN_decapsulate(oiph, skb);
 #endif
 
 	if (unlikely(err)) {
 		if (log_ecn_error) {
-			if (iph)
+			if (geneve_get_sk_family(gs) == AF_INET)
 				net_info_ratelimited("non-ECT from %pI4 "
 						     "with TOS=%#x\n",
-						     &iph->saddr, iph->tos);
+						     &((struct iphdr *)oiph)->saddr,
+						     ((struct iphdr *)oiph)->tos);
 #if IS_ENABLED(CONFIG_IPV6)
-			if (ip6h)
+			else
 				net_info_ratelimited("non-ECT from %pI6\n",
-						     &ip6h->saddr);
+						     &((struct ipv6hdr *)oiph)->saddr);
 #endif
 		}
 		if (err > 1) {
@@ -321,6 +329,7 @@ static void geneve_uninit(struct net_device *dev)
 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct genevehdr *geneveh;
+	struct geneve_dev *geneve;
 	struct geneve_sock *gs;
 	int opts_len;
 
@@ -336,16 +345,21 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
 		goto error;
 
-	opts_len = geneveh->opt_len * 4;
-	if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
-				 htons(ETH_P_TEB)))
-		goto drop;
-
 	gs = rcu_dereference_sk_user_data(sk);
 	if (!gs)
 		goto drop;
 
-	geneve_rx(gs, skb);
+	geneve = geneve_lookup_skb(gs, skb);
+	if (!geneve)
+		goto drop;
+
+	opts_len = geneveh->opt_len * 4;
+	if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
+				 htons(ETH_P_TEB),
+				 !net_eq(geneve->net, dev_net(geneve->dev))))
+		goto drop;
+
+	geneve_rx(geneve, gs, skb);
 	return 0;
 
 drop:
@@ -392,7 +406,7 @@ static void geneve_notify_add_rx_port(struct geneve_sock *gs)
 	struct net_device *dev;
 	struct sock *sk = gs->sock->sk;
 	struct net *net = sock_net(sk);
-	sa_family_t sa_family = sk->sk_family;
+	sa_family_t sa_family = geneve_get_sk_family(gs);
 	__be16 port = inet_sk(sk)->inet_sport;
 	int err;
 
@@ -553,7 +567,7 @@ static void geneve_notify_del_rx_port(struct geneve_sock *gs)
 	struct net_device *dev;
 	struct sock *sk = gs->sock->sk;
 	struct net *net = sock_net(sk);
-	sa_family_t sa_family = sk->sk_family;
+	sa_family_t sa_family = geneve_get_sk_family(gs);
 	__be16 port = inet_sk(sk)->inet_sport;
 
 	rcu_read_lock();
@@ -596,7 +610,7 @@ static struct geneve_sock *geneve_find_sock(struct geneve_net *gn,
 
 	list_for_each_entry(gs, &gn->sock_list, list) {
 		if (inet_sk(gs->sock->sk)->inet_sport == dst_port &&
-		    inet_sk(gs->sock->sk)->sk.sk_family == family) {
+		    geneve_get_sk_family(gs) == family) {
 			return gs;
 		}
 	}
diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c
index 556904f572d6..03d54c4adc88 100644
--- a/drivers/net/phy/dp83848.c
+++ b/drivers/net/phy/dp83848.c
@@ -103,7 +103,7 @@ MODULE_DEVICE_TABLE(mdio, dp83848_tbl);
 static struct phy_driver dp83848_driver[] = {
 	DP83848_PHY_DRIVER(TI_DP83848C_PHY_ID, "TI DP83848C 10/100 Mbps PHY"),
-	DP83848_PHY_DRIVER(NS_DP83848C_PHY_ID, "TI DP83848C 10/100 Mbps PHY"),
+	DP83848_PHY_DRIVER(NS_DP83848C_PHY_ID, "NS DP83848C 10/100 Mbps PHY"),
 	DP83848_PHY_DRIVER(TLK10X_PHY_ID, "TI TLK10X 10/100 Mbps PHY"),
 };
 module_phy_driver(dp83848_driver);
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 3a84680b5117..c963897e713d 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1187,24 +1187,17 @@ out:
 	unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
 }
 
-static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
-		      struct vxlan_metadata *md, __be32 vni,
+static void vxlan_rcv(struct vxlan_dev *vxlan, struct vxlan_sock *vs,
+		      struct sk_buff *skb, struct vxlan_metadata *md,
 		      struct metadata_dst *tun_dst)
 {
 	struct iphdr *oip = NULL;
 	struct ipv6hdr *oip6 = NULL;
-	struct vxlan_dev *vxlan;
 	struct pcpu_sw_netstats *stats;
 	union vxlan_addr saddr;
 	int err = 0;
 
-	/* Is this VNI defined? */
-	vxlan = vxlan_vs_find_vni(vs, vni);
-	if (!vxlan)
-		goto drop;
-
 	skb_reset_mac_header(skb);
-	skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
 	skb->protocol = eth_type_trans(skb, vxlan->dev);
 	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 
@@ -1281,6 +1274,7 @@ drop:
 static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct metadata_dst *tun_dst = NULL;
+	struct vxlan_dev *vxlan;
 	struct vxlan_sock *vs;
 	struct vxlanhdr unparsed;
 	struct vxlan_metadata _md;
@@ -1302,17 +1296,23 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	unparsed.vx_flags &= ~VXLAN_HF_VNI;
 	unparsed.vx_vni &= ~VXLAN_VNI_MASK;
 
-	if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
-		goto drop;
-
 	vs = rcu_dereference_sk_user_data(sk);
 	if (!vs)
 		goto drop;
 
+	vxlan = vxlan_vs_find_vni(vs, vxlan_vni(vxlan_hdr(skb)->vx_vni));
+	if (!vxlan)
+		goto drop;
+
+	if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB),
+				 !net_eq(vxlan->net, dev_net(vxlan->dev))))
+		goto drop;
+
 	if (vxlan_collect_metadata(vs)) {
+		__be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
+
 		tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY,
-					 vxlan_vni(vxlan_hdr(skb)->vx_vni),
-					 sizeof(*md));
+					 vxlan_vni_to_tun_id(vni), sizeof(*md));
 
 		if (!tun_dst)
 			goto drop;
@@ -1343,7 +1343,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 		goto drop;
 	}
 
-	vxlan_rcv(vs, skb, md, vxlan_vni(vxlan_hdr(skb)->vx_vni), tun_dst);
+	vxlan_rcv(vxlan, vs, skb, md, tun_dst);
 	return 0;
 
 drop:
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index ba0d9789eb6e..1d82dd5e9a08 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -34,8 +34,6 @@ int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
 int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n);
 int nfnetlink_has_listeners(struct net *net, unsigned int group);
-struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
-				    u32 dst_portid, gfp_t gfp_mask);
 int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
 		   unsigned int group, int echo, gfp_t flags);
 int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error);
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 0b41959aab9f..da14ab61f363 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -69,16 +69,6 @@ extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group);
 extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
 extern int netlink_has_listeners(struct sock *sk, unsigned int group);
-extern struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
-					   unsigned int ldiff, u32 dst_portid,
-					   gfp_t gfp_mask);
-static inline struct sk_buff *
-netlink_alloc_skb(struct sock *ssk, unsigned int size, u32 dst_portid,
-		  gfp_t gfp_mask)
-{
-	return __netlink_alloc_skb(ssk, size, 0, dst_portid, gfp_mask);
-}
-
 extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock);
 extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid,
 			     __u32 group, gfp_t allocation);
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 81ab178e31c1..e53b0ca49e41 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -33,6 +33,8 @@ struct qed_update_vport_params {
 	u8 vport_id;
 	u8 update_vport_active_flg;
 	u8 vport_active_flg;
+	u8 update_accept_any_vlan_flg;
+	u8 accept_any_vlan;
 	u8 update_rss_flg;
 	struct qed_update_vport_rss_params rss_params;
 };
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 39206751463e..89b536796e53 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3713,19 +3713,18 @@ static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
 */
 static inline __wsum lco_csum(struct sk_buff *skb)
 {
-	char *inner_csum_field;
-	__wsum csum;
+	unsigned char *csum_start = skb_checksum_start(skb);
+	unsigned char *l4_hdr = skb_transport_header(skb);
+	__wsum partial;
 
 	/* Start with complement of inner checksum adjustment */
-	inner_csum_field = skb->data + skb_checksum_start_offset(skb) +
-			   skb->csum_offset;
-	csum = ~csum_unfold(*(__force __sum16 *)inner_csum_field);
+	partial = ~csum_unfold(*(__force __sum16 *)(csum_start +
+						    skb->csum_offset));
 
 	/* Add in checksum of our headers (incl. outer checksum
-	 * adjustment filled in by caller)
+	 * adjustment filled in by caller) and return result.
 	 */
-	csum = skb_checksum(skb, 0, skb_checksum_start_offset(skb), csum);
-	/* The result is the checksum from skb->data to end of packet */
-	return csum;
+	return csum_partial(l4_hdr, csum_start - l4_hdr, partial);
 }
 #endif /* __KERNEL__ */
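
The lco_csum() rewrite relies on ones'-complement sums being additive over adjacent byte ranges, which is what lets local checksum offload derive the outer checksum without re-walking the payload. A user-space demonstration of that property (naive helpers standing in for csum_partial()/csum_fold(); illustrative only):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* Naive 16-bit ones'-complement sum over a byte range */
	static uint32_t csum_range(const uint8_t *buf, size_t len)
	{
		uint32_t sum = 0;
		size_t i;

		for (i = 0; i + 1 < len; i += 2)
			sum += (uint32_t)buf[i] << 8 | buf[i + 1];
		if (len & 1)
			sum += (uint32_t)buf[len - 1] << 8;
		while (sum >> 16)
			sum = (sum & 0xffff) + (sum >> 16);
		return sum;
	}

	int main(void)
	{
		uint8_t pkt[64];
		uint32_t whole, sum;

		/* pretend bytes 0..19 are outer headers, 20..63 the inner packet */
		memset(pkt, 0xab, sizeof(pkt));

		whole = csum_range(pkt, sizeof(pkt));
		sum = csum_range(pkt, 20) + csum_range(pkt + 20, sizeof(pkt) - 20);
		while (sum >> 16)
			sum = (sum & 0xffff) + (sum >> 16);

		/* adjacent ranges sum to the whole-range checksum */
		printf("%04x == %04x\n", sum, whole);
		return 0;
	}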
*/ - csum = skb_checksum(skb, 0, skb_checksum_start_offset(skb), csum); - /* The result is the checksum from skb->data to end of packet */ - return csum; + return csum_partial(l4_hdr, csum_start - l4_hdr, partial); } #endif /* __KERNEL__ */ diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 43c0e771f417..8d4608ce8716 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -83,7 +83,6 @@ struct genl_family { * @attrs: netlink attributes * @_net: network namespace * @user_ptr: user pointers - * @dst_sk: destination socket */ struct genl_info { u32 snd_seq; @@ -94,7 +93,6 @@ struct genl_info { struct nlattr ** attrs; possible_net_t _net; void * user_ptr[2]; - struct sock * dst_sk; }; static inline struct net *genl_info_net(struct genl_info *info) @@ -188,8 +186,6 @@ int genl_unregister_family(struct genl_family *family); void genl_notify(struct genl_family *family, struct sk_buff *skb, struct genl_info *info, u32 group, gfp_t flags); -struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info, - gfp_t flags); void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, struct genl_family *family, int flags, u8 cmd); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 87408ab80856..4dd616376fec 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -270,7 +270,8 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, return INET_ECN_encapsulate(tos, inner); } -int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto); +int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, + bool xnet); void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, u8 proto, u8 tos, u8 ttl, __be16 df, bool xnet); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 6570f379aba2..f3c9857c645d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -259,8 +259,12 @@ static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) rcu_read_lock(); opt = rcu_dereference(np->opt); - if (opt && !atomic_inc_not_zero(&opt->refcnt)) - opt = NULL; + if (opt) { + if (!atomic_inc_not_zero(&opt->refcnt)) + opt = NULL; + else + opt = rcu_pointer_handoff(opt); + } rcu_read_unlock(); return opt; } diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 1b85a3b40c5a..748083de367a 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -294,6 +294,15 @@ static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id) #endif } +static inline __be64 vxlan_vni_to_tun_id(__be32 vni) +{ +#if defined(__BIG_ENDIAN) + return (__be64)vni; +#else + return (__be64)vni << 32; +#endif +} + static inline size_t vxlan_rco_start(__be32 vni_field) { return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index f095155d8749..0dba4e4ed2be 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -107,8 +107,10 @@ struct nlmsgerr { #define NETLINK_PKTINFO 3 #define NETLINK_BROADCAST_ERROR 4 #define NETLINK_NO_ENOBUFS 5 +#ifndef __KERNEL__ #define NETLINK_RX_RING 6 #define NETLINK_TX_RING 7 +#endif #define NETLINK_LISTEN_ALL_NSID 8 #define NETLINK_LIST_MEMBERSHIPS 9 #define NETLINK_CAP_ACK 10 @@ -134,6 +136,7 @@ struct nl_mmap_hdr { __u32 nm_gid; }; +#ifndef __KERNEL__ enum nl_mmap_status { NL_MMAP_STATUS_UNUSED, NL_MMAP_STATUS_RESERVED, @@ -145,6 +148,7 @@ enum nl_mmap_status { #define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO #define 
NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) #define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) +#endif #define NET_MAJOR 36 /* Major 36 is reserved for networking */ diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h index f2159d30d1f5..d79399394b46 100644 --- a/include/uapi/linux/netlink_diag.h +++ b/include/uapi/linux/netlink_diag.h @@ -48,6 +48,8 @@ enum { #define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */ #define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */ +#ifndef __KERNEL__ #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */ +#endif #endif diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 40197ff8918a..e9c635eae24d 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -598,7 +598,6 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state) return -ENETDOWN; br_set_state(p, state); - br_log_state(p); br_port_state_selection(p->br); return 0; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 302ab0a43725..1b5d145dfcbf 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -900,7 +900,6 @@ static inline void br_nf_core_fini(void) {} #endif /* br_stp.c */ -void br_log_state(const struct net_bridge_port *p); void br_set_state(struct net_bridge_port *p, unsigned int state); struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no); void br_init_port(struct net_bridge_port *p); diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index b3cca126b103..c22816a0b1b1 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -30,13 +30,6 @@ static const char *const br_port_state_names[] = { [BR_STATE_BLOCKING] = "blocking", }; -void br_log_state(const struct net_bridge_port *p) -{ - br_info(p->br, "port %u(%s) entered %s state\n", - (unsigned int) p->port_no, p->dev->name, - br_port_state_names[p->state]); -} - void br_set_state(struct net_bridge_port *p, unsigned int state) { struct switchdev_attr attr = { @@ -52,6 +45,10 @@ void br_set_state(struct net_bridge_port *p, unsigned int state) if (err && err != -EOPNOTSUPP) br_warn(p->br, "error setting offload STP state on port %u(%s)\n", (unsigned int) p->port_no, p->dev->name); + else + br_info(p->br, "port %u(%s) entered %s state\n", + (unsigned int) p->port_no, p->dev->name, + br_port_state_names[p->state]); } /* called under bridge lock */ @@ -126,7 +123,6 @@ static void br_root_port_block(const struct net_bridge *br, (unsigned int) p->port_no, p->dev->name); br_set_state(p, BR_STATE_LISTENING); - br_log_state(p); br_ifinfo_notify(RTM_NEWLINK, p); if (br->forward_delay > 0) @@ -407,7 +403,6 @@ static void br_make_blocking(struct net_bridge_port *p) br_topology_change_detection(p->br); br_set_state(p, BR_STATE_BLOCKING); - br_log_state(p); br_ifinfo_notify(RTM_NEWLINK, p); del_timer(&p->forward_delay_timer); @@ -431,7 +426,6 @@ static void br_make_forwarding(struct net_bridge_port *p) else br_set_state(p, BR_STATE_LEARNING); - br_log_state(p); br_ifinfo_notify(RTM_NEWLINK, p); if (br->forward_delay != 0) diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index a31ac6ad76a2..984d46263007 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -102,7 +102,6 @@ void br_stp_enable_port(struct net_bridge_port *p) { br_init_port(p); br_port_state_selection(p->br); - br_log_state(p); br_ifinfo_notify(RTM_NEWLINK, p); } @@ -118,7 +117,6 @@ void br_stp_disable_port(struct net_bridge_port *p) 
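
The uapi changes above are deliberately asymmetric: NETLINK_RX_RING, NETLINK_TX_RING, struct nl_mmap_hdr and the NL_MMAP_* definitions stay visible to user-space (now wrapped in #ifndef __KERNEL__) so existing programs keep compiling, while the kernel itself stops recognizing the options. A minimal probe — hypothetical, not part of this patch, and assuming 4 KiB pages — shows what such a program sees; on a kernel with this change the setsockopt() falls through to the default case and fails, typically with ENOPROTOOPT:

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>

    #ifndef SOL_NETLINK
    #define SOL_NETLINK 270   /* not exported by all libc headers */
    #endif

    int main(void)
    {
        /* One page, one frame: the smallest request that satisfies the
         * old ring invariants (page-aligned block, frame fits block). */
        struct nl_mmap_req req = {
            .nm_block_size = 4096,
            .nm_block_nr   = 1,
            .nm_frame_size = 4096,
            .nm_frame_nr   = 1,
        };
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

        if (fd < 0) {
            perror("socket");
            return 1;
        }
        if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
            printf("RX ring rejected: %s\n", strerror(errno));
        else
            printf("RX ring accepted (pre-removal kernel)\n");
        return 0;
    }
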
p->topology_change_ack = 0; p->config_pending = 0; - br_log_state(p); br_ifinfo_notify(RTM_NEWLINK, p); del_timer(&p->message_age_timer); diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 5f0f5af0ec35..da058b85aa22 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -98,7 +98,6 @@ static void br_forward_delay_timer_expired(unsigned long arg) br_topology_change_detection(br); netif_carrier_on(br->dev); } - br_log_state(p); rcu_read_lock(); br_ifinfo_notify(RTM_NEWLINK, p); rcu_read_unlock(); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 917c2c1bfadd..12071e28d958 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -238,7 +238,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, return -EINVAL; } } - return iptunnel_pull_header(skb, hdr_len, tpi->proto); + return iptunnel_pull_header(skb, hdr_len, tpi->proto, false); } static void ipgre_err(struct sk_buff *skb, u32 info, diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index a6e58b6141cd..eaca2449a09a 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -86,7 +86,8 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, } EXPORT_SYMBOL_GPL(iptunnel_xmit); -int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) +int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, + bool xnet) { if (unlikely(!pskb_may_pull(skb, hdr_len))) return -ENOMEM; @@ -109,13 +110,10 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) skb->protocol = inner_proto; } - nf_reset(skb); - secpath_reset(skb); skb_clear_hash_if_not_l4(skb); - skb_dst_drop(skb); skb->vlan_tci = 0; skb_set_queue_mapping(skb, 0); - skb->pkt_type = PACKET_HOST; + skb_scrub_packet(skb, xnet); return 0; } EXPORT_SYMBOL_GPL(iptunnel_pull_header); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 6ec5b42fd172..ec51d02166de 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -195,7 +195,7 @@ static int ipip_rcv(struct sk_buff *skb) if (tunnel) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; - if (iptunnel_pull_header(skb, 0, tpi.proto)) + if (iptunnel_pull_header(skb, 0, tpi.proto, false)) goto drop; return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 0625ac6356b5..f45b8ffc2840 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -740,7 +740,7 @@ static int ipip_rcv(struct sk_buff *skb) if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; - if (iptunnel_pull_header(skb, 0, tpi.proto)) + if (iptunnel_pull_header(skb, 0, tpi.proto, false)) goto drop; return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error); } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index a7ba23353dab..9a99f686d06f 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -127,13 +127,6 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group) } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size, - u32 dst_portid, gfp_t gfp_mask) -{ - return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask); -} -EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb); - int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags) { diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 8ca932057c13..11f81c8385fc 100644 --- 
a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -330,14 +330,13 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, * message. WARNING: has to be <= 128k due to slab restrictions */ n = max(inst_size, pkt_size); - skb = nfnetlink_alloc_skb(net, n, peer_portid, GFP_ATOMIC); + skb = alloc_skb(n, GFP_ATOMIC); if (!skb) { if (n > pkt_size) { /* try to allocate only as much as we need for current * packet */ - skb = nfnetlink_alloc_skb(net, pkt_size, - peer_portid, GFP_ATOMIC); + skb = alloc_skb(pkt_size, GFP_ATOMIC); } } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 1d3936587ace..75429997ed41 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -301,7 +301,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, __be32 **packet_id_ptr) { size_t size; - size_t data_len = 0, cap_len = 0, rem_len = 0; + size_t data_len = 0, cap_len = 0; unsigned int hlen = 0; struct sk_buff *skb; struct nlattr *nla; @@ -361,7 +361,6 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, hlen = min_t(unsigned int, hlen, data_len); size += sizeof(struct nlattr) + hlen; cap_len = entskb->len; - rem_len = data_len - hlen; break; } @@ -386,8 +385,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, size += nla_total_size(seclen); } - skb = __netlink_alloc_skb(net->nfnl, size, rem_len, queue->peer_portid, - GFP_ATOMIC); + skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { skb_tx_error(entskb); return NULL; diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig index 2c5e95e9bfbd..5d6e8c05b3d4 100644 --- a/net/netlink/Kconfig +++ b/net/netlink/Kconfig @@ -2,15 +2,6 @@ # Netlink Sockets # -config NETLINK_MMAP - bool "NETLINK: mmaped IO" - ---help--- - This option enables support for memory mapped netlink IO. This - reduces overhead by avoiding copying data between kernel- and - userspace. - - If unsure, say N. 
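
The nfnetlink_log conversion above keeps the driver's existing fallback strategy, just on top of plain alloc_skb(): first try to make room for a whole batch (inst_size), then retry with only the current packet if memory is tight. The same shape in ordinary C — the names here are illustrative, not kernel helpers:

    #include <stdlib.h>

    /* Try the batch size first; on failure fall back to the minimum
     * that still fits the packet at hand. Returns NULL only if even
     * the small allocation fails. */
    static void *alloc_batch_or_single(size_t batch_size, size_t pkt_size,
                                       size_t *got)
    {
        size_t n = batch_size > pkt_size ? batch_size : pkt_size;
        void *buf = malloc(n);

        if (!buf && n > pkt_size) {
            n = pkt_size;
            buf = malloc(n);
        }
        if (buf && got)
            *got = n;
        return buf;
    }
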
- config NETLINK_DIAG tristate "NETLINK: socket monitoring interface" default n diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f1ffb34e253f..c8416792cce0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -225,7 +225,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, dev_hold(dev); - if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head)) + if (is_vmalloc_addr(skb->head)) nskb = netlink_to_full_skb(skb, GFP_ATOMIC); else nskb = skb_clone(skb, GFP_ATOMIC); @@ -300,610 +300,8 @@ static void netlink_rcv_wake(struct sock *sk) wake_up_interruptible(&nlk->wait); } -#ifdef CONFIG_NETLINK_MMAP -static bool netlink_rx_is_mmaped(struct sock *sk) -{ - return nlk_sk(sk)->rx_ring.pg_vec != NULL; -} - -static bool netlink_tx_is_mmaped(struct sock *sk) -{ - return nlk_sk(sk)->tx_ring.pg_vec != NULL; -} - -static __pure struct page *pgvec_to_page(const void *addr) -{ - if (is_vmalloc_addr(addr)) - return vmalloc_to_page(addr); - else - return virt_to_page(addr); -} - -static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len) -{ - unsigned int i; - - for (i = 0; i < len; i++) { - if (pg_vec[i] != NULL) { - if (is_vmalloc_addr(pg_vec[i])) - vfree(pg_vec[i]); - else - free_pages((unsigned long)pg_vec[i], order); - } - } - kfree(pg_vec); -} - -static void *alloc_one_pg_vec_page(unsigned long order) -{ - void *buffer; - gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | - __GFP_NOWARN | __GFP_NORETRY; - - buffer = (void *)__get_free_pages(gfp_flags, order); - if (buffer != NULL) - return buffer; - - buffer = vzalloc((1 << order) * PAGE_SIZE); - if (buffer != NULL) - return buffer; - - gfp_flags &= ~__GFP_NORETRY; - return (void *)__get_free_pages(gfp_flags, order); -} - -static void **alloc_pg_vec(struct netlink_sock *nlk, - struct nl_mmap_req *req, unsigned int order) -{ - unsigned int block_nr = req->nm_block_nr; - unsigned int i; - void **pg_vec; - - pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL); - if (pg_vec == NULL) - return NULL; - - for (i = 0; i < block_nr; i++) { - pg_vec[i] = alloc_one_pg_vec_page(order); - if (pg_vec[i] == NULL) - goto err1; - } - - return pg_vec; -err1: - free_pg_vec(pg_vec, order, block_nr); - return NULL; -} - - -static void -__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec, - unsigned int order) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct sk_buff_head *queue; - struct netlink_ring *ring; - - queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; - ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; - - spin_lock_bh(&queue->lock); - - ring->frame_max = req->nm_frame_nr - 1; - ring->head = 0; - ring->frame_size = req->nm_frame_size; - ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; - - swap(ring->pg_vec_len, req->nm_block_nr); - swap(ring->pg_vec_order, order); - swap(ring->pg_vec, pg_vec); - - __skb_queue_purge(queue); - spin_unlock_bh(&queue->lock); - - WARN_ON(atomic_read(&nlk->mapped)); - - if (pg_vec) - free_pg_vec(pg_vec, order, req->nm_block_nr); -} - -static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, - bool tx_ring) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - void **pg_vec = NULL; - unsigned int order = 0; - - ring = tx_ring ? 
&nlk->tx_ring : &nlk->rx_ring; - - if (atomic_read(&nlk->mapped)) - return -EBUSY; - if (atomic_read(&ring->pending)) - return -EBUSY; - - if (req->nm_block_nr) { - if (ring->pg_vec != NULL) - return -EBUSY; - - if ((int)req->nm_block_size <= 0) - return -EINVAL; - if (!PAGE_ALIGNED(req->nm_block_size)) - return -EINVAL; - if (req->nm_frame_size < NL_MMAP_HDRLEN) - return -EINVAL; - if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT)) - return -EINVAL; - - ring->frames_per_block = req->nm_block_size / - req->nm_frame_size; - if (ring->frames_per_block == 0) - return -EINVAL; - if (ring->frames_per_block * req->nm_block_nr != - req->nm_frame_nr) - return -EINVAL; - - order = get_order(req->nm_block_size); - pg_vec = alloc_pg_vec(nlk, req, order); - if (pg_vec == NULL) - return -ENOMEM; - } else { - if (req->nm_frame_nr) - return -EINVAL; - } - - mutex_lock(&nlk->pg_vec_lock); - if (atomic_read(&nlk->mapped) == 0) { - __netlink_set_ring(sk, req, tx_ring, pg_vec, order); - mutex_unlock(&nlk->pg_vec_lock); - return 0; - } - - mutex_unlock(&nlk->pg_vec_lock); - - if (pg_vec) - free_pg_vec(pg_vec, order, req->nm_block_nr); - - return -EBUSY; -} - -static void netlink_mm_open(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct socket *sock = file->private_data; - struct sock *sk = sock->sk; - - if (sk) - atomic_inc(&nlk_sk(sk)->mapped); -} - -static void netlink_mm_close(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct socket *sock = file->private_data; - struct sock *sk = sock->sk; - - if (sk) - atomic_dec(&nlk_sk(sk)->mapped); -} - -static const struct vm_operations_struct netlink_mmap_ops = { - .open = netlink_mm_open, - .close = netlink_mm_close, -}; - -static int netlink_mmap(struct file *file, struct socket *sock, - struct vm_area_struct *vma) -{ - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - unsigned long start, size, expected; - unsigned int i; - int err = -EINVAL; - - if (vma->vm_pgoff) - return -EINVAL; - - mutex_lock(&nlk->pg_vec_lock); - - expected = 0; - for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { - if (ring->pg_vec == NULL) - continue; - expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE; - } - - if (expected == 0) - goto out; - - size = vma->vm_end - vma->vm_start; - if (size != expected) - goto out; - - start = vma->vm_start; - for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { - if (ring->pg_vec == NULL) - continue; - - for (i = 0; i < ring->pg_vec_len; i++) { - struct page *page; - void *kaddr = ring->pg_vec[i]; - unsigned int pg_num; - - for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) { - page = pgvec_to_page(kaddr); - err = vm_insert_page(vma, start, page); - if (err < 0) - goto out; - start += PAGE_SIZE; - kaddr += PAGE_SIZE; - } - } - } - - atomic_inc(&nlk->mapped); - vma->vm_ops = &netlink_mmap_ops; - err = 0; -out: - mutex_unlock(&nlk->pg_vec_lock); - return err; -} - -static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len) -{ -#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 - struct page *p_start, *p_end; - - /* First page is flushed through netlink_{get,set}_status */ - p_start = pgvec_to_page(hdr + PAGE_SIZE); - p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1); - while (p_start <= p_end) { - flush_dcache_page(p_start); - p_start++; - } -#endif -} - -static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) -{ - smp_rmb(); - 
flush_dcache_page(pgvec_to_page(hdr)); - return hdr->nm_status; -} - -static void netlink_set_status(struct nl_mmap_hdr *hdr, - enum nl_mmap_status status) -{ - smp_mb(); - hdr->nm_status = status; - flush_dcache_page(pgvec_to_page(hdr)); -} - -static struct nl_mmap_hdr * -__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos) -{ - unsigned int pg_vec_pos, frame_off; - - pg_vec_pos = pos / ring->frames_per_block; - frame_off = pos % ring->frames_per_block; - - return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size); -} - -static struct nl_mmap_hdr * -netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos, - enum nl_mmap_status status) -{ - struct nl_mmap_hdr *hdr; - - hdr = __netlink_lookup_frame(ring, pos); - if (netlink_get_status(hdr) != status) - return NULL; - - return hdr; -} - -static struct nl_mmap_hdr * -netlink_current_frame(const struct netlink_ring *ring, - enum nl_mmap_status status) -{ - return netlink_lookup_frame(ring, ring->head, status); -} - -static void netlink_increment_head(struct netlink_ring *ring) -{ - ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0; -} - -static void netlink_forward_ring(struct netlink_ring *ring) -{ - unsigned int head = ring->head; - const struct nl_mmap_hdr *hdr; - - do { - hdr = __netlink_lookup_frame(ring, ring->head); - if (hdr->nm_status == NL_MMAP_STATUS_UNUSED) - break; - if (hdr->nm_status != NL_MMAP_STATUS_SKIP) - break; - netlink_increment_head(ring); - } while (ring->head != head); -} - -static bool netlink_has_valid_frame(struct netlink_ring *ring) -{ - unsigned int head = ring->head, pos = head; - const struct nl_mmap_hdr *hdr; - - do { - hdr = __netlink_lookup_frame(ring, pos); - if (hdr->nm_status == NL_MMAP_STATUS_VALID) - return true; - pos = pos != 0 ? pos - 1 : ring->frame_max; - } while (pos != head); - - return false; -} - -static bool netlink_dump_space(struct netlink_sock *nlk) -{ - struct netlink_ring *ring = &nlk->rx_ring; - struct nl_mmap_hdr *hdr; - unsigned int n; - - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) - return false; - - n = ring->head + ring->frame_max / 2; - if (n > ring->frame_max) - n -= ring->frame_max; - - hdr = __netlink_lookup_frame(ring, n); - - return hdr->nm_status == NL_MMAP_STATUS_UNUSED; -} - -static unsigned int netlink_poll(struct file *file, struct socket *sock, - poll_table *wait) -{ - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - unsigned int mask; - int err; - - if (nlk->rx_ring.pg_vec != NULL) { - /* Memory mapped sockets don't call recvmsg(), so flow control - * for dumps is performed here. A dump is allowed to continue - * if at least half the ring is unused. - */ - while (nlk->cb_running && netlink_dump_space(nlk)) { - err = netlink_dump(sk); - if (err < 0) { - sk->sk_err = -err; - sk->sk_error_report(sk); - break; - } - } - netlink_rcv_wake(sk); - } - - mask = datagram_poll(file, sock, wait); - - /* We could already have received frames in the normal receive - * queue, that will show up as NL_MMAP_STATUS_COPY in the ring, - * so if mask contains pollin/etc already, there's no point - * walking the ring. 
- */ - if ((mask & (POLLIN | POLLRDNORM)) != (POLLIN | POLLRDNORM)) { - spin_lock_bh(&sk->sk_receive_queue.lock); - if (nlk->rx_ring.pg_vec) { - if (netlink_has_valid_frame(&nlk->rx_ring)) - mask |= POLLIN | POLLRDNORM; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - } - - spin_lock_bh(&sk->sk_write_queue.lock); - if (nlk->tx_ring.pg_vec) { - if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED)) - mask |= POLLOUT | POLLWRNORM; - } - spin_unlock_bh(&sk->sk_write_queue.lock); - - return mask; -} - -static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb) -{ - return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN); -} - -static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, - struct netlink_ring *ring, - struct nl_mmap_hdr *hdr) -{ - unsigned int size; - void *data; - - size = ring->frame_size - NL_MMAP_HDRLEN; - data = (void *)hdr + NL_MMAP_HDRLEN; - - skb->head = data; - skb->data = data; - skb_reset_tail_pointer(skb); - skb->end = skb->tail + size; - skb->len = 0; - - skb->destructor = netlink_skb_destructor; - NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED; - NETLINK_CB(skb).sk = sk; -} - -static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, - u32 dst_portid, u32 dst_group, - struct scm_cookie *scm) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - struct nl_mmap_hdr *hdr; - struct sk_buff *skb; - unsigned int maxlen; - int err = 0, len = 0; - - mutex_lock(&nlk->pg_vec_lock); - - ring = &nlk->tx_ring; - maxlen = ring->frame_size - NL_MMAP_HDRLEN; - - do { - unsigned int nm_len; - - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); - if (hdr == NULL) { - if (!(msg->msg_flags & MSG_DONTWAIT) && - atomic_read(&nlk->tx_ring.pending)) - schedule(); - continue; - } - - nm_len = ACCESS_ONCE(hdr->nm_len); - if (nm_len > maxlen) { - err = -EINVAL; - goto out; - } - - netlink_frame_flush_dcache(hdr, nm_len); - - skb = alloc_skb(nm_len, GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto out; - } - __skb_put(skb, nm_len); - memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); - - netlink_increment_head(ring); - - NETLINK_CB(skb).portid = nlk->portid; - NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = scm->creds; - - err = security_netlink_send(sk, skb); - if (err) { - kfree_skb(skb); - goto out; - } - - if (unlikely(dst_group)) { - atomic_inc(&skb->users); - netlink_broadcast(sk, skb, dst_portid, dst_group, - GFP_KERNEL); - } - err = netlink_unicast(sk, skb, dst_portid, - msg->msg_flags & MSG_DONTWAIT); - if (err < 0) - goto out; - len += err; - - } while (hdr != NULL || - (!(msg->msg_flags & MSG_DONTWAIT) && - atomic_read(&nlk->tx_ring.pending))); - - if (len > 0) - err = len; -out: - mutex_unlock(&nlk->pg_vec_lock); - return err; -} - -static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) -{ - struct nl_mmap_hdr *hdr; - - hdr = netlink_mmap_hdr(skb); - hdr->nm_len = skb->len; - hdr->nm_group = NETLINK_CB(skb).dst_group; - hdr->nm_pid = NETLINK_CB(skb).creds.pid; - hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); - hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); - netlink_frame_flush_dcache(hdr, hdr->nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_VALID); - - NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; - kfree_skb(skb); -} - -static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring 
= &nlk->rx_ring; - struct nl_mmap_hdr *hdr; - - spin_lock_bh(&sk->sk_receive_queue.lock); - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) { - spin_unlock_bh(&sk->sk_receive_queue.lock); - kfree_skb(skb); - netlink_overrun(sk); - return; - } - netlink_increment_head(ring); - __skb_queue_tail(&sk->sk_receive_queue, skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); - - hdr->nm_len = skb->len; - hdr->nm_group = NETLINK_CB(skb).dst_group; - hdr->nm_pid = NETLINK_CB(skb).creds.pid; - hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); - hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); - netlink_set_status(hdr, NL_MMAP_STATUS_COPY); -} - -#else /* CONFIG_NETLINK_MMAP */ -#define netlink_rx_is_mmaped(sk) false -#define netlink_tx_is_mmaped(sk) false -#define netlink_mmap sock_no_mmap -#define netlink_poll datagram_poll -#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0 -#endif /* CONFIG_NETLINK_MMAP */ - static void netlink_skb_destructor(struct sk_buff *skb) { -#ifdef CONFIG_NETLINK_MMAP - struct nl_mmap_hdr *hdr; - struct netlink_ring *ring; - struct sock *sk; - - /* If a packet from the kernel to userspace was freed because of an - * error without being delivered to userspace, the kernel must reset - * the status. In the direction userspace to kernel, the status is - * always reset here after the packet was processed and freed. - */ - if (netlink_skb_is_mmaped(skb)) { - hdr = netlink_mmap_hdr(skb); - sk = NETLINK_CB(skb).sk; - - if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) { - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); - ring = &nlk_sk(sk)->tx_ring; - } else { - if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) { - hdr->nm_len = 0; - netlink_set_status(hdr, NL_MMAP_STATUS_VALID); - } - ring = &nlk_sk(sk)->rx_ring; - } - - WARN_ON(atomic_read(&ring->pending) == 0); - atomic_dec(&ring->pending); - sock_put(sk); - - skb->head = NULL; - } -#endif if (is_vmalloc_addr(skb->head)) { if (!skb->cloned || !atomic_dec_return(&(skb_shinfo(skb)->dataref))) @@ -937,18 +335,6 @@ static void netlink_sock_destruct(struct sock *sk) } skb_queue_purge(&sk->sk_receive_queue); -#ifdef CONFIG_NETLINK_MMAP - if (1) { - struct nl_mmap_req req; - - memset(&req, 0, sizeof(req)); - if (nlk->rx_ring.pg_vec) - __netlink_set_ring(sk, &req, false, NULL, 0); - memset(&req, 0, sizeof(req)); - if (nlk->tx_ring.pg_vec) - __netlink_set_ring(sk, &req, true, NULL, 0); - } -#endif /* CONFIG_NETLINK_MMAP */ if (!sock_flag(sk, SOCK_DEAD)) { printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); @@ -1194,9 +580,6 @@ static int __netlink_create(struct net *net, struct socket *sock, mutex_init(nlk->cb_mutex); } init_waitqueue_head(&nlk->wait); -#ifdef CONFIG_NETLINK_MMAP - mutex_init(&nlk->pg_vec_lock); -#endif sk->sk_destruct = netlink_sock_destruct; sk->sk_protocol = protocol; @@ -1728,8 +1111,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, nlk = nlk_sk(sk); if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_S_CONGESTED, &nlk->state)) && - !netlink_skb_is_mmaped(skb)) { + test_bit(NETLINK_S_CONGESTED, &nlk->state))) { DECLARE_WAITQUEUE(wait, current); if (!*timeo) { if (!ssk || netlink_is_kernel(ssk)) @@ -1767,14 +1149,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) netlink_deliver_tap(skb); -#ifdef CONFIG_NETLINK_MMAP - if (netlink_skb_is_mmaped(skb)) - netlink_queue_mmaped_skb(sk, skb); - else if (netlink_rx_is_mmaped(sk)) - netlink_ring_set_copied(sk, skb); - else -#endif /* 
CONFIG_NETLINK_MMAP */ - skb_queue_tail(&sk->sk_receive_queue, skb); + skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); return len; } @@ -1798,9 +1173,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) int delta; WARN_ON(skb->sk != NULL); - if (netlink_skb_is_mmaped(skb)) - return skb; - delta = skb->end - skb->tail; if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) return skb; @@ -1876,79 +1248,6 @@ retry: } EXPORT_SYMBOL(netlink_unicast); -struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size, - unsigned int ldiff, u32 dst_portid, - gfp_t gfp_mask) -{ -#ifdef CONFIG_NETLINK_MMAP - unsigned int maxlen, linear_size; - struct sock *sk = NULL; - struct sk_buff *skb; - struct netlink_ring *ring; - struct nl_mmap_hdr *hdr; - - sk = netlink_getsockbyportid(ssk, dst_portid); - if (IS_ERR(sk)) - goto out; - - ring = &nlk_sk(sk)->rx_ring; - /* fast-path without atomic ops for common case: non-mmaped receiver */ - if (ring->pg_vec == NULL) - goto out_put; - - /* We need to account the full linear size needed as a ring - * slot cannot have non-linear parts. - */ - linear_size = size + ldiff; - if (ring->frame_size - NL_MMAP_HDRLEN < linear_size) - goto out_put; - - skb = alloc_skb_head(gfp_mask); - if (skb == NULL) - goto err1; - - spin_lock_bh(&sk->sk_receive_queue.lock); - /* check again under lock */ - if (ring->pg_vec == NULL) - goto out_free; - - /* check again under lock */ - maxlen = ring->frame_size - NL_MMAP_HDRLEN; - if (maxlen < linear_size) - goto out_free; - - netlink_forward_ring(ring); - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) - goto err2; - - netlink_ring_setup_skb(skb, sk, ring, hdr); - netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); - atomic_inc(&ring->pending); - netlink_increment_head(ring); - - spin_unlock_bh(&sk->sk_receive_queue.lock); - return skb; - -err2: - kfree_skb(skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); - netlink_overrun(sk); -err1: - sock_put(sk); - return NULL; - -out_free: - kfree_skb(skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); -out_put: - sock_put(sk); -out: -#endif - return alloc_skb(size, gfp_mask); -} -EXPORT_SYMBOL_GPL(__netlink_alloc_skb); - int netlink_has_listeners(struct sock *sk, unsigned int group) { int res = 0; @@ -2225,8 +1524,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, if (level != SOL_NETLINK) return -ENOPROTOOPT; - if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING && - optlen >= sizeof(int) && + if (optlen >= sizeof(int) && get_user(val, (unsigned int __user *)optval)) return -EFAULT; @@ -2279,25 +1577,6 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, } err = 0; break; -#ifdef CONFIG_NETLINK_MMAP - case NETLINK_RX_RING: - case NETLINK_TX_RING: { - struct nl_mmap_req req; - - /* Rings might consume more memory than queue limits, require - * CAP_NET_ADMIN. 
- */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (optlen < sizeof(req)) - return -EINVAL; - if (copy_from_user(&req, optval, sizeof(req))) - return -EFAULT; - err = netlink_set_ring(sk, &req, - optname == NETLINK_TX_RING); - break; - } -#endif /* CONFIG_NETLINK_MMAP */ case NETLINK_LISTEN_ALL_NSID: if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) return -EPERM; @@ -2467,18 +1746,6 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) smp_rmb(); } - /* It's a really convoluted way for userland to ask for mmaped - * sendmsg(), but that's what we've got... - */ - if (netlink_tx_is_mmaped(sk) && - iter_is_iovec(&msg->msg_iter) && - msg->msg_iter.nr_segs == 1 && - msg->msg_iter.iov->iov_base == NULL) { - err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, - &scm); - goto out; - } - err = -EMSGSIZE; if (len > sk->sk_sndbuf - 32) goto out; @@ -2794,8 +2061,7 @@ static int netlink_dump(struct sock *sk) goto errout_skb; } - if (!netlink_rx_is_mmaped(sk) && - atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) + if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto errout_skb; /* NLMSG_GOODSIZE is small to avoid high order allocations being @@ -2808,15 +2074,12 @@ static int netlink_dump(struct sock *sk) if (alloc_min_size < nlk->max_recvmsg_len) { alloc_size = nlk->max_recvmsg_len; - skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, - GFP_KERNEL | - __GFP_NOWARN | - __GFP_NORETRY); + skb = alloc_skb(alloc_size, GFP_KERNEL | + __GFP_NOWARN | __GFP_NORETRY); } if (!skb) { alloc_size = alloc_min_size; - skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, - GFP_KERNEL); + skb = alloc_skb(alloc_size, GFP_KERNEL); } if (!skb) goto errout_skb; @@ -2831,8 +2094,7 @@ static int netlink_dump(struct sock *sk) * reasonable static buffer based on the expected largest dump of a * single netdev. The outcome is MSG_TRUNC error. */ - if (!netlink_rx_is_mmaped(sk)) - skb_reserve(skb, skb_tailroom(skb) - alloc_size); + skb_reserve(skb, skb_tailroom(skb) - alloc_size); netlink_skb_set_owner_r(skb, sk); len = cb->dump(skb, cb); @@ -2884,16 +2146,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, struct netlink_sock *nlk; int ret; - /* Memory mapped dump requests need to be copied to avoid looping - * on the pending state in netlink_mmap_sendmsg() while the CB hold - * a reference to the skb. 
- */ - if (netlink_skb_is_mmaped(skb)) { - skb = skb_copy(skb, GFP_KERNEL); - if (skb == NULL) - return -ENOBUFS; - } else - atomic_inc(&skb->users); + atomic_inc(&skb->users); sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); if (sk == NULL) { @@ -2966,8 +2219,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) if (!(nlk->flags & NETLINK_F_CAP_ACK) && err) payload += nlmsg_len(nlh); - skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload), - NETLINK_CB(in_skb).portid, GFP_KERNEL); + skb = nlmsg_new(payload, GFP_KERNEL); if (!skb) { struct sock *sk; @@ -3241,7 +2493,7 @@ static const struct proto_ops netlink_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = netlink_getname, - .poll = netlink_poll, + .poll = datagram_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -3249,7 +2501,7 @@ static const struct proto_ops netlink_ops = { .getsockopt = netlink_getsockopt, .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, - .mmap = netlink_mmap, + .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 14437d9b1965..e68ef9ccd703 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -44,12 +44,6 @@ struct netlink_sock { int (*netlink_bind)(struct net *net, int group); void (*netlink_unbind)(struct net *net, int group); struct module *module; -#ifdef CONFIG_NETLINK_MMAP - struct mutex pg_vec_lock; - struct netlink_ring rx_ring; - struct netlink_ring tx_ring; - atomic_t mapped; -#endif /* CONFIG_NETLINK_MMAP */ struct rhash_head node; struct rcu_head rcu; @@ -60,15 +54,6 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk) return container_of(sk, struct netlink_sock, sk); } -static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb) -{ -#ifdef CONFIG_NETLINK_MMAP - return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; -#else - return false; -#endif /* CONFIG_NETLINK_MMAP */ -} - struct netlink_table { struct rhashtable hash; struct hlist_head mc_list; diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 3ee63a3cff30..8dd836a8dd60 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -8,41 +8,6 @@ #include "af_netlink.h" -#ifdef CONFIG_NETLINK_MMAP -static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type, - struct sk_buff *nlskb) -{ - struct netlink_diag_ring ndr; - - ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT; - ndr.ndr_block_nr = ring->pg_vec_len; - ndr.ndr_frame_size = ring->frame_size; - ndr.ndr_frame_nr = ring->frame_max + 1; - - return nla_put(nlskb, nl_type, sizeof(ndr), &ndr); -} - -static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) -{ - struct netlink_sock *nlk = nlk_sk(sk); - int ret; - - mutex_lock(&nlk->pg_vec_lock); - ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb); - if (!ret) - ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING, - nlskb); - mutex_unlock(&nlk->pg_vec_lock); - - return ret; -} -#else -static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) -{ - return 0; -} -#endif - static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) { struct netlink_sock *nlk = nlk_sk(sk); @@ -87,10 +52,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) goto out_nlmsg_trim; - if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) && - sk_diag_put_rings_cfg(sk, skb)) - goto out_nlmsg_trim; - 
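
With the ring paths gone, the netlink proto_ops above revert to the stock socket helpers: .poll goes back to datagram_poll() and .mmap to sock_no_mmap(). A quick user-space check (hypothetical, for illustration only) makes the behavioural change visible — mapping a netlink socket now fails outright, typically with ENODEV from sock_no_mmap(), where a ring-configured socket on older kernels could succeed:

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>

    int main(void)
    {
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
        void *p;

        if (fd < 0) {
            perror("socket");
            return 1;
        }
        p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
            printf("mmap on netlink socket failed: %s\n", strerror(errno));
        else
            printf("mmap succeeded (ring-capable kernel)\n");
        return 0;
    }
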
nlmsg_end(skb, nlh); return 0; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 0ffd721126e7..a09132a69869 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -463,26 +463,6 @@ int genl_unregister_family(struct genl_family *family) EXPORT_SYMBOL(genl_unregister_family); /** - * genlmsg_new_unicast - Allocate generic netlink message for unicast - * @payload: size of the message payload - * @info: information on destination - * @flags: the type of memory to allocate - * - * Allocates a new sk_buff large enough to cover the specified payload - * plus required Netlink headers. Will check receiving socket for - * memory mapped i/o capability and use it if enabled. Will fall back - * to non-mapped skb if message size exceeds the frame size of the ring. - */ -struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info, - gfp_t flags) -{ - size_t len = nlmsg_total_size(genlmsg_total_size(payload)); - - return netlink_alloc_skb(info->dst_sk, len, info->snd_portid, flags); -} -EXPORT_SYMBOL_GPL(genlmsg_new_unicast); - -/** * genlmsg_put - Add generic netlink header to netlink message * @skb: socket buffer holding the message * @portid: netlink portid the message is addressed to @@ -642,7 +622,6 @@ static int genl_family_rcv_msg(struct genl_family *family, info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; info.attrs = attrbuf; - info.dst_sk = skb->sk; genl_info_net_set(&info, net); memset(&info.user_ptr, 0, sizeof(info.user_ptr)); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d6f7fe92744a..c4e8455d5d56 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -422,10 +422,6 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, struct sk_buff *nskb = NULL; struct sk_buff *user_skb = NULL; /* to be queued to userspace */ struct nlattr *nla; - struct genl_info info = { - .dst_sk = ovs_dp_get_net(dp)->genl_sock, - .snd_portid = upcall_info->portid, - }; size_t len; unsigned int hlen; int err, dp_ifindex; @@ -466,7 +462,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, hlen = skb->len; len = upcall_msg_size(upcall_info, hlen); - user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC); + user_skb = genlmsg_new(len, GFP_ATOMIC); if (!user_skb) { err = -ENOMEM; goto out; @@ -876,7 +872,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act return NULL; len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags); - skb = genlmsg_new_unicast(len, info, GFP_KERNEL); + skb = genlmsg_new(len, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); @@ -1481,9 +1477,9 @@ error: return -EMSGSIZE; } -static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info) +static struct sk_buff *ovs_dp_cmd_alloc_info(void) { - return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); + return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL); } /* Called with rcu_read_lock or ovs_mutex. 
 */
@@ -1536,7 +1532,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
 		goto err;
 
-	reply = ovs_dp_cmd_alloc_info(info);
+	reply = ovs_dp_cmd_alloc_info();
 	if (!reply)
 		return -ENOMEM;
 
@@ -1657,7 +1653,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	int err;
 
-	reply = ovs_dp_cmd_alloc_info(info);
+	reply = ovs_dp_cmd_alloc_info();
 	if (!reply)
 		return -ENOMEM;
 
@@ -1690,7 +1686,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	int err;
 
-	reply = ovs_dp_cmd_alloc_info(info);
+	reply = ovs_dp_cmd_alloc_info();
 	if (!reply)
 		return -ENOMEM;
 
@@ -1723,7 +1719,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	int err;
 
-	reply = ovs_dp_cmd_alloc_info(info);
+	reply = ovs_dp_cmd_alloc_info();
 	if (!reply)
 		return -ENOMEM;
 
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 2c016fdefe97..de66d8f945ed 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -1104,7 +1104,6 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
 	req_nlh = (struct nlmsghdr *)skb->data;
 	msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN;
 	msg.cmd = req_userhdr->cmd;
-	msg.dst_sk = info->dst_sk;
 	msg.net = genl_info_net(info);
 
 	if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) {
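
The new vxlan_vni_to_tun_id() above is the exact inverse of the existing vxlan_tun_id_to_vni(): both values are kept in network byte order, so on a little-endian host the 24-bit VNI must be shifted into the upper half of the 64-bit word for its bytes to land at the tail of the on-wire tunnel id. A standalone sketch with plain integers (not the kernel's __be types):

    #include <stdint.h>
    #include <stdio.h>
    #include <arpa/inet.h>

    /* Same shape as vxlan_vni_to_tun_id(); the byte-order test relies
     * on the GCC/Clang predefined macros. */
    static uint64_t vni_to_tun_id(uint32_t vni_be)
    {
    #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        return (uint64_t)vni_be;
    #else
        return (uint64_t)vni_be << 32;
    #endif
    }

    int main(void)
    {
        uint32_t vni_be = htonl(0x123456);  /* VNI in network byte order */
        uint64_t tun_id = vni_to_tun_id(vni_be);
        const unsigned char *b = (const unsigned char *)&tun_id;

        /* Expected on either endianness: 00 00 00 00 00 12 34 56,
         * i.e. a network-order 64-bit tunnel id of 0x123456. */
        for (int i = 0; i < 8; i++)
            printf("%02x ", b[i]);
        printf("\n");
        return 0;
    }
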
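The lco_csum() rewrite above, meanwhile, leans on the ones-complement sum being associative: a sum over a later span, seeded with the folded sum of everything before it, equals one pass over the whole buffer. That is what lets the helper start at the outer transport header and fold in the complemented inner checksum instead of re-walking the packet from skb->data. A toy demonstration of the identity (simplified RFC 1071 arithmetic, not the kernel's csum_partial()):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* 16-bit ones-complement sum over data, seeded with sum. */
    static uint32_t csum16(const uint8_t *data, size_t len, uint32_t sum)
    {
        while (len > 1) {
            sum += (uint32_t)(data[0] << 8 | data[1]);
            data += 2;
            len -= 2;
        }
        if (len)
            sum += (uint32_t)(data[0] << 8);
        while (sum >> 16)                   /* fold end-around carries */
            sum = (sum & 0xffff) + (sum >> 16);
        return sum;
    }

    int main(void)
    {
        uint8_t pkt[64];
        for (size_t i = 0; i < sizeof(pkt); i++)
            pkt[i] = (uint8_t)(i * 7 + 3);

        uint32_t whole = csum16(pkt, sizeof(pkt), 0);
        uint32_t head  = csum16(pkt, 20, 0);           /* "outer" part  */
        uint32_t rest  = csum16(pkt + 20, 44, head);   /* seeded suffix */

        printf("whole=%04x split=%04x\n", whole, rest); /* should match */
        return 0;
    }
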
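Finally, the ipv6 txopt_get() change above pairs atomic_inc_not_zero() with rcu_pointer_handoff(): once the reference is taken, responsibility for the object's lifetime moves from the RCU read-side critical section to the refcount, and the handoff annotation records that transfer for sparse. The take-a-reference-only-if-still-alive step can be sketched with C11 atomics — a simplified analog, not the kernel implementation:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Grab a reference only while the count is still non-zero, so a
     * dying object is never resurrected. */
    static bool inc_not_zero(atomic_int *rc)
    {
        int old = atomic_load(rc);

        while (old != 0) {
            /* on failure, 'old' is reloaded with the current value */
            if (atomic_compare_exchange_weak(rc, &old, old + 1))
                return true;
        }
        return false;
    }

    int main(void)
    {
        atomic_int live = 1, dead = 0;

        printf("live: %s\n", inc_not_zero(&live) ? "got ref" : "refused");
        printf("dead: %s\n", inc_not_zero(&dead) ? "got ref" : "refused");
        return 0;
    }
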