summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/ulp
diff options
context:
space:
mode:
authorLinus Torvalds2017-05-03 21:45:55 +0200
committerLinus Torvalds2017-05-03 21:45:55 +0200
commit1684096b1ed813f621fb6cbd06e72235c1c2a0ca (patch)
tree13a228c35d6344f5d23b2c195aa3b026e42aac4b /drivers/infiniband/ulp
parentMerge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dto... (diff)
parentinfiniband: avoid dereferencing uninitialized dst on error path (diff)
downloadkernel-qcow2-linux-1684096b1ed813f621fb6cbd06e72235c1c2a0ca.tar.gz
kernel-qcow2-linux-1684096b1ed813f621fb6cbd06e72235c1c2a0ca.tar.xz
kernel-qcow2-linux-1684096b1ed813f621fb6cbd06e72235c1c2a0ca.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma updates from Doug Ledford: "More exchaustive description of primary updates in this release: - Lots of driver fixes and misc fixes across the board. - I had to base on a net-next tree because the IPoIB Accelorator patches needed it. Unfortunately, it was known to Mellanox that there would need to be an IPoIB accelorator patch to the net tree (which left some functions turned off by an #ifdef construct to avoid warnings about defined but unused functions), then one to the RDMA tree, then a fixup that went back and re-enabled the functions in the net tree and enabled their use in the rdma tree Also, a sparse fix was sent to the net tree after I did my pull, and the fixup patch conflicts quite directly with that sparse fix, so I'm going to submit the fixup patch towards the end of the merge window by itself and based upon your master branch at the time. - Two separate rounds of hfi1 fixes, one that got dropped from last release because it came in just a day or two before the end of the merge window and then the one from this release cycle. Of note is that I now have a third series that just landed from Intel yesterday. It is not included in this pull request, but I may submit it by the end of the week. I'll talk to Intel about improving the timing of thier submissions for my workflow. - Changes to our idr usage in the RDMA subsystem that will tie into our cgroup management and also into the upcoming changes for the RDMA kernel<->userspace API. - Addition of support for a netdev to be tied to an RDMA device at the core level - Addition of the VNIC driver from Intel. While IPoIB provides IP over InfiniBand (and *only* IP, no lower layer protocol headers are allowed or supported), the VNIC driver presents a virtual Ethernet device with support for things like varying Ethertypes, VLANs, priorities and other features of Ethernet. The virtual devices are centrally managed by the OPA fabric manager, making this (for the time being) a strictly OPA specific feature. - Improvements to the On-Demand Paging support in the RDMA subsystem. - Addition of three significant OPA changes. While we added OPA support some time ago (via the hfi1 driver), the RDMA subsystem has so far glossed over the areas where OPA and InfiniBand differ. With this release we are starting to add support for the OPA extensions into the RDMA core in the following area: Extended port information for OPA is now supported, extended Address Handle attributes for OPA are now supported, and extended SA Queries to get OPA specific subnet information is now supported. Concise summary from the tag: - idr usage and locking changes - build fix for hns - ipoib debug path record file fix - hfi1 updates - core RDMA netdev addition - Intel VNIC driver addition - Enhanced accelerators for IPoIB addition - Debug cleanups in cxgb3/4 - Trivial cleanups from SF Markus Elfring - Misc rxe fixes from Mellanox - Misc ipoib fixes from Mellanox - Lots of mlx4/mlx5 changes from Mellanox - Misc fixes across the RDMA subsystem - ODP paging fixes and improvements - qedr updates - hfi1 updates - OPA port info patches - OPA AH patches - OPA SA Query patches" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (191 commits) infiniband: avoid dereferencing uninitialized dst on error path IB/SA: Add OPA addr header IB/mlx5: Add port_xmit_wait to counter registers read IB/ocrdma: fix out of bounds access to local buffer IB/mlx4: Fix incorrect order of formal and actual parameters IB/mlx4: Change flush logic so it adheres to the variable name mlx5: Fix mlx5_ib_map_mr_sg mr length IB/rxe: Don't clamp residual length to mtu IB/SA: Add support to query OPA path records IB/SA: Add OPA path record type IB/SA: Split struct sa_path_rec based on IB and ROCE specific fields IB/SA: Introduce path record specific types IB/SA: Rename ib_sa_path_rec to sa_path_rec IB/CM: Add braces when using sizeof IB/core: Define 'opa' rdma_ah_attr type IB/core: Define 'ib' and 'roce' rdma_ah_attr types IB/core: Use rdma_ah_attr accessor functions IB/core: Add accessor functions for rdma_ah_attr fields IB/PVRDMA: Rename ib_ah_attr related functions IB/mthca: Rename to_ib_ah_attr to to_rdma_ah_attr ...
Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r--drivers/infiniband/ulp/Makefile1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h44
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c73
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c6
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_fs.c13
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c364
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c456
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c120
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_netlink.c13
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c64
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c12
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c2
-rw-r--r--drivers/infiniband/ulp/opa_vnic/Kconfig8
-rw-r--r--drivers/infiniband/ulp/opa_vnic/Makefile7
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c475
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h489
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c187
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h329
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c389
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c1056
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c390
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c13
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h2
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c4
24 files changed, 3997 insertions, 520 deletions
diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile
index f3c7dcf03098..c28af1823a2d 100644
--- a/drivers/infiniband/ulp/Makefile
+++ b/drivers/infiniband/ulp/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_INFINIBAND_SRP) += srp/
obj-$(CONFIG_INFINIBAND_SRPT) += srpt/
obj-$(CONFIG_INFINIBAND_ISER) += iser/
obj-$(CONFIG_INFINIBAND_ISERT) += isert/
+obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic/
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index bed233bf45c3..ff50a7bd66d8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -52,7 +52,6 @@
#include <rdma/ib_pack.h>
#include <rdma/ib_sa.h>
#include <linux/sched.h>
-
/* constants */
enum ipoib_flush_level {
@@ -153,6 +152,13 @@ static inline void skb_add_pseudo_hdr(struct sk_buff *skb)
skb_pull(skb, IPOIB_HARD_LEN);
}
+static inline struct ipoib_dev_priv *ipoib_priv(const struct net_device *dev)
+{
+ struct rdma_netdev *rn = netdev_priv(dev);
+
+ return rn->clnt_priv;
+}
+
/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
struct ipoib_mcast {
struct ib_sa_mcmember_rec mcmember;
@@ -404,6 +410,7 @@ struct ipoib_dev_priv {
struct timer_list poll_timer;
unsigned max_send_sge;
bool sm_fullmember_sendonly_support;
+ const struct net_device_ops *rn_ops;
};
struct ipoib_ah {
@@ -416,7 +423,7 @@ struct ipoib_ah {
struct ipoib_path {
struct net_device *dev;
- struct ib_sa_path_rec pathrec;
+ struct sa_path_rec pathrec;
struct ipoib_ah *ah;
struct sk_buff_head queue;
@@ -472,7 +479,7 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
- struct ib_pd *pd, struct ib_ah_attr *attr);
+ struct ib_pd *pd, struct rdma_ah_attr *attr);
void ipoib_free_ah(struct kref *kref);
static inline void ipoib_put_ah(struct ipoib_ah *ah)
{
@@ -482,27 +489,28 @@ int ipoib_open(struct net_device *dev);
int ipoib_add_pkey_attr(struct net_device *dev);
int ipoib_add_umcast_attr(struct net_device *dev);
-void ipoib_send(struct net_device *dev, struct sk_buff *skb,
- struct ipoib_ah *address, u32 qpn);
+int ipoib_send(struct net_device *dev, struct sk_buff *skb,
+ struct ib_ah *address, u32 dqpn);
void ipoib_reap_ah(struct work_struct *work);
struct ipoib_path *__path_find(struct net_device *dev, void *gid);
void ipoib_mark_paths_invalid(struct net_device *dev);
void ipoib_flush_paths(struct net_device *dev);
-int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv);
-struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
-
-int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
+struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
+ const char *format);
+void ipoib_ib_tx_timer_func(unsigned long ctx);
void ipoib_ib_dev_flush_light(struct work_struct *work);
void ipoib_ib_dev_flush_normal(struct work_struct *work);
void ipoib_ib_dev_flush_heavy(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
+int ipoib_ib_dev_open_default(struct net_device *dev);
int ipoib_ib_dev_open(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev);
void ipoib_ib_dev_up(struct net_device *dev);
void ipoib_ib_dev_down(struct net_device *dev);
-void ipoib_ib_dev_stop(struct net_device *dev);
+int ipoib_ib_dev_stop_default(struct net_device *dev);
void ipoib_pkey_dev_check_presence(struct net_device *dev);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -562,8 +570,10 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
struct ipoib_path *path);
#endif
-int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
- union ib_gid *mgid, int set_qkey);
+int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid, int set_qkey, u32 qkey);
+int ipoib_mcast_detach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid);
void ipoib_mcast_remove_list(struct list_head *remove_list);
void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
struct list_head *remove_list);
@@ -587,7 +597,7 @@ void __exit ipoib_netlink_fini(void);
void ipoib_set_umcast(struct net_device *ndev, int umcast_val);
int ipoib_set_mode(struct net_device *dev, const char *buf);
-void ipoib_setup(struct net_device *dev);
+void ipoib_setup_common(struct net_device *dev);
void ipoib_pkey_open(struct ipoib_dev_priv *priv);
void ipoib_drain_cq(struct net_device *dev);
@@ -607,14 +617,14 @@ extern int ipoib_max_conn_qp;
static inline int ipoib_cm_admin_enabled(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return IPOIB_CM_SUPPORTED(dev->dev_addr) &&
test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
}
static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return IPOIB_CM_SUPPORTED(hwaddr) &&
test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
}
@@ -637,13 +647,13 @@ static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *t
static inline int ipoib_cm_has_srq(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return !!priv->cm.srq;
}
static inline unsigned int ipoib_cm_max_mtu(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return priv->cm.max_cm_mtu;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 0cdf2b7f272f..7cbcfdac6529 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -92,7 +92,7 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_recv_wr *bad_wr;
int i, ret;
@@ -118,7 +118,7 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
struct ib_recv_wr *wr,
struct ib_sge *sge, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_recv_wr *bad_wr;
int i, ret;
@@ -145,7 +145,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
u64 mapping[IPOIB_CM_RX_SG],
gfp_t gfp)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sk_buff *skb;
int i;
@@ -196,7 +196,7 @@ partial_error:
static void ipoib_cm_free_rx_ring(struct net_device *dev,
struct ipoib_cm_rx_buf *rx_ring)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i;
for (i = 0; i < ipoib_recvq_size; ++i)
@@ -235,7 +235,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
{
struct ipoib_cm_rx *p = ctx;
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
unsigned long flags;
if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
@@ -251,7 +251,7 @@ static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
struct ipoib_cm_rx *p)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_init_attr attr = {
.event_handler = ipoib_cm_rx_event_handler,
.send_cq = priv->recv_cq, /* For drain WR */
@@ -276,7 +276,7 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
struct ib_cm_id *cm_id, struct ib_qp *qp,
unsigned psn)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
@@ -331,7 +331,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev,
struct ib_recv_wr *wr,
struct ib_sge *sge)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i;
for (i = 0; i < priv->cm.num_frags; ++i)
@@ -349,7 +349,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev,
static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
struct ipoib_cm_rx *rx)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct {
struct ib_recv_wr wr;
struct ib_sge sge[IPOIB_CM_RX_SG];
@@ -422,7 +422,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
struct ib_qp *qp, struct ib_cm_req_event_param *req,
unsigned psn)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_data data = {};
struct ib_cm_rep_param rep = {};
@@ -442,7 +442,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
struct net_device *dev = cm_id->context;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx *p;
unsigned psn;
int ret;
@@ -515,7 +515,7 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
/* Fall through */
case IB_CM_REJ_RECEIVED:
p = cm_id->context;
- priv = netdev_priv(p->dev);
+ priv = ipoib_priv(p->dev);
if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
ipoib_warn(priv, "unable to move qp to error state\n");
/* Fall through */
@@ -559,7 +559,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx_buf *rx_ring;
unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
struct sk_buff *skb, *newskb;
@@ -708,7 +708,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_tx_buf *tx_req;
int rc;
unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);
@@ -786,7 +786,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_tx *tx = wc->qp->qp_context;
unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
struct ipoib_tx_buf *tx_req;
@@ -855,7 +855,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
int ipoib_cm_dev_open(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
@@ -887,7 +887,7 @@ err_cm:
static void ipoib_cm_free_rx_reap_list(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx *rx, *n;
LIST_HEAD(list);
@@ -910,7 +910,7 @@ static void ipoib_cm_free_rx_reap_list(struct net_device *dev)
void ipoib_cm_dev_stop(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx *p;
unsigned long begin;
int ret;
@@ -969,7 +969,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)
static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
struct ipoib_cm_tx *p = cm_id->context;
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
struct ipoib_cm_data *data = event->private_data;
struct sk_buff_head skqueue;
struct ib_qp_attr qp_attr;
@@ -1037,7 +1037,7 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_init_attr attr = {
.send_cq = priv->recv_cq,
.recv_cq = priv->recv_cq,
@@ -1068,9 +1068,9 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
static int ipoib_cm_send_req(struct net_device *dev,
struct ib_cm_id *id, struct ib_qp *qp,
u32 qpn,
- struct ib_sa_path_rec *pathrec)
+ struct sa_path_rec *pathrec)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_data data = {};
struct ib_cm_req_param req = {};
@@ -1105,7 +1105,7 @@ static int ipoib_cm_send_req(struct net_device *dev,
static int ipoib_cm_modify_tx_init(struct net_device *dev,
struct ib_cm_id *cm_id, struct ib_qp *qp)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
ret = ib_find_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index);
@@ -1128,9 +1128,9 @@ static int ipoib_cm_modify_tx_init(struct net_device *dev,
}
static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
- struct ib_sa_path_rec *pathrec)
+ struct sa_path_rec *pathrec)
{
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
int ret;
p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring,
@@ -1186,7 +1186,7 @@ err_tx:
static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
{
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
struct ipoib_tx_buf *tx_req;
unsigned long begin;
@@ -1236,7 +1236,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
struct ib_cm_event *event)
{
struct ipoib_cm_tx *tx = cm_id->context;
- struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
struct net_device *dev = priv->dev;
struct ipoib_neigh *neigh;
unsigned long flags;
@@ -1287,7 +1287,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
struct ipoib_neigh *neigh)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_tx *tx;
tx = kzalloc(sizeof *tx, GFP_ATOMIC);
@@ -1306,7 +1306,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
{
- struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
unsigned long flags;
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
spin_lock_irqsave(&priv->lock, flags);
@@ -1332,7 +1332,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
struct ipoib_path *path;
int ret;
- struct ib_sa_path_rec pathrec;
+ struct sa_path_rec pathrec;
u32 qpn;
netif_tx_lock_bh(dev);
@@ -1441,7 +1441,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
unsigned int mtu)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int e = skb_queue_empty(&priv->cm.skb_queue);
if (skb_dst(skb))
@@ -1490,7 +1490,8 @@ static void ipoib_cm_stale_task(struct work_struct *work)
static ssize_t show_mode(struct device *d, struct device_attribute *attr,
char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(d));
+ struct net_device *dev = to_net_dev(d);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
return sprintf(buf, "connected\n");
@@ -1503,7 +1504,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
{
struct net_device *dev = to_net_dev(d);
int ret;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags))
return -EPERM;
@@ -1532,7 +1533,7 @@ int ipoib_cm_add_mode_attr(struct net_device *dev)
static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_srq_init_attr srq_init_attr = {
.srq_type = IB_SRQT_BASIC,
.attr = {
@@ -1561,7 +1562,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
int ipoib_cm_dev_init(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int max_srq_sge, i;
INIT_LIST_HEAD(&priv->cm.passive_ids);
@@ -1622,7 +1623,7 @@ int ipoib_cm_dev_init(struct net_device *dev)
void ipoib_cm_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
if (!priv->cm.srq)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index bac455a1942d..379c02fb4181 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -60,7 +60,7 @@ static const struct ipoib_stats ipoib_gstrings_stats[] = {
static void ipoib_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *drvinfo)
{
- struct ipoib_dev_priv *priv = netdev_priv(netdev);
+ struct ipoib_dev_priv *priv = ipoib_priv(netdev);
ib_get_device_fw_str(priv->ca, drvinfo->fw_version,
sizeof(drvinfo->fw_version));
@@ -77,7 +77,7 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
static int ipoib_get_coalesce(struct net_device *dev,
struct ethtool_coalesce *coal)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs;
coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
@@ -88,7 +88,7 @@ static int ipoib_get_coalesce(struct net_device *dev,
static int ipoib_set_coalesce(struct net_device *dev,
struct ethtool_coalesce *coal)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
/*
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 6bd5740e2691..11f74cbe6660 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -210,16 +210,16 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr)
seq_printf(file,
"GID: %s\n"
" complete: %6s\n",
- gid_buf, path.pathrec.dlid ? "yes" : "no");
+ gid_buf, sa_path_get_dlid(&path.pathrec) ? "yes" : "no");
- if (path.pathrec.dlid) {
+ if (sa_path_get_dlid(&path.pathrec)) {
rate = ib_rate_to_mbps(path.pathrec.rate);
seq_printf(file,
" DLID: 0x%04x\n"
" SL: %12d\n"
" rate: %8d.%d Gb/sec\n",
- be16_to_cpu(path.pathrec.dlid),
+ be32_to_cpu(sa_path_get_dlid(&path.pathrec)),
path.pathrec.sl,
rate / 1000, rate % 1000);
}
@@ -261,7 +261,7 @@ static const struct file_operations ipoib_path_fops = {
void ipoib_create_debug_files(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
char name[IFNAMSIZ + sizeof "_path"];
snprintf(name, sizeof name, "%s_mcg", dev->name);
@@ -279,10 +279,13 @@ void ipoib_create_debug_files(struct net_device *dev)
void ipoib_delete_debug_files(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ WARN_ONCE(!priv->mcg_dentry, "null mcg debug file\n");
+ WARN_ONCE(!priv->path_dentry, "null path debug file\n");
debugfs_remove(priv->mcg_dentry);
debugfs_remove(priv->path_dentry);
+ priv->mcg_dentry = priv->path_dentry = NULL;
}
int ipoib_register_debugfs(void)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 12c4f84a6639..0060b2f9f659 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -52,7 +52,7 @@ MODULE_PARM_DESC(data_debug_level,
#endif
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
- struct ib_pd *pd, struct ib_ah_attr *attr)
+ struct ib_pd *pd, struct rdma_ah_attr *attr)
{
struct ipoib_ah *ah;
struct ib_ah *vah;
@@ -65,13 +65,13 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
ah->last_send = 0;
kref_init(&ah->ref);
- vah = ib_create_ah(pd, attr);
+ vah = rdma_create_ah(pd, attr);
if (IS_ERR(vah)) {
kfree(ah);
ah = (struct ipoib_ah *)vah;
} else {
ah->ah = vah;
- ipoib_dbg(netdev_priv(dev), "Created ah %p\n", ah->ah);
+ ipoib_dbg(ipoib_priv(dev), "Created ah %p\n", ah->ah);
}
return ah;
@@ -80,7 +80,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
void ipoib_free_ah(struct kref *kref)
{
struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref);
- struct ipoib_dev_priv *priv = netdev_priv(ah->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ah->dev);
unsigned long flags;
@@ -99,7 +99,7 @@ static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
static int ipoib_ib_post_receive(struct net_device *dev, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_recv_wr *bad_wr;
int ret;
@@ -121,7 +121,7 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id)
static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sk_buff *skb;
int buf_size;
u64 *mapping;
@@ -153,7 +153,7 @@ error:
static int ipoib_ib_post_receives(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i;
for (i = 0; i < ipoib_recvq_size; ++i) {
@@ -172,7 +172,7 @@ static int ipoib_ib_post_receives(struct net_device *dev)
static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
struct sk_buff *skb;
u64 mapping[IPOIB_UD_RX_SG];
@@ -381,7 +381,7 @@ free_res:
static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned int wr_id = wc->wr_id;
struct ipoib_tx_buf *tx_req;
@@ -485,14 +485,14 @@ poll_more:
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
{
struct net_device *dev = dev_ptr;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
napi_schedule(&priv->napi);
}
static void drain_tx_cq(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
netif_tx_lock(dev);
while (poll_tx(priv))
@@ -506,14 +506,14 @@ static void drain_tx_cq(struct net_device *dev)
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev_ptr);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev_ptr);
mod_timer(&priv->poll_timer, jiffies);
}
static inline int post_send(struct ipoib_dev_priv *priv,
unsigned int wr_id,
- struct ib_ah *address, u32 qpn,
+ struct ib_ah *address, u32 dqpn,
struct ipoib_tx_buf *tx_req,
void *head, int hlen)
{
@@ -523,7 +523,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
ipoib_build_sge(priv, tx_req);
priv->tx_wr.wr.wr_id = wr_id;
- priv->tx_wr.remote_qpn = qpn;
+ priv->tx_wr.remote_qpn = dqpn;
priv->tx_wr.ah = address;
if (head) {
@@ -537,10 +537,10 @@ static inline int post_send(struct ipoib_dev_priv *priv,
return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr);
}
-void ipoib_send(struct net_device *dev, struct sk_buff *skb,
- struct ipoib_ah *address, u32 qpn)
+int ipoib_send(struct net_device *dev, struct sk_buff *skb,
+ struct ib_ah *address, u32 dqpn)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_tx_buf *tx_req;
int hlen, rc;
void *phead;
@@ -554,7 +554,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
} else {
if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
@@ -563,7 +563,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
- return;
+ return -1;
}
phead = NULL;
hlen = 0;
@@ -574,7 +574,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
/* Does skb_linearize return ok without reducing nr_frags? */
if (skb_shinfo(skb)->nr_frags > usable_sge) {
@@ -582,12 +582,13 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
}
- ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
- skb->len, address, qpn);
+ ipoib_dbg_data(priv,
+ "sending packet, length=%d address=%p dqpn=0x%06x\n",
+ skb->len, address, dqpn);
/*
* We put the skb into the tx_ring _before_ we call post_send()
@@ -601,7 +602,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -620,7 +621,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
skb_dst_drop(skb);
rc = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
- address->ah, qpn, tx_req, phead, hlen);
+ address, dqpn, tx_req, phead, hlen);
if (unlikely(rc)) {
ipoib_warn(priv, "post_send failed, error %d\n", rc);
++dev->stats.tx_errors;
@@ -629,21 +630,24 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
dev_kfree_skb_any(skb);
if (netif_queue_stopped(dev))
netif_wake_queue(dev);
+ rc = 0;
} else {
netif_trans_update(dev);
- address->last_send = priv->tx_head;
+ rc = priv->tx_head;
++priv->tx_head;
}
if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
while (poll_tx(priv))
; /* nothing */
+
+ return rc;
}
static void __ipoib_reap_ah(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_ah *ah, *tah;
LIST_HEAD(remove_list);
unsigned long flags;
@@ -654,7 +658,7 @@ static void __ipoib_reap_ah(struct net_device *dev)
list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
list_del(&ah->list);
- ib_destroy_ah(ah->ah);
+ rdma_destroy_ah(ah->ah);
kfree(ah);
}
@@ -677,7 +681,7 @@ void ipoib_reap_ah(struct work_struct *work)
static void ipoib_flush_ah(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
cancel_delayed_work(&priv->ah_reap_task);
flush_workqueue(priv->wq);
@@ -686,30 +690,124 @@ static void ipoib_flush_ah(struct net_device *dev)
static void ipoib_stop_ah(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
set_bit(IPOIB_STOP_REAPER, &priv->flags);
ipoib_flush_ah(dev);
}
-static void ipoib_ib_tx_timer_func(unsigned long ctx)
+static int recvs_pending(struct net_device *dev)
{
- drain_tx_cq((struct net_device *)ctx);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int pending = 0;
+ int i;
+
+ for (i = 0; i < ipoib_recvq_size; ++i)
+ if (priv->rx_ring[i].skb)
+ ++pending;
+
+ return pending;
}
-int ipoib_ib_dev_open(struct net_device *dev)
+int ipoib_ib_dev_stop_default(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- int ret;
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct ib_qp_attr qp_attr;
+ unsigned long begin;
+ struct ipoib_tx_buf *tx_req;
+ int i;
- ipoib_pkey_dev_check_presence(dev);
+ if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+ napi_disable(&priv->napi);
- if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
- ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
- (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
- return -1;
+ ipoib_cm_dev_stop(dev);
+
+ /*
+ * Move our QP to the error state and then reinitialize in
+ * when all work requests have completed or have been flushed.
+ */
+ qp_attr.qp_state = IB_QPS_ERR;
+ if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+ ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
+
+ /* Wait for all sends and receives to complete */
+ begin = jiffies;
+
+ while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
+ if (time_after(jiffies, begin + 5 * HZ)) {
+ ipoib_warn(priv,
+ "timing out; %d sends %d receives not completed\n",
+ priv->tx_head - priv->tx_tail,
+ recvs_pending(dev));
+
+ /*
+ * assume the HW is wedged and just free up
+ * all our pending work requests.
+ */
+ while ((int)priv->tx_tail - (int)priv->tx_head < 0) {
+ tx_req = &priv->tx_ring[priv->tx_tail &
+ (ipoib_sendq_size - 1)];
+ ipoib_dma_unmap_tx(priv, tx_req);
+ dev_kfree_skb_any(tx_req->skb);
+ ++priv->tx_tail;
+ --priv->tx_outstanding;
+ }
+
+ for (i = 0; i < ipoib_recvq_size; ++i) {
+ struct ipoib_rx_buf *rx_req;
+
+ rx_req = &priv->rx_ring[i];
+ if (!rx_req->skb)
+ continue;
+ ipoib_ud_dma_unmap_rx(priv,
+ priv->rx_ring[i].mapping);
+ dev_kfree_skb_any(rx_req->skb);
+ rx_req->skb = NULL;
+ }
+
+ goto timeout;
+ }
+
+ ipoib_drain_cq(dev);
+
+ msleep(1);
}
+ ipoib_dbg(priv, "All sends and receives done.\n");
+
+timeout:
+ del_timer_sync(&priv->poll_timer);
+ qp_attr.qp_state = IB_QPS_RESET;
+ if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+ ipoib_warn(priv, "Failed to modify QP to RESET state\n");
+
+ ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
+
+ return 0;
+}
+
+int ipoib_ib_dev_stop(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ priv->rn_ops->ndo_stop(dev);
+
+ clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
+ ipoib_flush_ah(dev);
+
+ return 0;
+}
+
+void ipoib_ib_tx_timer_func(unsigned long ctx)
+{
+ drain_tx_cq((struct net_device *)ctx);
+}
+
+int ipoib_ib_dev_open_default(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int ret;
+
ret = ipoib_init_qp(dev);
if (ret) {
ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
@@ -719,33 +817,60 @@ int ipoib_ib_dev_open(struct net_device *dev)
ret = ipoib_ib_post_receives(dev);
if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
- goto dev_stop;
+ goto out;
}
ret = ipoib_cm_dev_open(dev);
if (ret) {
ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
- goto dev_stop;
+ goto out;
+ }
+
+ if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+ napi_enable(&priv->napi);
+
+ return 0;
+out:
+ return -1;
+}
+
+int ipoib_ib_dev_open(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ ipoib_pkey_dev_check_presence(dev);
+
+ if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
+ ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
+ (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
+ return -1;
}
clear_bit(IPOIB_STOP_REAPER, &priv->flags);
queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ));
- if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- napi_enable(&priv->napi);
+ if (priv->rn_ops->ndo_open(dev)) {
+ pr_warn("%s: Failed to open dev\n", dev->name);
+ goto dev_stop;
+ }
+
+ set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
return 0;
+
dev_stop:
- if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- napi_enable(&priv->napi);
+ set_bit(IPOIB_STOP_REAPER, &priv->flags);
+ cancel_delayed_work(&priv->ah_reap_task);
+ set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
+ napi_enable(&priv->napi);
ipoib_ib_dev_stop(dev);
return -1;
}
void ipoib_pkey_dev_check_presence(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (!(priv->pkey & 0x7fff) ||
ib_find_pkey(priv->ca, priv->port, priv->pkey,
@@ -757,7 +882,7 @@ void ipoib_pkey_dev_check_presence(struct net_device *dev)
void ipoib_ib_dev_up(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_pkey_dev_check_presence(dev);
@@ -773,7 +898,7 @@ void ipoib_ib_dev_up(struct net_device *dev)
void ipoib_ib_dev_down(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "downing ib_dev\n");
@@ -786,22 +911,9 @@ void ipoib_ib_dev_down(struct net_device *dev)
ipoib_flush_paths(dev);
}
-static int recvs_pending(struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- int pending = 0;
- int i;
-
- for (i = 0; i < ipoib_recvq_size; ++i)
- if (priv->rx_ring[i].skb)
- ++pending;
-
- return pending;
-}
-
void ipoib_drain_cq(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i, n;
/*
@@ -838,107 +950,6 @@ void ipoib_drain_cq(struct net_device *dev)
local_bh_enable();
}
-void ipoib_ib_dev_stop(struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ib_qp_attr qp_attr;
- unsigned long begin;
- struct ipoib_tx_buf *tx_req;
- int i;
-
- if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- napi_disable(&priv->napi);
-
- ipoib_cm_dev_stop(dev);
-
- /*
- * Move our QP to the error state and then reinitialize in
- * when all work requests have completed or have been flushed.
- */
- qp_attr.qp_state = IB_QPS_ERR;
- if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
- ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
-
- /* Wait for all sends and receives to complete */
- begin = jiffies;
-
- while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
- if (time_after(jiffies, begin + 5 * HZ)) {
- ipoib_warn(priv, "timing out; %d sends %d receives not completed\n",
- priv->tx_head - priv->tx_tail, recvs_pending(dev));
-
- /*
- * assume the HW is wedged and just free up
- * all our pending work requests.
- */
- while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
- tx_req = &priv->tx_ring[priv->tx_tail &
- (ipoib_sendq_size - 1)];
- ipoib_dma_unmap_tx(priv, tx_req);
- dev_kfree_skb_any(tx_req->skb);
- ++priv->tx_tail;
- --priv->tx_outstanding;
- }
-
- for (i = 0; i < ipoib_recvq_size; ++i) {
- struct ipoib_rx_buf *rx_req;
-
- rx_req = &priv->rx_ring[i];
- if (!rx_req->skb)
- continue;
- ipoib_ud_dma_unmap_rx(priv,
- priv->rx_ring[i].mapping);
- dev_kfree_skb_any(rx_req->skb);
- rx_req->skb = NULL;
- }
-
- goto timeout;
- }
-
- ipoib_drain_cq(dev);
-
- msleep(1);
- }
-
- ipoib_dbg(priv, "All sends and receives done.\n");
-
-timeout:
- del_timer_sync(&priv->poll_timer);
- qp_attr.qp_state = IB_QPS_RESET;
- if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
- ipoib_warn(priv, "Failed to modify QP to RESET state\n");
-
- ipoib_flush_ah(dev);
-
- ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
-}
-
-int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
-
- priv->ca = ca;
- priv->port = port;
- priv->qp = NULL;
-
- if (ipoib_transport_dev_init(dev, ca)) {
- printk(KERN_WARNING "%s: ipoib_transport_dev_init failed\n", ca->name);
- return -ENODEV;
- }
-
- setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func,
- (unsigned long) dev);
-
- if (dev->flags & IFF_UP) {
- if (ipoib_ib_dev_open(dev)) {
- ipoib_transport_dev_cleanup(dev);
- return -ENODEV;
- }
- }
-
- return 0;
-}
-
/*
* Takes whatever value which is in pkey index 0 and updates priv->pkey
* returns 0 if the pkey value was changed.
@@ -967,6 +978,19 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
*/
priv->dev->broadcast[8] = priv->pkey >> 8;
priv->dev->broadcast[9] = priv->pkey & 0xff;
+
+ /*
+ * Update the broadcast address in the priv->broadcast object,
+ * in case it already exists, otherwise no one will do that.
+ */
+ if (priv->broadcast) {
+ spin_lock_irq(&priv->lock);
+ memcpy(priv->broadcast->mcmember.mgid.raw,
+ priv->dev->broadcast + 4,
+ sizeof(union ib_gid));
+ spin_unlock_irq(&priv->lock);
+ }
+
return 0;
}
@@ -1216,7 +1240,7 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work)
void ipoib_ib_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "cleaning up ib_dev\n");
/*
@@ -1236,7 +1260,13 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
*/
ipoib_stop_ah(dev);
- ipoib_transport_dev_cleanup(dev);
-}
+ clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+ priv->rn_ops->ndo_uninit(dev);
+
+ if (priv->pd) {
+ ib_dealloc_pd(priv->pd);
+ priv->pd = NULL;
+ }
+}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index d1d3fb7a6127..2869d1adb1de 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,9 +108,36 @@ static struct ib_client ipoib_client = {
.get_net_dev_by_params = ipoib_get_net_dev_by_params,
};
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+static int ipoib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct netdev_notifier_info *ni = ptr;
+ struct net_device *dev = ni->dev;
+
+ if (dev->netdev_ops->ndo_open != ipoib_open)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ ipoib_create_debug_files(dev);
+ break;
+ case NETDEV_CHANGENAME:
+ ipoib_delete_debug_files(dev);
+ ipoib_create_debug_files(dev);
+ break;
+ case NETDEV_UNREGISTER:
+ ipoib_delete_debug_files(dev);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+#endif
+
int ipoib_open(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "bringing up interface\n");
@@ -157,7 +184,7 @@ err_disable:
static int ipoib_stop(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "stopping interface\n");
@@ -195,7 +222,7 @@ static void ipoib_uninit(struct net_device *dev)
static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
@@ -205,7 +232,7 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu
static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
/* dev->mtu > 2K ==> connected mode */
if (ipoib_cm_admin_enabled(dev)) {
@@ -468,7 +495,7 @@ static struct net_device *ipoib_get_net_dev_by_params(
int ipoib_set_mode(struct net_device *dev, const char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if ((test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags) &&
!strcmp(buf, "connected\n")) ||
@@ -505,7 +532,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
struct ipoib_path *__path_find(struct net_device *dev, void *gid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node *n = priv->path_tree.rb_node;
struct ipoib_path *path;
int ret;
@@ -529,7 +556,7 @@ struct ipoib_path *__path_find(struct net_device *dev, void *gid)
static int __path_add(struct net_device *dev, struct ipoib_path *path)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node **n = &priv->path_tree.rb_node;
struct rb_node *pn = NULL;
struct ipoib_path *tpath;
@@ -564,7 +591,7 @@ static void path_free(struct net_device *dev, struct ipoib_path *path)
while ((skb = __skb_dequeue(&path->queue)))
dev_kfree_skb_irq(skb);
- ipoib_dbg(netdev_priv(dev), "path_free\n");
+ ipoib_dbg(ipoib_priv(dev), "path_free\n");
/* remove all neigh connected to this path */
ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw);
@@ -598,7 +625,7 @@ struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
int ipoib_path_iter_next(struct ipoib_path_iter *iter)
{
- struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(iter->dev);
struct rb_node *n;
struct ipoib_path *path;
int ret = 1;
@@ -635,92 +662,21 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
void ipoib_mark_paths_invalid(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_path *path, *tp;
spin_lock_irq(&priv->lock);
list_for_each_entry_safe(path, tp, &priv->path_list, list) {
- ipoib_dbg(priv, "mark path LID 0x%04x GID %pI6 invalid\n",
- be16_to_cpu(path->pathrec.dlid),
- path->pathrec.dgid.raw);
+ ipoib_dbg(priv, "mark path LID 0x%08x GID %pI6 invalid\n",
+ be32_to_cpu(sa_path_get_dlid(&path->pathrec)),
+ path->pathrec.dgid.raw);
path->valid = 0;
}
spin_unlock_irq(&priv->lock);
}
-struct classport_info_context {
- struct ipoib_dev_priv *priv;
- struct completion done;
- struct ib_sa_query *sa_query;
-};
-
-static void classport_info_query_cb(int status, struct ib_class_port_info *rec,
- void *context)
-{
- struct classport_info_context *cb_ctx = context;
- struct ipoib_dev_priv *priv;
-
- WARN_ON(!context);
-
- priv = cb_ctx->priv;
-
- if (status || !rec) {
- pr_debug("device: %s failed query classport_info status: %d\n",
- priv->dev->name, status);
- /* keeps the default, will try next mcast_restart */
- priv->sm_fullmember_sendonly_support = false;
- goto out;
- }
-
- if (ib_get_cpi_capmask2(rec) &
- IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT) {
- pr_debug("device: %s enabled fullmember-sendonly for sendonly MCG\n",
- priv->dev->name);
- priv->sm_fullmember_sendonly_support = true;
- } else {
- pr_debug("device: %s disabled fullmember-sendonly for sendonly MCG\n",
- priv->dev->name);
- priv->sm_fullmember_sendonly_support = false;
- }
-
-out:
- complete(&cb_ctx->done);
-}
-
-int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv)
-{
- struct classport_info_context *callback_context;
- int ret;
-
- callback_context = kmalloc(sizeof(*callback_context), GFP_KERNEL);
- if (!callback_context)
- return -ENOMEM;
-
- callback_context->priv = priv;
- init_completion(&callback_context->done);
-
- ret = ib_sa_classport_info_rec_query(&ipoib_sa_client,
- priv->ca, priv->port, 3000,
- GFP_KERNEL,
- classport_info_query_cb,
- callback_context,
- &callback_context->sa_query);
- if (ret < 0) {
- pr_info("%s failed to send ib_sa_classport_info query, ret: %d\n",
- priv->dev->name, ret);
- kfree(callback_context);
- return ret;
- }
-
- /* waiting for the callback to finish before returnning */
- wait_for_completion(&callback_context->done);
- kfree(callback_context);
-
- return ret;
-}
-
static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
{
struct ipoib_pseudo_header *phdr;
@@ -731,7 +687,7 @@ static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
void ipoib_flush_paths(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_path *path, *tp;
LIST_HEAD(remove_list);
unsigned long flags;
@@ -760,12 +716,12 @@ void ipoib_flush_paths(struct net_device *dev)
}
static void path_rec_completion(int status,
- struct ib_sa_path_rec *pathrec,
+ struct sa_path_rec *pathrec,
void *path_ptr)
{
struct ipoib_path *path = path_ptr;
struct net_device *dev = path->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_ah *ah = NULL;
struct ipoib_ah *old_ah = NULL;
struct ipoib_neigh *neigh, *tn;
@@ -775,7 +731,8 @@ static void path_rec_completion(int status,
if (!status)
ipoib_dbg(priv, "PathRec LID 0x%04x for GID %pI6\n",
- be16_to_cpu(pathrec->dlid), pathrec->dgid.raw);
+ be32_to_cpu(sa_path_get_dlid(pathrec)),
+ pathrec->dgid.raw);
else
ipoib_dbg(priv, "PathRec status %d for GID %pI6\n",
status, path->pathrec.dgid.raw);
@@ -783,7 +740,7 @@ static void path_rec_completion(int status,
skb_queue_head_init(&skqueue);
if (!status) {
- struct ib_ah_attr av;
+ struct rdma_ah_attr av;
if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
ah = ipoib_create_ah(dev, priv->pd, &av);
@@ -798,7 +755,8 @@ static void path_rec_completion(int status,
path->ah = ah;
ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
- ah, be16_to_cpu(pathrec->dlid), pathrec->sl);
+ ah, be32_to_cpu(sa_path_get_dlid(pathrec)),
+ pathrec->sl);
while ((skb = __skb_dequeue(&path->queue)))
__skb_queue_tail(&skqueue, skb);
@@ -858,7 +816,7 @@ static void path_rec_completion(int status,
static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_path *path;
if (!priv->broadcast)
@@ -874,6 +832,10 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
INIT_LIST_HEAD(&path->neigh_list);
+ if (rdma_cap_opa_ah(priv->ca, priv->port))
+ path->pathrec.rec_type = SA_PATH_REC_TYPE_OPA;
+ else
+ path->pathrec.rec_type = SA_PATH_REC_TYPE_IB;
memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid));
path->pathrec.sgid = priv->local_gid;
path->pathrec.pkey = cpu_to_be16(priv->pkey);
@@ -886,7 +848,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
static int path_rec_start(struct net_device *dev,
struct ipoib_path *path)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "Start path record lookup for %pI6\n",
path->pathrec.dgid.raw);
@@ -917,7 +879,8 @@ static int path_rec_start(struct net_device *dev,
static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_path *path;
struct ipoib_neigh *neigh;
unsigned long flags;
@@ -964,7 +927,8 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
}
} else {
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(daddr));
+ path->ah->last_send = rn->send(dev, skb, path->ah->ah,
+ IPOIB_QPN(daddr));
ipoib_neigh_put(neigh);
return;
}
@@ -998,7 +962,8 @@ err_drop:
static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
struct ipoib_pseudo_header *phdr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_path *path;
unsigned long flags;
@@ -1038,11 +1003,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
}
if (path->ah) {
- ipoib_dbg(priv, "Send unicast ARP to %04x\n",
- be16_to_cpu(path->pathrec.dlid));
+ ipoib_dbg(priv, "Send unicast ARP to %08x\n",
+ be32_to_cpu(sa_path_get_dlid(&path->pathrec)));
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+ path->ah->last_send = rn->send(dev, skb, path->ah->ah,
+ IPOIB_QPN(phdr->hwaddr));
return;
} else if ((path->query || !path_rec_start(dev, path)) &&
skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
@@ -1058,7 +1024,8 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_neigh *neigh;
struct ipoib_pseudo_header *phdr;
struct ipoib_header *header;
@@ -1122,7 +1089,8 @@ send_using_neigh:
goto unref;
}
} else if (neigh->ah) {
- ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(phdr->hwaddr));
+ neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah,
+ IPOIB_QPN(phdr->hwaddr));
goto unref;
}
@@ -1144,7 +1112,7 @@ unref:
static void ipoib_timeout(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
jiffies_to_msecs(jiffies - dev_trans_start(dev)));
@@ -1178,7 +1146,7 @@ static int ipoib_hard_header(struct sk_buff *skb,
static void ipoib_set_mcast_list(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
ipoib_dbg(priv, "IPOIB_FLAG_OPER_UP not set");
@@ -1190,7 +1158,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
static int ipoib_get_iflink(const struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
/* parent interface */
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
@@ -1218,7 +1186,7 @@ static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
struct ipoib_neigh *neigh = NULL;
@@ -1347,7 +1315,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
struct ipoib_neigh *neigh;
@@ -1404,7 +1372,7 @@ void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
{
/* neigh reference count was dropprd to zero */
struct net_device *dev = neigh->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sk_buff *skb;
if (neigh->ah)
ipoib_put_ah(neigh->ah);
@@ -1414,7 +1382,7 @@ void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
}
if (ipoib_cm_get(neigh))
ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
- ipoib_dbg(netdev_priv(dev),
+ ipoib_dbg(ipoib_priv(dev),
"neigh free for %06x %pI6\n",
IPOIB_QPN(neigh->daddr),
neigh->daddr + 4);
@@ -1436,7 +1404,7 @@ static void ipoib_neigh_reclaim(struct rcu_head *rp)
void ipoib_neigh_free(struct ipoib_neigh *neigh)
{
struct net_device *dev = neigh->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
struct ipoib_neigh __rcu **np;
@@ -1519,7 +1487,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
unsigned long flags;
@@ -1605,7 +1573,7 @@ out_unlock:
static void ipoib_neigh_hash_uninit(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int stopped;
ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
@@ -1622,10 +1590,26 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev)
wait_for_completion(&priv->ntbl.deleted);
}
+void ipoib_dev_uninit_default(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
-int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+ ipoib_transport_dev_cleanup(dev);
+
+ ipoib_cm_dev_cleanup(dev);
+
+ kfree(priv->rx_ring);
+ vfree(priv->tx_ring);
+
+ priv->rx_ring = NULL;
+ priv->tx_ring = NULL;
+}
+
+static int ipoib_dev_init_default(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
/* Allocate RX/TX "rings" to hold queued skbs */
priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
@@ -1636,46 +1620,111 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
if (!priv->tx_ring) {
printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
- ca->name, ipoib_sendq_size);
+ priv->ca->name, ipoib_sendq_size);
goto out_rx_ring_cleanup;
}
/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
- if (ipoib_ib_dev_init(dev, ca, port))
+ if (ipoib_transport_dev_init(dev, priv->ca)) {
+ pr_warn("%s: ipoib_transport_dev_init failed\n",
+ priv->ca->name);
goto out_tx_ring_cleanup;
+ }
+
+ /* after qp created set dev address */
+ priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
+ priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
+ priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff;
+
+ setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func,
+ (unsigned long)dev);
+
+ return 0;
+
+out_tx_ring_cleanup:
+ vfree(priv->tx_ring);
+
+out_rx_ring_cleanup:
+ kfree(priv->rx_ring);
+
+out:
+ return -ENOMEM;
+}
+
+int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int ret = -ENOMEM;
+
+ priv->ca = ca;
+ priv->port = port;
+ priv->qp = NULL;
/*
- * Must be after ipoib_ib_dev_init so we can allocate a per
- * device wq there and use it here
+ * the various IPoIB tasks assume they will never race against
+ * themselves, so always use a single thread workqueue
*/
- if (ipoib_neigh_hash_init(priv) < 0)
+ priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM);
+ if (!priv->wq) {
+ pr_warn("%s: failed to allocate device WQ\n", dev->name);
+ goto out;
+ }
+
+ /* create pd, which used both for control and datapath*/
+ priv->pd = ib_alloc_pd(priv->ca, 0);
+ if (IS_ERR(priv->pd)) {
+ pr_warn("%s: failed to allocate PD\n", ca->name);
+ goto clean_wq;
+ }
+
+ ret = priv->rn_ops->ndo_init(dev);
+ if (ret) {
+ pr_warn("%s failed to init HW resource\n", dev->name);
+ goto out_free_pd;
+ }
+
+ if (ipoib_neigh_hash_init(priv) < 0) {
+ pr_warn("%s failed to init neigh hash\n", dev->name);
goto out_dev_uninit;
+ }
+
+ if (dev->flags & IFF_UP) {
+ if (ipoib_ib_dev_open(dev)) {
+ pr_warn("%s failed to open device\n", dev->name);
+ ret = -ENODEV;
+ goto out_dev_uninit;
+ }
+ }
return 0;
out_dev_uninit:
ipoib_ib_dev_cleanup(dev);
-out_tx_ring_cleanup:
- vfree(priv->tx_ring);
+out_free_pd:
+ if (priv->pd) {
+ ib_dealloc_pd(priv->pd);
+ priv->pd = NULL;
+ }
-out_rx_ring_cleanup:
- kfree(priv->rx_ring);
+clean_wq:
+ if (priv->wq) {
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
+ }
out:
- return -ENOMEM;
+ return ret;
}
void ipoib_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;
+ struct ipoib_dev_priv *priv = ipoib_priv(dev), *cpriv, *tcpriv;
LIST_HEAD(head);
ASSERT_RTNL();
- ipoib_delete_debug_files(dev);
-
/* Delete any child interfaces first */
list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
/* Stop GC on child */
@@ -1685,24 +1734,21 @@ void ipoib_dev_cleanup(struct net_device *dev)
}
unregister_netdevice_many(&head);
- /*
- * Must be before ipoib_ib_dev_cleanup or we delete an in use
- * work queue
- */
ipoib_neigh_hash_uninit(dev);
ipoib_ib_dev_cleanup(dev);
- kfree(priv->rx_ring);
- vfree(priv->tx_ring);
-
- priv->rx_ring = NULL;
- priv->tx_ring = NULL;
+ /* no more works over the priv->wq */
+ if (priv->wq) {
+ flush_workqueue(priv->wq);
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
+ }
}
static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return ib_set_vf_link_state(priv->ca, vf, priv->port, link_state);
}
@@ -1710,7 +1756,7 @@ static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_stat
static int ipoib_get_vf_config(struct net_device *dev, int vf,
struct ifla_vf_info *ivf)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int err;
err = ib_get_vf_config(priv->ca, vf, priv->port, ivf);
@@ -1724,7 +1770,7 @@ static int ipoib_get_vf_config(struct net_device *dev, int vf,
static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (type != IFLA_VF_IB_NODE_GUID && type != IFLA_VF_IB_PORT_GUID)
return -EINVAL;
@@ -1735,7 +1781,7 @@ static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
static int ipoib_get_vf_stats(struct net_device *dev, int vf,
struct ifla_vf_stats *vf_stats)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return ib_get_vf_stats(priv->ca, vf, priv->port, vf_stats);
}
@@ -1773,21 +1819,12 @@ static const struct net_device_ops ipoib_netdev_ops_vf = {
.ndo_get_iflink = ipoib_get_iflink,
};
-void ipoib_setup(struct net_device *dev)
+void ipoib_setup_common(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
-
- if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
- dev->netdev_ops = &ipoib_netdev_ops_vf;
- else
- dev->netdev_ops = &ipoib_netdev_ops_pf;
-
dev->header_ops = &ipoib_header_ops;
ipoib_set_ethtool_ops(dev);
- netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
-
dev->watchdog_timeo = HZ;
dev->flags |= IFF_BROADCAST | IFF_MULTICAST;
@@ -1801,11 +1838,14 @@ void ipoib_setup(struct net_device *dev)
netif_keep_dst(dev);
memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
+}
- priv->dev = dev;
+static void ipoib_build_priv(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ priv->dev = dev;
spin_lock_init(&priv->lock);
-
init_rwsem(&priv->vlan_rwsem);
INIT_LIST_HEAD(&priv->path_list);
@@ -1823,22 +1863,99 @@ void ipoib_setup(struct net_device *dev)
INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh);
}
-struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
+static const struct net_device_ops ipoib_netdev_default_pf = {
+ .ndo_init = ipoib_dev_init_default,
+ .ndo_uninit = ipoib_dev_uninit_default,
+ .ndo_open = ipoib_ib_dev_open_default,
+ .ndo_stop = ipoib_ib_dev_stop_default,
+};
+
+static struct net_device
+*ipoib_create_netdev_default(struct ib_device *hca,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *))
{
struct net_device *dev;
+ struct rdma_netdev *rn;
- dev = alloc_netdev((int)sizeof(struct ipoib_dev_priv), name,
- NET_NAME_UNKNOWN, ipoib_setup);
+ dev = alloc_netdev((int)sizeof(struct rdma_netdev),
+ name,
+ name_assign_type, setup);
if (!dev)
return NULL;
- return netdev_priv(dev);
+ rn = netdev_priv(dev);
+
+ rn->send = ipoib_send;
+ rn->attach_mcast = ipoib_mcast_attach;
+ rn->detach_mcast = ipoib_mcast_detach;
+ rn->hca = hca;
+
+ dev->netdev_ops = &ipoib_netdev_default_pf;
+
+ return dev;
+}
+
+static struct net_device *ipoib_get_netdev(struct ib_device *hca, u8 port,
+ const char *name)
+{
+ struct net_device *dev;
+
+ if (hca->alloc_rdma_netdev) {
+ dev = hca->alloc_rdma_netdev(hca, port,
+ RDMA_NETDEV_IPOIB, name,
+ NET_NAME_UNKNOWN,
+ ipoib_setup_common);
+ if (IS_ERR_OR_NULL(dev) && PTR_ERR(dev) != -EOPNOTSUPP)
+ return NULL;
+ }
+
+ if (!hca->alloc_rdma_netdev || PTR_ERR(dev) == -EOPNOTSUPP)
+ dev = ipoib_create_netdev_default(hca, name, NET_NAME_UNKNOWN,
+ ipoib_setup_common);
+
+ return dev;
+}
+
+struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
+ const char *name)
+{
+ struct net_device *dev;
+ struct ipoib_dev_priv *priv;
+ struct rdma_netdev *rn;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return NULL;
+
+ dev = ipoib_get_netdev(hca, port, name);
+ if (!dev)
+ goto free_priv;
+
+ priv->rn_ops = dev->netdev_ops;
+
+ /* fixme : should be after the query_cap */
+ if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
+ dev->netdev_ops = &ipoib_netdev_ops_vf;
+ else
+ dev->netdev_ops = &ipoib_netdev_ops_pf;
+
+ rn = netdev_priv(dev);
+ rn->clnt_priv = priv;
+ ipoib_build_priv(dev);
+
+ return priv;
+free_priv:
+ kfree(priv);
+ return NULL;
}
static ssize_t show_pkey(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+ struct net_device *ndev = to_net_dev(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ndev);
return sprintf(buf, "0x%04x\n", priv->pkey);
}
@@ -1847,14 +1964,15 @@ static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
static ssize_t show_umcast(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+ struct net_device *ndev = to_net_dev(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ndev);
return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
}
void ipoib_set_umcast(struct net_device *ndev, int umcast_val)
{
- struct ipoib_dev_priv *priv = netdev_priv(ndev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ndev);
if (umcast_val > 0) {
set_bit(IPOIB_FLAG_UMCAST, &priv->flags);
@@ -1927,7 +2045,7 @@ static int ipoib_check_lladdr(struct net_device *dev,
static int ipoib_set_mac(struct net_device *dev, void *addr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sockaddr_storage *ss = addr;
int ret;
@@ -2000,7 +2118,7 @@ void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
priv->hca_caps = hca->attrs.device_cap_flags;
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
- priv->dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
+ priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
priv->dev->hw_features |= NETIF_F_TSO;
@@ -2016,7 +2134,7 @@ static struct net_device *ipoib_add_port(const char *format,
struct ib_port_attr attr;
int result = -ENOMEM;
- priv = ipoib_intf_alloc(format);
+ priv = ipoib_intf_alloc(hca, port, format);
if (!priv)
goto alloc_mem_failed;
@@ -2090,8 +2208,6 @@ static struct net_device *ipoib_add_port(const char *format,
goto register_failed;
}
- ipoib_create_debug_files(priv->dev);
-
if (ipoib_cm_add_mode_attr(priv->dev))
goto sysfs_failed;
if (ipoib_add_pkey_attr(priv->dev))
@@ -2106,7 +2222,6 @@ static struct net_device *ipoib_add_port(const char *format,
return priv->dev;
sysfs_failed:
- ipoib_delete_debug_files(priv->dev);
unregister_netdev(priv->dev);
register_failed:
@@ -2146,7 +2261,7 @@ static void ipoib_add_one(struct ib_device *device)
continue;
dev = ipoib_add_port("ib%d", device, p);
if (!IS_ERR(dev)) {
- priv = netdev_priv(dev);
+ priv = ipoib_priv(dev);
list_add_tail(&priv->list, dev_list);
count++;
}
@@ -2186,11 +2301,18 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
unregister_netdev(priv->dev);
free_netdev(priv->dev);
+ kfree(priv);
}
kfree(dev_list);
}
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+static struct notifier_block ipoib_netdev_notifier = {
+ .notifier_call = ipoib_netdev_event,
+};
+#endif
+
static int __init ipoib_init_module(void)
{
int ret;
@@ -2243,6 +2365,9 @@ static int __init ipoib_init_module(void)
if (ret)
goto err_client;
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+ register_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
return 0;
err_client:
@@ -2260,6 +2385,9 @@ err_fs:
static void __exit ipoib_cleanup_module(void)
{
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+ unregister_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
ipoib_netlink_fini();
ib_unregister_client(&ipoib_client);
ib_sa_unregister_client(&ipoib_sa_client);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 69e146cdc306..057f58e6afca 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -114,7 +114,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
struct net_device *dev = mcast->dev;
int tx_dropped = 0;
- ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n",
+ ipoib_dbg_mcast(ipoib_priv(dev), "deleting multicast group %pI6\n",
mcast->mcmember.mgid.raw);
/* remove all neigh connected to this mcast */
@@ -158,7 +158,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node *n = priv->multicast_tree.rb_node;
while (n) {
@@ -182,7 +182,7 @@ static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid
static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
while (*n) {
@@ -212,8 +212,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
struct ib_sa_mcmember_rec *mcmember)
{
struct net_device *dev = mcast->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_ah *ah;
+ struct rdma_ah_attr av;
int ret;
int set_qkey = 0;
@@ -260,8 +262,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
return 0;
}
- ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
- &mcast->mcmember.mgid, set_qkey);
+ ret = rn->attach_mcast(dev, priv->ca, &mcast->mcmember.mgid,
+ be16_to_cpu(mcast->mcmember.mlid),
+ set_qkey, priv->qkey);
if (ret < 0) {
ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n",
mcast->mcmember.mgid.raw);
@@ -271,40 +274,34 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
}
}
- {
- struct ib_ah_attr av = {
- .dlid = be16_to_cpu(mcast->mcmember.mlid),
- .port_num = priv->port,
- .sl = mcast->mcmember.sl,
- .ah_flags = IB_AH_GRH,
- .static_rate = mcast->mcmember.rate,
- .grh = {
- .flow_label = be32_to_cpu(mcast->mcmember.flow_label),
- .hop_limit = mcast->mcmember.hop_limit,
- .sgid_index = 0,
- .traffic_class = mcast->mcmember.traffic_class
- }
- };
- av.grh.dgid = mcast->mcmember.mgid;
-
- ah = ipoib_create_ah(dev, priv->pd, &av);
- if (IS_ERR(ah)) {
- ipoib_warn(priv, "ib_address_create failed %ld\n",
- -PTR_ERR(ah));
- /* use original error */
- return PTR_ERR(ah);
- } else {
- spin_lock_irq(&priv->lock);
- mcast->ah = ah;
- spin_unlock_irq(&priv->lock);
-
- ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",
- mcast->mcmember.mgid.raw,
- mcast->ah->ah,
- be16_to_cpu(mcast->mcmember.mlid),
- mcast->mcmember.sl);
- }
+ memset(&av, 0, sizeof(av));
+ av.type = rdma_ah_find_type(priv->ca, priv->port);
+ rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid)),
+ rdma_ah_set_port_num(&av, priv->port);
+ rdma_ah_set_sl(&av, mcast->mcmember.sl);
+ rdma_ah_set_static_rate(&av, mcast->mcmember.rate);
+
+ rdma_ah_set_grh(&av, &mcast->mcmember.mgid,
+ be32_to_cpu(mcast->mcmember.flow_label),
+ 0, mcast->mcmember.hop_limit,
+ mcast->mcmember.traffic_class);
+
+ ah = ipoib_create_ah(dev, priv->pd, &av);
+ if (IS_ERR(ah)) {
+ ipoib_warn(priv, "ib_address_create failed %ld\n",
+ -PTR_ERR(ah));
+ /* use original error */
+ return PTR_ERR(ah);
}
+ spin_lock_irq(&priv->lock);
+ mcast->ah = ah;
+ spin_unlock_irq(&priv->lock);
+
+ ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",
+ mcast->mcmember.mgid.raw,
+ mcast->ah->ah,
+ be16_to_cpu(mcast->mcmember.mlid),
+ mcast->mcmember.sl);
/* actually send any queued packets */
netif_tx_lock_bh(dev);
@@ -331,7 +328,6 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
carrier_on_task);
struct ib_port_attr attr;
- int ret;
if (ib_query_port(priv->ca, priv->port, &attr) ||
attr.state != IB_PORT_ACTIVE) {
@@ -344,11 +340,9 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
* because the broadcast group must always be joined first and is always
* re-joined if the SM changes substantially.
*/
- ret = ipoib_check_sm_sendonly_fullmember_support(priv);
- if (ret < 0)
- pr_debug("%s failed query sm support for sendonly-fullmember (ret: %d)\n",
- priv->dev->name, ret);
-
+ priv->sm_fullmember_sendonly_support =
+ ib_sa_sendonly_fullmem_support(&ipoib_sa_client,
+ priv->ca, priv->port);
/*
* Take rtnl_lock to avoid racing with ipoib_stop() and
* turning the carrier back on while a device is being
@@ -375,7 +369,7 @@ static int ipoib_mcast_join_complete(int status,
{
struct ipoib_mcast *mcast = multicast->context;
struct net_device *dev = mcast->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
@@ -477,7 +471,7 @@ out_locked:
*/
static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_sa_multicast *multicast;
struct ib_sa_mcmember_rec rec = {
.join_state = 1
@@ -489,6 +483,9 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
return -EINVAL;
+ init_completion(&mcast->done);
+ set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+
ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);
rec.mgid = mcast->mcmember.mgid;
@@ -647,8 +644,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
if (mcast->backoff == 1 ||
time_after_eq(jiffies, mcast->delay_until)) {
/* Found the next unjoined group */
- init_completion(&mcast->done);
- set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
if (ipoib_mcast_join(dev, mcast)) {
spin_unlock_irq(&priv->lock);
return;
@@ -668,17 +663,15 @@ out:
queue_delayed_work(priv->wq, &priv->mcast_task,
delay_until - jiffies);
}
- if (mcast) {
- init_completion(&mcast->done);
- set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ if (mcast)
ipoib_mcast_join(dev, mcast);
- }
+
spin_unlock_irq(&priv->lock);
}
void ipoib_mcast_start_thread(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned long flags;
ipoib_dbg_mcast(priv, "starting multicast thread\n");
@@ -690,7 +683,7 @@ void ipoib_mcast_start_thread(struct net_device *dev)
int ipoib_mcast_stop_thread(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned long flags;
ipoib_dbg_mcast(priv, "stopping multicast thread\n");
@@ -706,7 +699,8 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
int ret = 0;
if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
@@ -720,8 +714,8 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
mcast->mcmember.mgid.raw);
/* Remove ourselves from the multicast group */
- ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
- be16_to_cpu(mcast->mcmember.mlid));
+ ret = rn->detach_mcast(dev, priv->ca, &mcast->mcmember.mgid,
+ be16_to_cpu(mcast->mcmember.mlid));
if (ret)
ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
} else if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
@@ -762,7 +756,8 @@ void ipoib_mcast_remove_list(struct list_head *remove_list)
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_mcast *mcast;
unsigned long flags;
void *mgid = daddr + 4;
@@ -825,7 +820,8 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
}
}
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+ mcast->ah->last_send = rn->send(dev, skb, mcast->ah->ah,
+ IB_MULTICAST_QPN);
if (neigh)
ipoib_neigh_put(neigh);
return;
@@ -837,7 +833,7 @@ unlock:
void ipoib_mcast_dev_flush(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
LIST_HEAD(remove_list);
struct ipoib_mcast *mcast, *tmcast;
unsigned long flags;
@@ -1029,7 +1025,7 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
{
- struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(iter->dev);
struct rb_node *n;
struct ipoib_mcast *mcast;
int ret = 1;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index cdc7df4fdb8a..28884781311b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -44,7 +44,7 @@ static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = {
static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
u16 val;
if (nla_put_u16(skb, IFLA_IPOIB_PKEY, priv->pkey))
@@ -107,7 +107,7 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
if (!pdev || pdev->type != ARPHRD_INFINIBAND)
return -ENODEV;
- ppriv = netdev_priv(pdev);
+ ppriv = ipoib_priv(pdev);
if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) {
ipoib_warn(ppriv, "child creation disallowed for child devices\n");
@@ -129,7 +129,8 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
*/
child_pkey |= 0x8000;
- err = __ipoib_vlan_add(ppriv, netdev_priv(dev), child_pkey, IPOIB_RTNL_CHILD);
+ err = __ipoib_vlan_add(ppriv, ipoib_priv(dev),
+ child_pkey, IPOIB_RTNL_CHILD);
if (!err && data)
err = ipoib_changelink(dev, tb, data);
@@ -140,8 +141,8 @@ static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head
{
struct ipoib_dev_priv *priv, *ppriv;
- priv = netdev_priv(dev);
- ppriv = netdev_priv(priv->parent);
+ priv = ipoib_priv(dev);
+ ppriv = ipoib_priv(priv->parent);
down_write(&ppriv->vlan_rwsem);
unregister_netdevice_queue(dev, head);
@@ -161,7 +162,7 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
.maxtype = IFLA_IPOIB_MAX,
.policy = ipoib_policy,
.priv_size = sizeof(struct ipoib_dev_priv),
- .setup = ipoib_setup,
+ .setup = ipoib_setup_common,
.newlink = ipoib_new_child_link,
.changelink = ipoib_changelink,
.dellink = ipoib_unregister_child_dev,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 189dcd1709d2..bb64baf25309 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -35,9 +35,10 @@
#include "ipoib.h"
-int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey)
+int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid, int set_qkey, u32 qkey)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_attr *qp_attr = NULL;
int ret;
u16 pkey_index;
@@ -56,7 +57,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int
goto out;
/* set correct QKey for QP */
- qp_attr->qkey = priv->qkey;
+ qp_attr->qkey = qkey;
ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
if (ret) {
ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
@@ -74,9 +75,20 @@ out:
return ret;
}
+int ipoib_mcast_detach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int ret;
+
+ ret = ib_detach_mcast(priv->qp, mgid, mlid);
+
+ return ret;
+}
+
int ipoib_init_qp(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
struct ib_qp_attr qp_attr;
int attr_mask;
@@ -130,7 +142,7 @@ out_fail:
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_init_attr init_attr = {
.cap = {
.max_send_wr = ipoib_sendq_size,
@@ -147,22 +159,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
int ret, size;
int i;
- priv->pd = ib_alloc_pd(priv->ca, 0);
- if (IS_ERR(priv->pd)) {
- printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
- return -ENODEV;
- }
-
- /*
- * the various IPoIB tasks assume they will never race against
- * themselves, so always use a single thread workqueue
- */
- priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM);
- if (!priv->wq) {
- printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
- goto out_free_pd;
- }
-
size = ipoib_recvq_size + 1;
ret = ipoib_cm_dev_init(dev);
if (!ret) {
@@ -173,7 +169,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
size += ipoib_recvq_size * ipoib_max_conn_qp;
} else
if (ret != -ENOSYS)
- goto out_free_wq;
+ return -ENODEV;
cq_attr.cqe = size;
priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL,
@@ -212,10 +208,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
goto out_free_send_cq;
}
- priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
- priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
- priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff;
-
for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
priv->tx_sge[i].lkey = priv->pd->local_dma_lkey;
@@ -247,26 +239,18 @@ out_free_recv_cq:
out_cm_dev_cleanup:
ipoib_cm_dev_cleanup(dev);
-out_free_wq:
- destroy_workqueue(priv->wq);
- priv->wq = NULL;
-
-out_free_pd:
- ib_dealloc_pd(priv->pd);
-
return -ENODEV;
}
void ipoib_transport_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (priv->qp) {
if (ib_destroy_qp(priv->qp))
ipoib_warn(priv, "ib_qp_destroy failed\n");
priv->qp = NULL;
- clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
}
if (ib_destroy_cq(priv->send_cq))
@@ -274,16 +258,6 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
if (ib_destroy_cq(priv->recv_cq))
ipoib_warn(priv, "ib_cq_destroy (recv) failed\n");
-
- ipoib_cm_dev_cleanup(dev);
-
- if (priv->wq) {
- flush_workqueue(priv->wq);
- destroy_workqueue(priv->wq);
- priv->wq = NULL;
- }
-
- ib_dealloc_pd(priv->pd);
}
void ipoib_event(struct ib_event_handler *handler,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 3e10e3dac2e7..36dc4fcaa3cd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -44,7 +44,7 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr,
char *buf)
{
struct net_device *dev = to_net_dev(d);
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return sprintf(buf, "%s\n", priv->parent->name);
}
@@ -86,8 +86,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
goto register_failed;
}
- ipoib_create_debug_files(priv->dev);
-
/* RTNL childs don't need proprietary sysfs entries */
if (type == IPOIB_LEGACY_CHILD) {
if (ipoib_cm_add_mode_attr(priv->dev))
@@ -108,7 +106,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
sysfs_failed:
result = -ENOMEM;
- ipoib_delete_debug_files(priv->dev);
unregister_netdevice(priv->dev);
register_failed:
@@ -128,14 +125,15 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- ppriv = netdev_priv(pdev);
+ ppriv = ipoib_priv(pdev);
if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
return -EPERM;
snprintf(intf_name, sizeof intf_name, "%s.%04x",
ppriv->dev->name, pkey);
- priv = ipoib_intf_alloc(intf_name);
+
+ priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
if (!priv)
return -ENOMEM;
@@ -183,7 +181,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- ppriv = netdev_priv(pdev);
+ ppriv = ipoib_priv(pdev);
if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
return -EPERM;
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 81ae2e30dd12..12ed62ce9ff7 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -612,7 +612,7 @@ iser_check_remote_inv(struct iser_conn *iser_conn,
iser_conn, rkey);
if (unlikely(!iser_conn->snd_w_inv)) {
- iser_err("conn %p: unexepected remote invalidation, "
+ iser_err("conn %p: unexpected remote invalidation, "
"terminating connection\n", iser_conn);
return -EPROTO;
}
diff --git a/drivers/infiniband/ulp/opa_vnic/Kconfig b/drivers/infiniband/ulp/opa_vnic/Kconfig
new file mode 100644
index 000000000000..48132ab5e6b9
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Kconfig
@@ -0,0 +1,8 @@
+config INFINIBAND_OPA_VNIC
+ tristate "Intel OPA VNIC support"
+ depends on X86_64 && INFINIBAND
+ ---help---
+ This is Omni-Path (OPA) Virtual Network Interface Controller (VNIC)
+ driver for Ethernet over Omni-Path feature. It implements the HW
+ independent VNIC functionality. It interfaces with Linux stack for
+ data path and IB MAD for the control path.
diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile b/drivers/infiniband/ulp/opa_vnic/Makefile
new file mode 100644
index 000000000000..8061b287cfe4
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Makefile
@@ -0,0 +1,7 @@
+# Makefile - Intel Omni-Path Virtual Network Controller driver
+# Copyright(c) 2017, Intel Corporation.
+#
+obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o
+
+opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o \
+ opa_vnic_vema.o opa_vnic_vema_iface.o
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
new file mode 100644
index 000000000000..2e8fee982436
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
@@ -0,0 +1,475 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC encapsulation/decapsulation function.
+ */
+
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+
+#include "opa_vnic_internal.h"
+
+/* OPA 16B Header fields */
+#define OPA_16B_LID_MASK 0xFFFFFull
+#define OPA_16B_SLID_HIGH_SHFT 8
+#define OPA_16B_SLID_MASK 0xF00ull
+#define OPA_16B_DLID_MASK 0xF000ull
+#define OPA_16B_DLID_HIGH_SHFT 12
+#define OPA_16B_LEN_SHFT 20
+#define OPA_16B_SC_SHFT 20
+#define OPA_16B_RC_SHFT 25
+#define OPA_16B_PKEY_SHFT 16
+
+#define OPA_VNIC_L4_HDR_SHFT 16
+
+/* L2+L4 hdr len is 20 bytes (5 quad words) */
+#define OPA_VNIC_HDR_QW_LEN 5
+
+static inline void opa_vnic_make_header(u8 *hdr, u32 slid, u32 dlid, u16 len,
+ u16 pkey, u16 entropy, u8 sc, u8 rc,
+ u8 l4_type, u16 l4_hdr)
+{
+ /* h[1]: LT=1, 16B L2=10 */
+ u32 h[OPA_VNIC_HDR_QW_LEN] = {0, 0xc0000000, 0, 0, 0};
+
+ h[2] = l4_type;
+ h[3] = entropy;
+ h[4] = l4_hdr << OPA_VNIC_L4_HDR_SHFT;
+
+ /* Extract and set 4 upper bits and 20 lower bits of the lids */
+ h[0] |= (slid & OPA_16B_LID_MASK);
+ h[2] |= ((slid >> (20 - OPA_16B_SLID_HIGH_SHFT)) & OPA_16B_SLID_MASK);
+
+ h[1] |= (dlid & OPA_16B_LID_MASK);
+ h[2] |= ((dlid >> (20 - OPA_16B_DLID_HIGH_SHFT)) & OPA_16B_DLID_MASK);
+
+ h[0] |= (len << OPA_16B_LEN_SHFT);
+ h[1] |= (rc << OPA_16B_RC_SHFT);
+ h[1] |= (sc << OPA_16B_SC_SHFT);
+ h[2] |= ((u32)pkey << OPA_16B_PKEY_SHFT);
+
+ memcpy(hdr, h, OPA_VNIC_HDR_LEN);
+}
+
+/*
+ * Using a simple hash table for mac table implementation with the last octet
+ * of mac address as a key.
+ */
+static void opa_vnic_free_mac_tbl(struct hlist_head *mactbl)
+{
+ struct opa_vnic_mac_tbl_node *node;
+ struct hlist_node *tmp;
+ int bkt;
+
+ if (!mactbl)
+ return;
+
+ vnic_hash_for_each_safe(mactbl, bkt, tmp, node, hlist) {
+ hash_del(&node->hlist);
+ kfree(node);
+ }
+ kfree(mactbl);
+}
+
+static struct hlist_head *opa_vnic_alloc_mac_tbl(void)
+{
+ u32 size = sizeof(struct hlist_head) * OPA_VNIC_MAC_TBL_SIZE;
+ struct hlist_head *mactbl;
+
+ mactbl = kzalloc(size, GFP_KERNEL);
+ if (!mactbl)
+ return ERR_PTR(-ENOMEM);
+
+ vnic_hash_init(mactbl);
+ return mactbl;
+}
+
+/* opa_vnic_release_mac_tbl - empty and free the mac table */
+void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter)
+{
+ struct hlist_head *mactbl;
+
+ mutex_lock(&adapter->mactbl_lock);
+ mactbl = rcu_access_pointer(adapter->mactbl);
+ rcu_assign_pointer(adapter->mactbl, NULL);
+ synchronize_rcu();
+ opa_vnic_free_mac_tbl(mactbl);
+ mutex_unlock(&adapter->mactbl_lock);
+}
+
+/*
+ * opa_vnic_query_mac_tbl - query the mac table for a section
+ *
+ * This function implements query of specific function of the mac table.
+ * The function also expects the requested range to be valid.
+ */
+void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl)
+{
+ struct opa_vnic_mac_tbl_node *node;
+ struct hlist_head *mactbl;
+ int bkt;
+ u16 loffset, lnum_entries;
+
+ rcu_read_lock();
+ mactbl = rcu_dereference(adapter->mactbl);
+ if (!mactbl)
+ goto get_mac_done;
+
+ loffset = be16_to_cpu(tbl->offset);
+ lnum_entries = be16_to_cpu(tbl->num_entries);
+
+ vnic_hash_for_each(mactbl, bkt, node, hlist) {
+ struct __opa_vnic_mactable_entry *nentry = &node->entry;
+ struct opa_veswport_mactable_entry *entry;
+
+ if ((node->index < loffset) ||
+ (node->index >= (loffset + lnum_entries)))
+ continue;
+
+ /* populate entry in the tbl corresponding to the index */
+ entry = &tbl->tbl_entries[node->index - loffset];
+ memcpy(entry->mac_addr, nentry->mac_addr,
+ ARRAY_SIZE(entry->mac_addr));
+ memcpy(entry->mac_addr_mask, nentry->mac_addr_mask,
+ ARRAY_SIZE(entry->mac_addr_mask));
+ entry->dlid_sd = cpu_to_be32(nentry->dlid_sd);
+ }
+ tbl->mac_tbl_digest = cpu_to_be32(adapter->info.vport.mac_tbl_digest);
+get_mac_done:
+ rcu_read_unlock();
+}
+
+/*
+ * opa_vnic_update_mac_tbl - update mac table section
+ *
+ * This function updates the specified section of the mac table.
+ * The procedure includes following steps.
+ * - Allocate a new mac (hash) table.
+ * - Add the specified entries to the new table.
+ * (except the ones that are requested to be deleted).
+ * - Add all the other entries from the old mac table.
+ * - If there is a failure, free the new table and return.
+ * - Switch to the new table.
+ * - Free the old table and return.
+ *
+ * The function also expects the requested range to be valid.
+ */
+int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl)
+{
+ struct opa_vnic_mac_tbl_node *node, *new_node;
+ struct hlist_head *new_mactbl, *old_mactbl;
+ int i, bkt, rc = 0;
+ u8 key;
+ u16 loffset, lnum_entries;
+
+ mutex_lock(&adapter->mactbl_lock);
+ /* allocate new mac table */
+ new_mactbl = opa_vnic_alloc_mac_tbl();
+ if (IS_ERR(new_mactbl)) {
+ mutex_unlock(&adapter->mactbl_lock);
+ return PTR_ERR(new_mactbl);
+ }
+
+ loffset = be16_to_cpu(tbl->offset);
+ lnum_entries = be16_to_cpu(tbl->num_entries);
+
+ /* add updated entries to the new mac table */
+ for (i = 0; i < lnum_entries; i++) {
+ struct __opa_vnic_mactable_entry *nentry;
+ struct opa_veswport_mactable_entry *entry =
+ &tbl->tbl_entries[i];
+ u8 *mac_addr = entry->mac_addr;
+ u8 empty_mac[ETH_ALEN] = { 0 };
+
+ v_dbg("new mac entry %4d: %02x:%02x:%02x:%02x:%02x:%02x %x\n",
+ loffset + i, mac_addr[0], mac_addr[1], mac_addr[2],
+ mac_addr[3], mac_addr[4], mac_addr[5],
+ entry->dlid_sd);
+
+ /* if the entry is being removed, do not add it */
+ if (!memcmp(mac_addr, empty_mac, ARRAY_SIZE(empty_mac)))
+ continue;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node) {
+ rc = -ENOMEM;
+ goto updt_done;
+ }
+
+ node->index = loffset + i;
+ nentry = &node->entry;
+ memcpy(nentry->mac_addr, entry->mac_addr,
+ ARRAY_SIZE(nentry->mac_addr));
+ memcpy(nentry->mac_addr_mask, entry->mac_addr_mask,
+ ARRAY_SIZE(nentry->mac_addr_mask));
+ nentry->dlid_sd = be32_to_cpu(entry->dlid_sd);
+ key = node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX];
+ vnic_hash_add(new_mactbl, &node->hlist, key);
+ }
+
+ /* add other entries from current mac table to new mac table */
+ old_mactbl = rcu_access_pointer(adapter->mactbl);
+ if (!old_mactbl)
+ goto switch_tbl;
+
+ vnic_hash_for_each(old_mactbl, bkt, node, hlist) {
+ if ((node->index >= loffset) &&
+ (node->index < (loffset + lnum_entries)))
+ continue;
+
+ new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
+ if (!new_node) {
+ rc = -ENOMEM;
+ goto updt_done;
+ }
+
+ new_node->index = node->index;
+ memcpy(&new_node->entry, &node->entry, sizeof(node->entry));
+ key = new_node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX];
+ vnic_hash_add(new_mactbl, &new_node->hlist, key);
+ }
+
+switch_tbl:
+ /* switch to new table */
+ rcu_assign_pointer(adapter->mactbl, new_mactbl);
+ synchronize_rcu();
+
+ adapter->info.vport.mac_tbl_digest = be32_to_cpu(tbl->mac_tbl_digest);
+updt_done:
+ /* upon failure, free the new table; otherwise, free the old table */
+ if (rc)
+ opa_vnic_free_mac_tbl(new_mactbl);
+ else
+ opa_vnic_free_mac_tbl(old_mactbl);
+
+ mutex_unlock(&adapter->mactbl_lock);
+ return rc;
+}
+
+/* opa_vnic_chk_mac_tbl - check mac table for dlid */
+static uint32_t opa_vnic_chk_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct ethhdr *mac_hdr)
+{
+ struct opa_vnic_mac_tbl_node *node;
+ struct hlist_head *mactbl;
+ u32 dlid = 0;
+ u8 key;
+
+ rcu_read_lock();
+ mactbl = rcu_dereference(adapter->mactbl);
+ if (unlikely(!mactbl))
+ goto chk_done;
+
+ key = mac_hdr->h_dest[OPA_VNIC_MAC_HASH_IDX];
+ vnic_hash_for_each_possible(mactbl, node, hlist, key) {
+ struct __opa_vnic_mactable_entry *entry = &node->entry;
+
+ /* if related to source mac, skip */
+ if (unlikely(OPA_VNIC_DLID_SD_IS_SRC_MAC(entry->dlid_sd)))
+ continue;
+
+ if (!memcmp(node->entry.mac_addr, mac_hdr->h_dest,
+ ARRAY_SIZE(node->entry.mac_addr))) {
+ /* mac address found */
+ dlid = OPA_VNIC_DLID_SD_GET_DLID(node->entry.dlid_sd);
+ break;
+ }
+ }
+
+chk_done:
+ rcu_read_unlock();
+ return dlid;
+}
+
+/* opa_vnic_get_dlid - find and return the DLID */
+static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter *adapter,
+ struct sk_buff *skb, u8 def_port)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ u32 dlid;
+
+ dlid = opa_vnic_chk_mac_tbl(adapter, mac_hdr);
+ if (dlid)
+ return dlid;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest)) {
+ dlid = info->vesw.u_mcast_dlid;
+ } else {
+ if (is_local_ether_addr(mac_hdr->h_dest)) {
+ dlid = ((uint32_t)mac_hdr->h_dest[5] << 16) |
+ ((uint32_t)mac_hdr->h_dest[4] << 8) |
+ mac_hdr->h_dest[3];
+ if (unlikely(!dlid))
+ v_warn("Null dlid in MAC address\n");
+ } else if (def_port != OPA_VNIC_INVALID_PORT) {
+ dlid = info->vesw.u_ucast_dlid[def_port];
+ }
+ }
+
+ return dlid;
+}
+
+/* opa_vnic_get_sc - return the service class */
+static u8 opa_vnic_get_sc(struct __opa_veswport_info *info,
+ struct sk_buff *skb)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ u16 vlan_tci;
+ u8 sc;
+
+ if (!__vlan_get_tag(skb, &vlan_tci)) {
+ u8 pcp = OPA_VNIC_VLAN_PCP(vlan_tci);
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ sc = info->vport.pcp_to_sc_mc[pcp];
+ else
+ sc = info->vport.pcp_to_sc_uc[pcp];
+ } else {
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ sc = info->vport.non_vlan_sc_mc;
+ else
+ sc = info->vport.non_vlan_sc_uc;
+ }
+
+ return sc;
+}
+
+u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ struct __opa_veswport_info *info = &adapter->info;
+ u8 vl;
+
+ if (skb_vlan_tag_present(skb)) {
+ u8 pcp = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ vl = info->vport.pcp_to_vl_mc[pcp];
+ else
+ vl = info->vport.pcp_to_vl_uc[pcp];
+ } else {
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ vl = info->vport.non_vlan_vl_mc;
+ else
+ vl = info->vport.non_vlan_vl_uc;
+ }
+
+ return vl;
+}
+
+/* opa_vnic_calc_entropy - calculate the packet entropy */
+u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+ u16 hash16;
+
+ /*
+ * Get flow based 16-bit hash and then XOR the upper and lower bytes
+ * to get the entropy.
+ * __skb_tx_hash limits qcount to 16 bits. Hence, get 15-bit hash.
+ */
+ hash16 = __skb_tx_hash(adapter->netdev, skb, BIT(15));
+ return (u8)((hash16 >> 8) ^ (hash16 & 0xff));
+}
+
+/* opa_vnic_get_def_port - get default port based on entropy */
+static inline u8 opa_vnic_get_def_port(struct opa_vnic_adapter *adapter,
+ u8 entropy)
+{
+ u8 flow_id;
+
+ /* Add the upper and lower 4-bits of entropy to get the flow id */
+ flow_id = ((entropy & 0xf) + (entropy >> 4));
+ return adapter->flow_tbl[flow_id & (OPA_VNIC_FLOW_TBL_SIZE - 1)];
+}
+
+/* Calculate packet length including OPA header, crc and padding */
+static inline int opa_vnic_wire_length(struct sk_buff *skb)
+{
+ u32 pad_len;
+
+ /* padding for 8 bytes size alignment */
+ pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
+ pad_len += OPA_VNIC_ICRC_TAIL_LEN;
+
+ return (skb->len + pad_len) >> 3;
+}
+
+/* opa_vnic_encap_skb - encapsulate skb packet with OPA header and meta data */
+void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct opa_vnic_skb_mdata *mdata;
+ u8 def_port, sc, entropy, *hdr;
+ u16 len, l4_hdr;
+ u32 dlid;
+
+ hdr = skb_push(skb, OPA_VNIC_HDR_LEN);
+
+ entropy = opa_vnic_calc_entropy(adapter, skb);
+ def_port = opa_vnic_get_def_port(adapter, entropy);
+ len = opa_vnic_wire_length(skb);
+ dlid = opa_vnic_get_dlid(adapter, skb, def_port);
+ sc = opa_vnic_get_sc(info, skb);
+ l4_hdr = info->vesw.vesw_id;
+
+ mdata = (struct opa_vnic_skb_mdata *)skb_push(skb, sizeof(*mdata));
+ mdata->vl = opa_vnic_get_vl(adapter, skb);
+ mdata->entropy = entropy;
+ mdata->flags = 0;
+ if (unlikely(!dlid)) {
+ mdata->flags = OPA_VNIC_SKB_MDATA_ENCAP_ERR;
+ return;
+ }
+
+ opa_vnic_make_header(hdr, info->vport.encap_slid, dlid, len,
+ info->vesw.pkey, entropy, sc, 0,
+ OPA_VNIC_L4_ETHR, l4_hdr);
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
new file mode 100644
index 000000000000..4c434b9dd84c
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
@@ -0,0 +1,489 @@
+#ifndef _OPA_VNIC_ENCAP_H
+#define _OPA_VNIC_ENCAP_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains all OPA VNIC declaration required for encapsulation
+ * and decapsulation of Ethernet packets
+ */
+
+#include <linux/types.h>
+#include <rdma/ib_mad.h>
+
+/* EMA class version */
+#define OPA_EMA_CLASS_VERSION 0x80
+
+/*
+ * Define the Intel vendor management class for OPA
+ * ETHERNET MANAGEMENT
+ */
+#define OPA_MGMT_CLASS_INTEL_EMA 0x34
+
+/* EM attribute IDs */
+#define OPA_EM_ATTR_CLASS_PORT_INFO 0x0001
+#define OPA_EM_ATTR_VESWPORT_INFO 0x0011
+#define OPA_EM_ATTR_VESWPORT_MAC_ENTRIES 0x0012
+#define OPA_EM_ATTR_IFACE_UCAST_MACS 0x0013
+#define OPA_EM_ATTR_IFACE_MCAST_MACS 0x0014
+#define OPA_EM_ATTR_DELETE_VESW 0x0015
+#define OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS 0x0020
+#define OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS 0x0022
+
+/* VNIC configured and operational state values */
+#define OPA_VNIC_STATE_DROP_ALL 0x1
+#define OPA_VNIC_STATE_FORWARDING 0x3
+
+#define OPA_VESW_MAX_NUM_DEF_PORT 16
+#define OPA_VNIC_MAX_NUM_PCP 8
+
+#define OPA_VNIC_EMA_DATA (OPA_MGMT_MAD_SIZE - IB_MGMT_VENDOR_HDR)
+
+/* Defines for vendor specific notice(trap) attributes */
+#define OPA_INTEL_EMA_NOTICE_TYPE_INFO 0x04
+
+/* INTEL OUI */
+#define INTEL_OUI_1 0x00
+#define INTEL_OUI_2 0x06
+#define INTEL_OUI_3 0x6a
+
+/* Trap opcodes sent from VNIC */
+#define OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE 0x1
+#define OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE 0x2
+#define OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE 0x3
+
+#define OPA_VNIC_DLID_SD_IS_SRC_MAC(dlid_sd) (!!((dlid_sd) & 0x20))
+#define OPA_VNIC_DLID_SD_GET_DLID(dlid_sd) ((dlid_sd) >> 8)
+
+/* VNIC Ethernet link status */
+#define OPA_VNIC_ETH_LINK_UP 1
+#define OPA_VNIC_ETH_LINK_DOWN 2
+
+/**
+ * struct opa_vesw_info - OPA vnic switch information
+ * @fabric_id: 10-bit fabric id
+ * @vesw_id: 12-bit virtual ethernet switch id
+ * @def_port_mask: bitmask of default ports
+ * @pkey: partition key
+ * @u_mcast_dlid: unknown multicast dlid
+ * @u_ucast_dlid: array of unknown unicast dlids
+ * @eth_mtu: MTUs for each vlan PCP
+ * @eth_mtu_non_vlan: MTU for non vlan packets
+ */
+struct opa_vesw_info {
+ __be16 fabric_id;
+ __be16 vesw_id;
+
+ u8 rsvd0[6];
+ __be16 def_port_mask;
+
+ u8 rsvd1[2];
+ __be16 pkey;
+
+ u8 rsvd2[4];
+ __be32 u_mcast_dlid;
+ __be32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT];
+
+ u8 rsvd3[44];
+ __be16 eth_mtu[OPA_VNIC_MAX_NUM_PCP];
+ __be16 eth_mtu_non_vlan;
+ u8 rsvd4[2];
+} __packed;
+
+/**
+ * struct opa_per_veswport_info - OPA vnic per port information
+ * @port_num: port number
+ * @eth_link_status: current ethernet link state
+ * @base_mac_addr: base mac address
+ * @config_state: configured port state
+ * @oper_state: operational port state
+ * @max_mac_tbl_ent: max number of mac table entries
+ * @max_smac_ent: max smac entries in mac table
+ * @mac_tbl_digest: mac table digest
+ * @encap_slid: base slid for the port
+ * @pcp_to_sc_uc: sc by pcp index for unicast ethernet packets
+ * @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets
+ * @pcp_to_sc_mc: sc by pcp index for multicast ethernet packets
+ * @pcp_to_vl_mc: vl by pcp index for multicast ethernet packets
+ * @non_vlan_sc_uc: sc for non-vlan unicast ethernet packets
+ * @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets
+ * @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets
+ * @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets
+ * @uc_macs_gen_count: generation count for unicast macs list
+ * @mc_macs_gen_count: generation count for multicast macs list
+ */
+struct opa_per_veswport_info {
+ __be32 port_num;
+
+ u8 eth_link_status;
+ u8 rsvd0[3];
+
+ u8 base_mac_addr[ETH_ALEN];
+ u8 config_state;
+ u8 oper_state;
+
+ __be16 max_mac_tbl_ent;
+ __be16 max_smac_ent;
+ __be32 mac_tbl_digest;
+ u8 rsvd1[4];
+
+ __be32 encap_slid;
+
+ u8 pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_sc_mc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_mc[OPA_VNIC_MAX_NUM_PCP];
+
+ u8 non_vlan_sc_uc;
+ u8 non_vlan_vl_uc;
+ u8 non_vlan_sc_mc;
+ u8 non_vlan_vl_mc;
+
+ u8 rsvd2[48];
+
+ __be16 uc_macs_gen_count;
+ __be16 mc_macs_gen_count;
+
+ u8 rsvd3[8];
+} __packed;
+
+/**
+ * struct opa_veswport_info - OPA vnic port information
+ * @vesw: OPA vnic switch information
+ * @vport: OPA vnic per port information
+ *
+ * On host, each of the virtual ethernet ports belongs
+ * to a different virtual ethernet switches.
+ */
+struct opa_veswport_info {
+ struct opa_vesw_info vesw;
+ struct opa_per_veswport_info vport;
+};
+
+/**
+ * struct opa_veswport_mactable_entry - single entry in the forwarding table
+ * @mac_addr: MAC address
+ * @mac_addr_mask: MAC address bit mask
+ * @dlid_sd: Matching DLID and side data
+ *
+ * On the host each virtual ethernet port will have
+ * a forwarding table. These tables are used to
+ * map a MAC to a LID and other data. For more
+ * details see struct opa_veswport_mactable_entries.
+ * This is the structure of a single mactable entry
+ */
+struct opa_veswport_mactable_entry {
+ u8 mac_addr[ETH_ALEN];
+ u8 mac_addr_mask[ETH_ALEN];
+ __be32 dlid_sd;
+} __packed;
+
+/**
+ * struct opa_veswport_mactable - Forwarding table array
+ * @offset: mac table starting offset
+ * @num_entries: Number of entries to get or set
+ * @mac_tbl_digest: mac table digest
+ * @tbl_entries[]: Array of table entries
+ *
+ * The EM sends down this structure in a MAD indicating
+ * the starting offset in the forwarding table that this
+ * entry is to be loaded into and the number of entries
+ * that that this MAD instance contains
+ * The mac_tbl_digest has been added to this MAD structure. It will be set by
+ * the EM and it will be used by the EM to check if there are any
+ * discrepancies with this value and the value
+ * maintained by the EM in the case of VNIC port being deleted or unloaded
+ * A new instantiation of a VNIC will always have a value of zero.
+ * This value is stored as part of the vnic adapter structure and will be
+ * accessed by the GET and SET routines for both the mactable entries and the
+ * veswport info.
+ */
+struct opa_veswport_mactable {
+ __be16 offset;
+ __be16 num_entries;
+ __be32 mac_tbl_digest;
+ struct opa_veswport_mactable_entry tbl_entries[0];
+} __packed;
+
+/**
+ * struct opa_veswport_summary_counters - summary counters
+ * @vp_instance: vport instance on the OPA port
+ * @vesw_id: virtual ethernet switch id
+ * @veswport_num: virtual ethernet switch port number
+ * @tx_errors: transmit errors
+ * @rx_errors: receive errors
+ * @tx_packets: transmit packets
+ * @rx_packets: receive packets
+ * @tx_bytes: transmit bytes
+ * @rx_bytes: receive bytes
+ * @tx_unicast: unicast packets transmitted
+ * @tx_mcastbcast: multicast/broadcast packets transmitted
+ * @tx_untagged: non-vlan packets transmitted
+ * @tx_vlan: vlan packets transmitted
+ * @tx_64_size: transmit packet length is 64 bytes
+ * @tx_65_127: transmit packet length is >=65 and < 127 bytes
+ * @tx_128_255: transmit packet length is >=128 and < 255 bytes
+ * @tx_256_511: transmit packet length is >=256 and < 511 bytes
+ * @tx_512_1023: transmit packet length is >=512 and < 1023 bytes
+ * @tx_1024_1518: transmit packet length is >=1024 and < 1518 bytes
+ * @tx_1519_max: transmit packet length >= 1519 bytes
+ * @rx_unicast: unicast packets received
+ * @rx_mcastbcast: multicast/broadcast packets received
+ * @rx_untagged: non-vlan packets received
+ * @rx_vlan: vlan packets received
+ * @rx_64_size: received packet length is 64 bytes
+ * @rx_65_127: received packet length is >=65 and < 127 bytes
+ * @rx_128_255: received packet length is >=128 and < 255 bytes
+ * @rx_256_511: received packet length is >=256 and < 511 bytes
+ * @rx_512_1023: received packet length is >=512 and < 1023 bytes
+ * @rx_1024_1518: received packet length is >=1024 and < 1518 bytes
+ * @rx_1519_max: received packet length >= 1519 bytes
+ *
+ * All the above are counters of corresponding conditions.
+ */
+struct opa_veswport_summary_counters {
+ __be16 vp_instance;
+ __be16 vesw_id;
+ __be32 veswport_num;
+
+ __be64 tx_errors;
+ __be64 rx_errors;
+ __be64 tx_packets;
+ __be64 rx_packets;
+ __be64 tx_bytes;
+ __be64 rx_bytes;
+
+ __be64 tx_unicast;
+ __be64 tx_mcastbcast;
+
+ __be64 tx_untagged;
+ __be64 tx_vlan;
+
+ __be64 tx_64_size;
+ __be64 tx_65_127;
+ __be64 tx_128_255;
+ __be64 tx_256_511;
+ __be64 tx_512_1023;
+ __be64 tx_1024_1518;
+ __be64 tx_1519_max;
+
+ __be64 rx_unicast;
+ __be64 rx_mcastbcast;
+
+ __be64 rx_untagged;
+ __be64 rx_vlan;
+
+ __be64 rx_64_size;
+ __be64 rx_65_127;
+ __be64 rx_128_255;
+ __be64 rx_256_511;
+ __be64 rx_512_1023;
+ __be64 rx_1024_1518;
+ __be64 rx_1519_max;
+
+ __be64 reserved[16];
+} __packed;
+
+/**
+ * struct opa_veswport_error_counters - error counters
+ * @vp_instance: vport instance on the OPA port
+ * @vesw_id: virtual ethernet switch id
+ * @veswport_num: virtual ethernet switch port number
+ * @tx_errors: transmit errors
+ * @rx_errors: receive errors
+ * @tx_smac_filt: smac filter errors
+ * @tx_dlid_zero: transmit packets with invalid dlid
+ * @tx_logic: other transmit errors
+ * @tx_drop_state: packet tansmission in non-forward port state
+ * @rx_bad_veswid: received packet with invalid vesw id
+ * @rx_runt: received ethernet packet with length < 64 bytes
+ * @rx_oversize: received ethernet packet with length > MTU size
+ * @rx_eth_down: received packets when interface is down
+ * @rx_drop_state: received packets in non-forwarding port state
+ * @rx_logic: other receive errors
+ *
+ * All the above are counters of corresponding erorr conditions.
+ */
+struct opa_veswport_error_counters {
+ __be16 vp_instance;
+ __be16 vesw_id;
+ __be32 veswport_num;
+
+ __be64 tx_errors;
+ __be64 rx_errors;
+
+ __be64 rsvd0;
+ __be64 tx_smac_filt;
+ __be64 rsvd1;
+ __be64 rsvd2;
+ __be64 rsvd3;
+ __be64 tx_dlid_zero;
+ __be64 rsvd4;
+ __be64 tx_logic;
+ __be64 rsvd5;
+ __be64 tx_drop_state;
+
+ __be64 rx_bad_veswid;
+ __be64 rsvd6;
+ __be64 rx_runt;
+ __be64 rx_oversize;
+ __be64 rsvd7;
+ __be64 rx_eth_down;
+ __be64 rx_drop_state;
+ __be64 rx_logic;
+ __be64 rsvd8;
+
+ __be64 rsvd9[16];
+} __packed;
+
+/**
+ * struct opa_veswport_trap - Trap message sent to EM by VNIC
+ * @fabric_id: 10 bit fabric id
+ * @veswid: 12 bit virtual ethernet switch id
+ * @veswportnum: logical port number on the Virtual switch
+ * @opaportnum: physical port num (redundant on host)
+ * @veswportindex: switch port index on opa port 0 based
+ * @opcode: operation
+ * @reserved: 32 bit for alignment
+ *
+ * The VNIC will send trap messages to the Ethernet manager to
+ * inform it about changes to the VNIC config, behaviour etc.
+ * This is the format of the trap payload.
+ */
+struct opa_veswport_trap {
+ __be16 fabric_id;
+ __be16 veswid;
+ __be32 veswportnum;
+ __be16 opaportnum;
+ u8 veswportindex;
+ u8 opcode;
+ __be32 reserved;
+} __packed;
+
+/**
+ * struct opa_vnic_iface_macs_entry - single entry in the mac list
+ * @mac_addr: MAC address
+ */
+struct opa_vnic_iface_mac_entry {
+ u8 mac_addr[ETH_ALEN];
+};
+
+/**
+ * struct opa_veswport_iface_macs - Msg to set globally administered MAC
+ * @start_idx: position of first entry (0 based)
+ * @num_macs_in_msg: number of MACs in this message
+ * @tot_macs_in_lst: The total number of MACs the agent has
+ * @gen_count: gen_count to indicate change
+ * @entry: The mac list entry
+ *
+ * Same attribute IDS and attribute modifiers as in locally administered
+ * addresses used to set globally administered addresses
+ */
+struct opa_veswport_iface_macs {
+ __be16 start_idx;
+ __be16 num_macs_in_msg;
+ __be16 tot_macs_in_lst;
+ __be16 gen_count;
+ struct opa_vnic_iface_mac_entry entry[0];
+} __packed;
+
+/**
+ * struct opa_vnic_vema_mad - Generic VEMA MAD
+ * @mad_hdr: Generic MAD header
+ * @rmpp_hdr: RMPP header for vendor specific MADs
+ * @oui: Unique org identifier
+ * @data: MAD data
+ */
+struct opa_vnic_vema_mad {
+ struct ib_mad_hdr mad_hdr;
+ struct ib_rmpp_hdr rmpp_hdr;
+ u8 reserved;
+ u8 oui[3];
+ u8 data[OPA_VNIC_EMA_DATA];
+};
+
+/**
+ * struct opa_vnic_notice_attr - Generic Notice MAD
+ * @gen_type: Generic/Specific bit and type of notice
+ * @oui_1: Vendor ID byte 1
+ * @oui_2: Vendor ID byte 2
+ * @oui_3: Vendor ID byte 3
+ * @trap_num: Trap number
+ * @toggle_count: Notice toggle bit and count value
+ * @issuer_lid: Trap issuer's lid
+ * @issuer_gid: Issuer GID (only if Report method)
+ * @raw_data: Trap message body
+ */
+struct opa_vnic_notice_attr {
+ u8 gen_type;
+ u8 oui_1;
+ u8 oui_2;
+ u8 oui_3;
+ __be16 trap_num;
+ __be16 toggle_count;
+ __be32 issuer_lid;
+ __be32 reserved;
+ u8 issuer_gid[16];
+ u8 raw_data[64];
+} __packed;
+
+/**
+ * struct opa_vnic_vema_mad_trap - Generic VEMA MAD Trap
+ * @mad_hdr: Generic MAD header
+ * @rmpp_hdr: RMPP header for vendor specific MADs
+ * @oui: Unique org identifier
+ * @notice: Notice structure
+ */
+struct opa_vnic_vema_mad_trap {
+ struct ib_mad_hdr mad_hdr;
+ struct ib_rmpp_hdr rmpp_hdr;
+ u8 reserved;
+ u8 oui[3];
+ struct opa_vnic_notice_attr notice;
+};
+
+#endif /* _OPA_VNIC_ENCAP_H */
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
new file mode 100644
index 000000000000..d66540e24885
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC ethtool functions
+ */
+
+#include <linux/ethtool.h>
+
+#include "opa_vnic_internal.h"
+
+enum {NETDEV_STATS, VNIC_STATS};
+
+struct vnic_stats {
+ char stat_string[ETH_GSTRING_LEN];
+ struct {
+ int sizeof_stat;
+ int stat_offset;
+ };
+};
+
+#define VNIC_STAT(m) { FIELD_SIZEOF(struct opa_vnic_stats, m), \
+ offsetof(struct opa_vnic_stats, m) }
+
+static struct vnic_stats vnic_gstrings_stats[] = {
+ /* NETDEV stats */
+ {"rx_packets", VNIC_STAT(netstats.rx_packets)},
+ {"tx_packets", VNIC_STAT(netstats.tx_packets)},
+ {"rx_bytes", VNIC_STAT(netstats.rx_bytes)},
+ {"tx_bytes", VNIC_STAT(netstats.tx_bytes)},
+ {"rx_errors", VNIC_STAT(netstats.rx_errors)},
+ {"tx_errors", VNIC_STAT(netstats.tx_errors)},
+ {"rx_dropped", VNIC_STAT(netstats.rx_dropped)},
+ {"tx_dropped", VNIC_STAT(netstats.tx_dropped)},
+
+ /* SUMMARY counters */
+ {"tx_unicast", VNIC_STAT(tx_grp.unicast)},
+ {"tx_mcastbcast", VNIC_STAT(tx_grp.mcastbcast)},
+ {"tx_untagged", VNIC_STAT(tx_grp.untagged)},
+ {"tx_vlan", VNIC_STAT(tx_grp.vlan)},
+
+ {"tx_64_size", VNIC_STAT(tx_grp.s_64)},
+ {"tx_65_127", VNIC_STAT(tx_grp.s_65_127)},
+ {"tx_128_255", VNIC_STAT(tx_grp.s_128_255)},
+ {"tx_256_511", VNIC_STAT(tx_grp.s_256_511)},
+ {"tx_512_1023", VNIC_STAT(tx_grp.s_512_1023)},
+ {"tx_1024_1518", VNIC_STAT(tx_grp.s_1024_1518)},
+ {"tx_1519_max", VNIC_STAT(tx_grp.s_1519_max)},
+
+ {"rx_unicast", VNIC_STAT(rx_grp.unicast)},
+ {"rx_mcastbcast", VNIC_STAT(rx_grp.mcastbcast)},
+ {"rx_untagged", VNIC_STAT(rx_grp.untagged)},
+ {"rx_vlan", VNIC_STAT(rx_grp.vlan)},
+
+ {"rx_64_size", VNIC_STAT(rx_grp.s_64)},
+ {"rx_65_127", VNIC_STAT(rx_grp.s_65_127)},
+ {"rx_128_255", VNIC_STAT(rx_grp.s_128_255)},
+ {"rx_256_511", VNIC_STAT(rx_grp.s_256_511)},
+ {"rx_512_1023", VNIC_STAT(rx_grp.s_512_1023)},
+ {"rx_1024_1518", VNIC_STAT(rx_grp.s_1024_1518)},
+ {"rx_1519_max", VNIC_STAT(rx_grp.s_1519_max)},
+
+ /* ERROR counters */
+ {"rx_fifo_errors", VNIC_STAT(netstats.rx_fifo_errors)},
+ {"rx_length_errors", VNIC_STAT(netstats.rx_length_errors)},
+
+ {"tx_fifo_errors", VNIC_STAT(netstats.tx_fifo_errors)},
+ {"tx_carrier_errors", VNIC_STAT(netstats.tx_carrier_errors)},
+
+ {"tx_dlid_zero", VNIC_STAT(tx_dlid_zero)},
+ {"tx_drop_state", VNIC_STAT(tx_drop_state)},
+ {"rx_drop_state", VNIC_STAT(rx_drop_state)},
+ {"rx_oversize", VNIC_STAT(rx_oversize)},
+ {"rx_runt", VNIC_STAT(rx_runt)},
+};
+
+#define VNIC_STATS_LEN ARRAY_SIZE(vnic_gstrings_stats)
+
+/* vnic_get_drvinfo - get driver info */
+static void vnic_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ strlcpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, opa_vnic_driver_version,
+ sizeof(drvinfo->version));
+ strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent),
+ sizeof(drvinfo->bus_info));
+}
+
+/* vnic_get_sset_count - get string set count */
+static int vnic_get_sset_count(struct net_device *netdev, int sset)
+{
+ return (sset == ETH_SS_STATS) ? VNIC_STATS_LEN : -EOPNOTSUPP;
+}
+
+/* vnic_get_ethtool_stats - get statistics */
+static void vnic_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct opa_vnic_stats vstats;
+ int i;
+
+ memset(&vstats, 0, sizeof(vstats));
+ mutex_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats);
+ for (i = 0; i < VNIC_STATS_LEN; i++) {
+ char *p = (char *)&vstats + vnic_gstrings_stats[i].stat_offset;
+
+ data[i] = (vnic_gstrings_stats[i].sizeof_stat ==
+ sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+ }
+ mutex_unlock(&adapter->stats_lock);
+}
+
+/* vnic_get_strings - get strings */
+static void vnic_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+ int i;
+
+ if (stringset != ETH_SS_STATS)
+ return;
+
+ for (i = 0; i < VNIC_STATS_LEN; i++)
+ memcpy(data + i * ETH_GSTRING_LEN,
+ vnic_gstrings_stats[i].stat_string,
+ ETH_GSTRING_LEN);
+}
+
+/* ethtool ops */
+static const struct ethtool_ops opa_vnic_ethtool_ops = {
+ .get_drvinfo = vnic_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = vnic_get_strings,
+ .get_sset_count = vnic_get_sset_count,
+ .get_ethtool_stats = vnic_get_ethtool_stats,
+};
+
+/* opa_vnic_set_ethtool_ops - set ethtool ops */
+void opa_vnic_set_ethtool_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &opa_vnic_ethtool_ops;
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
new file mode 100644
index 000000000000..6bba886bec1f
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
@@ -0,0 +1,329 @@
+#ifndef _OPA_VNIC_INTERNAL_H
+#define _OPA_VNIC_INTERNAL_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC driver internal declarations
+ */
+
+#include <linux/bitops.h>
+#include <linux/etherdevice.h>
+#include <linux/hashtable.h>
+#include <linux/sizes.h>
+#include <rdma/opa_vnic.h>
+
+#include "opa_vnic_encap.h"
+
+#define OPA_VNIC_VLAN_PCP(vlan_tci) \
+ (((vlan_tci) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT)
+
+/* Flow to default port redirection table size */
+#define OPA_VNIC_FLOW_TBL_SIZE 32
+
+/* Invalid port number */
+#define OPA_VNIC_INVALID_PORT 0xff
+
+struct opa_vnic_adapter;
+
+/**
+ * struct __opa_vesw_info - OPA vnic virtual switch info
+ *
+ * Same as opa_vesw_info without bitwise attribute.
+ */
+struct __opa_vesw_info {
+ u16 fabric_id;
+ u16 vesw_id;
+
+ u8 rsvd0[6];
+ u16 def_port_mask;
+
+ u8 rsvd1[2];
+ u16 pkey;
+
+ u8 rsvd2[4];
+ u32 u_mcast_dlid;
+ u32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT];
+
+ u8 rsvd3[44];
+ u16 eth_mtu[OPA_VNIC_MAX_NUM_PCP];
+ u16 eth_mtu_non_vlan;
+ u8 rsvd4[2];
+} __packed;
+
+/**
+ * struct __opa_per_veswport_info - OPA vnic per port info
+ *
+ * Same as opa_per_veswport_info without bitwise attribute.
+ */
+struct __opa_per_veswport_info {
+ u32 port_num;
+
+ u8 eth_link_status;
+ u8 rsvd0[3];
+
+ u8 base_mac_addr[ETH_ALEN];
+ u8 config_state;
+ u8 oper_state;
+
+ u16 max_mac_tbl_ent;
+ u16 max_smac_ent;
+ u32 mac_tbl_digest;
+ u8 rsvd1[4];
+
+ u32 encap_slid;
+
+ u8 pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_sc_mc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_mc[OPA_VNIC_MAX_NUM_PCP];
+
+ u8 non_vlan_sc_uc;
+ u8 non_vlan_vl_uc;
+ u8 non_vlan_sc_mc;
+ u8 non_vlan_vl_mc;
+
+ u8 rsvd2[48];
+
+ u16 uc_macs_gen_count;
+ u16 mc_macs_gen_count;
+
+ u8 rsvd3[8];
+} __packed;
+
+/**
+ * struct __opa_veswport_info - OPA vnic port info
+ *
+ * Same as opa_veswport_info without bitwise attribute.
+ */
+struct __opa_veswport_info {
+ struct __opa_vesw_info vesw;
+ struct __opa_per_veswport_info vport;
+};
+
+/**
+ * struct __opa_veswport_trap - OPA vnic trap info
+ *
+ * Same as opa_veswport_trap without bitwise attribute.
+ */
+struct __opa_veswport_trap {
+ u16 fabric_id;
+ u16 veswid;
+ u32 veswportnum;
+ u16 opaportnum;
+ u8 veswportindex;
+ u8 opcode;
+ u32 reserved;
+} __packed;
+
+/**
+ * struct opa_vnic_ctrl_port - OPA virtual NIC control port
+ * @ibdev: pointer to ib device
+ * @ops: opa vnic control operations
+ * @num_ports: number of opa ports
+ */
+struct opa_vnic_ctrl_port {
+ struct ib_device *ibdev;
+ struct opa_vnic_ctrl_ops *ops;
+ u8 num_ports;
+};
+
+/**
+ * struct opa_vnic_adapter - OPA VNIC netdev private data structure
+ * @netdev: pointer to associated netdev
+ * @ibdev: ib device
+ * @cport: pointer to opa vnic control port
+ * @rn_ops: rdma netdev's net_device_ops
+ * @port_num: OPA port number
+ * @vport_num: vesw port number
+ * @lock: adapter lock
+ * @info: virtual ethernet switch port information
+ * @vema_mac_addr: mac address configured by vema
+ * @umac_hash: unicast maclist hash
+ * @mmac_hash: multicast maclist hash
+ * @mactbl: hash table of MAC entries
+ * @mactbl_lock: mac table lock
+ * @stats_lock: statistics lock
+ * @flow_tbl: flow to default port redirection table
+ * @trap_timeout: trap timeout
+ * @trap_count: no. of traps allowed within timeout period
+ */
+struct opa_vnic_adapter {
+ struct net_device *netdev;
+ struct ib_device *ibdev;
+ struct opa_vnic_ctrl_port *cport;
+ const struct net_device_ops *rn_ops;
+
+ u8 port_num;
+ u8 vport_num;
+
+ /* Lock used around concurrent updates to netdev */
+ struct mutex lock;
+
+ struct __opa_veswport_info info;
+ u8 vema_mac_addr[ETH_ALEN];
+ u32 umac_hash;
+ u32 mmac_hash;
+ struct hlist_head __rcu *mactbl;
+
+ /* Lock used to protect updates to mac table */
+ struct mutex mactbl_lock;
+
+ /* Lock used to protect access to vnic counters */
+ struct mutex stats_lock;
+
+ u8 flow_tbl[OPA_VNIC_FLOW_TBL_SIZE];
+
+ unsigned long trap_timeout;
+ u8 trap_count;
+};
+
+/* Same as opa_veswport_mactable_entry, but without bitwise attribute */
+struct __opa_vnic_mactable_entry {
+ u8 mac_addr[ETH_ALEN];
+ u8 mac_addr_mask[ETH_ALEN];
+ u32 dlid_sd;
+} __packed;
+
+/**
+ * struct opa_vnic_mac_tbl_node - OPA VNIC mac table node
+ * @hlist: hash list handle
+ * @index: index of entry in the mac table
+ * @entry: entry in the table
+ */
+struct opa_vnic_mac_tbl_node {
+ struct hlist_node hlist;
+ u16 index;
+ struct __opa_vnic_mactable_entry entry;
+};
+
+#define v_dbg(format, arg...) \
+ netdev_dbg(adapter->netdev, format, ## arg)
+#define v_err(format, arg...) \
+ netdev_err(adapter->netdev, format, ## arg)
+#define v_info(format, arg...) \
+ netdev_info(adapter->netdev, format, ## arg)
+#define v_warn(format, arg...) \
+ netdev_warn(adapter->netdev, format, ## arg)
+
+#define c_err(format, arg...) \
+ dev_err(&cport->ibdev->dev, format, ## arg)
+#define c_info(format, arg...) \
+ dev_info(&cport->ibdev->dev, format, ## arg)
+#define c_dbg(format, arg...) \
+ dev_dbg(&cport->ibdev->dev, format, ## arg)
+
+/* The maximum allowed entries in the mac table */
+#define OPA_VNIC_MAC_TBL_MAX_ENTRIES 2048
+/* Limit of smac entries in mac table */
+#define OPA_VNIC_MAX_SMAC_LIMIT 256
+
+/* The last octet of the MAC address is used as the key to the hash table */
+#define OPA_VNIC_MAC_HASH_IDX 5
+
+/* The VNIC MAC hash table is of size 2^8 */
+#define OPA_VNIC_MAC_TBL_HASH_BITS 8
+#define OPA_VNIC_MAC_TBL_SIZE BIT(OPA_VNIC_MAC_TBL_HASH_BITS)
+
+/* VNIC HASH MACROS */
+#define vnic_hash_init(hashtable) __hash_init(hashtable, OPA_VNIC_MAC_TBL_SIZE)
+
+#define vnic_hash_add(hashtable, node, key) \
+ hlist_add_head(node, \
+ &hashtable[hash_min(key, ilog2(OPA_VNIC_MAC_TBL_SIZE))])
+
+#define vnic_hash_for_each_safe(name, bkt, tmp, obj, member) \
+ for ((bkt) = 0, obj = NULL; \
+ !obj && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++) \
+ hlist_for_each_entry_safe(obj, tmp, &name[bkt], member)
+
+#define vnic_hash_for_each_possible(name, obj, member, key) \
+ hlist_for_each_entry(obj, \
+ &name[hash_min(key, ilog2(OPA_VNIC_MAC_TBL_SIZE))], member)
+
+#define vnic_hash_for_each(name, bkt, obj, member) \
+ for ((bkt) = 0, obj = NULL; \
+ !obj && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++) \
+ hlist_for_each_entry(obj, &name[bkt], member)
+
+extern char opa_vnic_driver_name[];
+extern const char opa_vnic_driver_version[];
+
+struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
+ u8 port_num, u8 vport_num);
+void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter);
+void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter);
+void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter);
+void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl);
+int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl);
+void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs);
+void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs);
+void opa_vnic_get_summary_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_summary_counters *cntrs);
+void opa_vnic_get_error_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_error_counters *cntrs);
+void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info);
+void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info);
+void opa_vnic_get_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info);
+void opa_vnic_set_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info);
+void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event);
+void opa_vnic_set_ethtool_ops(struct net_device *netdev);
+void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter,
+ struct __opa_veswport_trap *data, u32 lid);
+
+#endif /* _OPA_VNIC_INTERNAL_H */
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
new file mode 100644
index 000000000000..905f39dda5aa
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
@@ -0,0 +1,389 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA Virtual Network Interface Controller (VNIC) driver
+ * netdev functionality.
+ */
+
+#include <linux/module.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+
+#include "opa_vnic_internal.h"
+
+#define OPA_TX_TIMEOUT_MS 1000
+
+#define OPA_VNIC_SKB_HEADROOM \
+ ALIGN((OPA_VNIC_HDR_LEN + OPA_VNIC_SKB_MDATA_LEN), 8)
+
+/* This function is overloaded for opa_vnic specific implementation */
+static void opa_vnic_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct opa_vnic_stats vstats;
+
+ memset(&vstats, 0, sizeof(vstats));
+ mutex_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats);
+ mutex_unlock(&adapter->stats_lock);
+ memcpy(stats, &vstats.netstats, sizeof(*stats));
+}
+
+/* opa_netdev_start_xmit - transmit function */
+static netdev_tx_t opa_netdev_start_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+
+ v_dbg("xmit: queue %d skb len %d\n", skb->queue_mapping, skb->len);
+ /* pad to ensure mininum ethernet packet length */
+ if (unlikely(skb->len < ETH_ZLEN)) {
+ if (skb_padto(skb, ETH_ZLEN))
+ return NETDEV_TX_OK;
+
+ skb_put(skb, ETH_ZLEN - skb->len);
+ }
+
+ opa_vnic_encap_skb(adapter, skb);
+ return adapter->rn_ops->ndo_start_xmit(skb, netdev);
+}
+
+static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
+ void *accel_priv,
+ select_queue_fallback_t fallback)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct opa_vnic_skb_mdata *mdata;
+ int rc;
+
+ /* pass entropy and vl as metadata in skb */
+ mdata = (struct opa_vnic_skb_mdata *)skb_push(skb, sizeof(*mdata));
+ mdata->entropy = opa_vnic_calc_entropy(adapter, skb);
+ mdata->vl = opa_vnic_get_vl(adapter, skb);
+ rc = adapter->rn_ops->ndo_select_queue(netdev, skb,
+ accel_priv, fallback);
+ skb_pull(skb, sizeof(*mdata));
+ return rc;
+}
+
+/* opa_vnic_process_vema_config - process vema configuration updates */
+void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct rdma_netdev *rn = netdev_priv(adapter->netdev);
+ u8 port_num[OPA_VESW_MAX_NUM_DEF_PORT] = { 0 };
+ struct net_device *netdev = adapter->netdev;
+ u8 i, port_count = 0;
+ u16 port_mask;
+
+ /* If the base_mac_addr is changed, update the interface mac address */
+ if (memcmp(info->vport.base_mac_addr, adapter->vema_mac_addr,
+ ARRAY_SIZE(info->vport.base_mac_addr))) {
+ struct sockaddr saddr;
+
+ memcpy(saddr.sa_data, info->vport.base_mac_addr,
+ ARRAY_SIZE(info->vport.base_mac_addr));
+ mutex_lock(&adapter->lock);
+ eth_mac_addr(netdev, &saddr);
+ memcpy(adapter->vema_mac_addr,
+ info->vport.base_mac_addr, ETH_ALEN);
+ mutex_unlock(&adapter->lock);
+ }
+
+ rn->set_id(netdev, info->vesw.vesw_id);
+
+ /* Handle MTU limit change */
+ rtnl_lock();
+ netdev->max_mtu = max_t(unsigned int, info->vesw.eth_mtu_non_vlan,
+ netdev->min_mtu);
+ if (netdev->mtu > netdev->max_mtu)
+ dev_set_mtu(netdev, netdev->max_mtu);
+ rtnl_unlock();
+
+ /* Update flow to default port redirection table */
+ port_mask = info->vesw.def_port_mask;
+ for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++) {
+ if (port_mask & 1)
+ port_num[port_count++] = i;
+ port_mask >>= 1;
+ }
+
+ /*
+ * Build the flow table. Flow table is required when destination LID
+ * is not available. Up to OPA_VNIC_FLOW_TBL_SIZE flows supported.
+ * Each flow need a default port number to get its dlid from the
+ * u_ucast_dlid array.
+ */
+ for (i = 0; i < OPA_VNIC_FLOW_TBL_SIZE; i++)
+ adapter->flow_tbl[i] = port_count ? port_num[i % port_count] :
+ OPA_VNIC_INVALID_PORT;
+
+ /* Operational state can only be DROP_ALL or FORWARDING */
+ if (info->vport.config_state == OPA_VNIC_STATE_FORWARDING) {
+ info->vport.oper_state = OPA_VNIC_STATE_FORWARDING;
+ netif_dormant_off(netdev);
+ } else {
+ info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL;
+ netif_dormant_on(netdev);
+ }
+}
+
+/*
+ * Set the power on default values in adapter's vema interface structure.
+ */
+static inline void opa_vnic_set_pod_values(struct opa_vnic_adapter *adapter)
+{
+ adapter->info.vport.max_mac_tbl_ent = OPA_VNIC_MAC_TBL_MAX_ENTRIES;
+ adapter->info.vport.max_smac_ent = OPA_VNIC_MAX_SMAC_LIMIT;
+ adapter->info.vport.config_state = OPA_VNIC_STATE_DROP_ALL;
+ adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN;
+}
+
+/* opa_vnic_set_mac_addr - change mac address */
+static int opa_vnic_set_mac_addr(struct net_device *netdev, void *addr)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct sockaddr *sa = addr;
+ int rc;
+
+ if (!memcmp(netdev->dev_addr, sa->sa_data, ETH_ALEN))
+ return 0;
+
+ mutex_lock(&adapter->lock);
+ rc = eth_mac_addr(netdev, addr);
+ mutex_unlock(&adapter->lock);
+ if (rc)
+ return rc;
+
+ adapter->info.vport.uc_macs_gen_count++;
+ opa_vnic_vema_report_event(adapter,
+ OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE);
+ return 0;
+}
+
+/*
+ * opa_vnic_mac_send_event - post event on possible mac list exchange
+ * Send trap when digest from uc/mc mac list differs from previous run.
+ * Digest is evaluated similar to how cksum does.
+ */
+static void opa_vnic_mac_send_event(struct net_device *netdev, u8 event)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct netdev_hw_addr *ha;
+ struct netdev_hw_addr_list *hw_list;
+ u32 *ref_crc;
+ u32 l, crc = 0;
+
+ switch (event) {
+ case OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE:
+ hw_list = &netdev->uc;
+ adapter->info.vport.uc_macs_gen_count++;
+ ref_crc = &adapter->umac_hash;
+ break;
+ case OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE:
+ hw_list = &netdev->mc;
+ adapter->info.vport.mc_macs_gen_count++;
+ ref_crc = &adapter->mmac_hash;
+ break;
+ default:
+ return;
+ }
+ netdev_hw_addr_list_for_each(ha, hw_list) {
+ crc = crc32_le(crc, ha->addr, ETH_ALEN);
+ }
+ l = netdev_hw_addr_list_count(hw_list) * ETH_ALEN;
+ crc = ~crc32_le(crc, (void *)&l, sizeof(l));
+
+ if (crc != *ref_crc) {
+ *ref_crc = crc;
+ opa_vnic_vema_report_event(adapter, event);
+ }
+}
+
+/* opa_vnic_set_rx_mode - handle uc/mc mac list change */
+static void opa_vnic_set_rx_mode(struct net_device *netdev)
+{
+ opa_vnic_mac_send_event(netdev,
+ OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE);
+
+ opa_vnic_mac_send_event(netdev,
+ OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE);
+}
+
+/* opa_netdev_open - activate network interface */
+static int opa_netdev_open(struct net_device *netdev)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ int rc;
+
+ rc = adapter->rn_ops->ndo_open(adapter->netdev);
+ if (rc) {
+ v_dbg("open failed %d\n", rc);
+ return rc;
+ }
+
+ /* Update eth link status and send trap */
+ adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_UP;
+ opa_vnic_vema_report_event(adapter,
+ OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE);
+ return 0;
+}
+
+/* opa_netdev_close - disable network interface */
+static int opa_netdev_close(struct net_device *netdev)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ int rc;
+
+ rc = adapter->rn_ops->ndo_stop(adapter->netdev);
+ if (rc) {
+ v_dbg("close failed %d\n", rc);
+ return rc;
+ }
+
+ /* Update eth link status and send trap */
+ adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN;
+ opa_vnic_vema_report_event(adapter,
+ OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE);
+ return 0;
+}
+
+/* netdev ops */
+static const struct net_device_ops opa_netdev_ops = {
+ .ndo_open = opa_netdev_open,
+ .ndo_stop = opa_netdev_close,
+ .ndo_start_xmit = opa_netdev_start_xmit,
+ .ndo_get_stats64 = opa_vnic_get_stats64,
+ .ndo_set_rx_mode = opa_vnic_set_rx_mode,
+ .ndo_select_queue = opa_vnic_select_queue,
+ .ndo_set_mac_address = opa_vnic_set_mac_addr,
+};
+
+/* opa_vnic_add_netdev - create vnic netdev interface */
+struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
+ u8 port_num, u8 vport_num)
+{
+ struct opa_vnic_adapter *adapter;
+ struct net_device *netdev;
+ struct rdma_netdev *rn;
+ int rc;
+
+ netdev = ibdev->alloc_rdma_netdev(ibdev, port_num,
+ RDMA_NETDEV_OPA_VNIC,
+ "veth%d", NET_NAME_UNKNOWN,
+ ether_setup);
+ if (!netdev)
+ return ERR_PTR(-ENOMEM);
+ else if (IS_ERR(netdev))
+ return ERR_CAST(netdev);
+
+ adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+ if (!adapter) {
+ rc = -ENOMEM;
+ goto adapter_err;
+ }
+
+ rn = netdev_priv(netdev);
+ rn->clnt_priv = adapter;
+ rn->hca = ibdev;
+ rn->port_num = port_num;
+ adapter->netdev = netdev;
+ adapter->ibdev = ibdev;
+ adapter->port_num = port_num;
+ adapter->vport_num = vport_num;
+ adapter->rn_ops = netdev->netdev_ops;
+
+ netdev->netdev_ops = &opa_netdev_ops;
+ netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netdev->hard_header_len += OPA_VNIC_SKB_HEADROOM;
+ mutex_init(&adapter->lock);
+ mutex_init(&adapter->mactbl_lock);
+ mutex_init(&adapter->stats_lock);
+
+ SET_NETDEV_DEV(netdev, ibdev->dev.parent);
+
+ opa_vnic_set_ethtool_ops(netdev);
+
+ opa_vnic_set_pod_values(adapter);
+
+ rc = register_netdev(netdev);
+ if (rc)
+ goto netdev_err;
+
+ netif_carrier_off(netdev);
+ netif_dormant_on(netdev);
+ v_info("initialized\n");
+
+ return adapter;
+netdev_err:
+ mutex_destroy(&adapter->lock);
+ mutex_destroy(&adapter->mactbl_lock);
+ mutex_destroy(&adapter->stats_lock);
+ kfree(adapter);
+adapter_err:
+ ibdev->free_rdma_netdev(netdev);
+
+ return ERR_PTR(rc);
+}
+
+/* opa_vnic_rem_netdev - remove vnic netdev interface */
+void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct ib_device *ibdev = adapter->ibdev;
+
+ v_info("removing\n");
+ unregister_netdev(netdev);
+ opa_vnic_release_mac_tbl(adapter);
+ mutex_destroy(&adapter->lock);
+ mutex_destroy(&adapter->mactbl_lock);
+ mutex_destroy(&adapter->stats_lock);
+ kfree(adapter);
+ ibdev->free_rdma_netdev(netdev);
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
new file mode 100644
index 000000000000..875694f9a7f9
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
@@ -0,0 +1,1056 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA Virtual Network Interface Controller (VNIC)
+ * Ethernet Management Agent (EMA) driver
+ */
+
+#include <linux/module.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_smi.h>
+
+#include "opa_vnic_internal.h"
+
+#define DRV_VERSION "1.0"
+char opa_vnic_driver_name[] = "opa_vnic";
+const char opa_vnic_driver_version[] = DRV_VERSION;
+
+/*
+ * The trap service level is kept in bits 3 to 7 in the trap_sl_rsvd
+ * field in the class port info MAD.
+ */
+#define GET_TRAP_SL_FROM_CLASS_PORT_INFO(x) (((x) >> 3) & 0x1f)
+
+/* Cap trap bursts to a reasonable limit good for normal cases */
+#define OPA_VNIC_TRAP_BURST_LIMIT 4
+
+/*
+ * VNIC trap limit timeout.
+ * Inverse of cap2_mask response time out (1.0737 secs) = 0.9
+ * secs approx IB spec 13.4.6.2.1 PortInfoSubnetTimeout and
+ * 13.4.9 Traps.
+ */
+#define OPA_VNIC_TRAP_TIMEOUT ((4096 * (1UL << 18)) / 1000)
+
+#define OPA_VNIC_UNSUP_ATTR \
+ cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB)
+
+#define OPA_VNIC_INVAL_ATTR \
+ cpu_to_be16(IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE)
+
+#define OPA_VNIC_CLASS_CAP_TRAP 0x1
+
+/* Maximum number of VNIC ports supported */
+#define OPA_VNIC_MAX_NUM_VPORT 255
+
+/**
+ * struct opa_vnic_vema_port -- VNIC VEMA port details
+ * @cport: pointer to port
+ * @mad_agent: pointer to mad agent for port
+ * @class_port_info: Class port info information.
+ * @tid: Transaction id
+ * @port_num: OPA port number
+ * @vport_idr: vnic ports idr
+ * @event_handler: ib event handler
+ * @lock: adapter interface lock
+ */
+struct opa_vnic_vema_port {
+ struct opa_vnic_ctrl_port *cport;
+ struct ib_mad_agent *mad_agent;
+ struct opa_class_port_info class_port_info;
+ u64 tid;
+ u8 port_num;
+ struct idr vport_idr;
+ struct ib_event_handler event_handler;
+
+ /* Lock to query/update network adapter */
+ struct mutex lock;
+};
+
+static void opa_vnic_vema_add_one(struct ib_device *device);
+static void opa_vnic_vema_rem_one(struct ib_device *device,
+ void *client_data);
+
+static struct ib_client opa_vnic_client = {
+ .name = opa_vnic_driver_name,
+ .add = opa_vnic_vema_add_one,
+ .remove = opa_vnic_vema_rem_one,
+};
+
+/**
+ * vema_get_vport_num -- Get the vnic from the mad
+ * @recvd_mad: Received mad
+ *
+ * Return: returns value of the vnic port number
+ */
+static inline u8 vema_get_vport_num(struct opa_vnic_vema_mad *recvd_mad)
+{
+ return be32_to_cpu(recvd_mad->mad_hdr.attr_mod) & 0xff;
+}
+
+/**
+ * vema_get_vport_adapter -- Get vnic port adapter from recvd mad
+ * @recvd_mad: received mad
+ * @port: ptr to port struct on which MAD was recvd
+ *
+ * Return: vnic adapter
+ */
+static inline struct opa_vnic_adapter *
+vema_get_vport_adapter(struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_port *port)
+{
+ u8 vport_num = vema_get_vport_num(recvd_mad);
+
+ return idr_find(&port->vport_idr, vport_num);
+}
+
+/**
+ * vema_mac_tbl_req_ok -- Check if mac request has correct values
+ * @mac_tbl: mac table
+ *
+ * This function checks for the validity of the offset and number of
+ * entries required.
+ *
+ * Return: true if offset and num_entries are valid
+ */
+static inline bool vema_mac_tbl_req_ok(struct opa_veswport_mactable *mac_tbl)
+{
+ u16 offset, num_entries;
+ u16 req_entries = ((OPA_VNIC_EMA_DATA - sizeof(*mac_tbl)) /
+ sizeof(mac_tbl->tbl_entries[0]));
+
+ offset = be16_to_cpu(mac_tbl->offset);
+ num_entries = be16_to_cpu(mac_tbl->num_entries);
+
+ return ((num_entries <= req_entries) &&
+ (offset + num_entries <= OPA_VNIC_MAC_TBL_MAX_ENTRIES));
+}
+
+/*
+ * Return the power on default values in the port info structure
+ * in big endian format as required by MAD.
+ */
+static inline void vema_get_pod_values(struct opa_veswport_info *port_info)
+{
+ memset(port_info, 0, sizeof(*port_info));
+ port_info->vport.max_mac_tbl_ent =
+ cpu_to_be16(OPA_VNIC_MAC_TBL_MAX_ENTRIES);
+ port_info->vport.max_smac_ent =
+ cpu_to_be16(OPA_VNIC_MAX_SMAC_LIMIT);
+ port_info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL;
+ port_info->vport.config_state = OPA_VNIC_STATE_DROP_ALL;
+}
+
+/**
+ * vema_add_vport -- Add a new vnic port
+ * @port: ptr to opa_vnic_vema_port struct
+ * @vport_num: vnic port number (to be added)
+ *
+ * Return a pointer to the vnic adapter structure
+ */
+static struct opa_vnic_adapter *vema_add_vport(struct opa_vnic_vema_port *port,
+ u8 vport_num)
+{
+ struct opa_vnic_ctrl_port *cport = port->cport;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = opa_vnic_add_netdev(cport->ibdev, port->port_num, vport_num);
+ if (!IS_ERR(adapter)) {
+ int rc;
+
+ adapter->cport = cport;
+ rc = idr_alloc(&port->vport_idr, adapter, vport_num,
+ vport_num + 1, GFP_NOWAIT);
+ if (rc < 0) {
+ opa_vnic_rem_netdev(adapter);
+ adapter = ERR_PTR(rc);
+ }
+ }
+
+ return adapter;
+}
+
+/**
+ * vema_get_class_port_info -- Get class info for port
+ * @port: Port on whic MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to respose mad
+ *
+ * This function copies the latest class port info value set for the
+ * port and stores it for generating traps
+ */
+static void vema_get_class_port_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_class_port_info *port_info;
+
+ port_info = (struct opa_class_port_info *)rsp_mad->data;
+ memcpy(port_info, &port->class_port_info, sizeof(*port_info));
+ port_info->base_version = OPA_MGMT_BASE_VERSION,
+ port_info->class_version = OPA_EMA_CLASS_VERSION;
+
+ /*
+ * Set capability mask bit indicating agent generates traps,
+ * and set the maximum number of VNIC ports supported.
+ */
+ port_info->cap_mask = cpu_to_be16((OPA_VNIC_CLASS_CAP_TRAP |
+ (OPA_VNIC_MAX_NUM_VPORT << 8)));
+
+ /*
+ * Since a get routine is always sent by the EM first we
+ * set the expected response time to
+ * 4.096 usec * 2^18 == 1.0737 sec here.
+ */
+ port_info->cap_mask2_resp_time = cpu_to_be32(18);
+}
+
+/**
+ * vema_set_class_port_info -- Get class info for port
+ * @port: Port on whic MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to respose mad
+ *
+ * This function updates the port class info for the specific vnic
+ * and sets up the response mad data
+ */
+static void vema_set_class_port_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ memcpy(&port->class_port_info, recvd_mad->data,
+ sizeof(port->class_port_info));
+
+ vema_get_class_port_info(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_get_veswport_info -- Get veswport info
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to respose mad
+ */
+static void vema_get_veswport_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_info *port_info =
+ (struct opa_veswport_info *)rsp_mad->data;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (adapter) {
+ memset(port_info, 0, sizeof(*port_info));
+ opa_vnic_get_vesw_info(adapter, &port_info->vesw);
+ opa_vnic_get_per_veswport_info(adapter,
+ &port_info->vport);
+ } else {
+ vema_get_pod_values(port_info);
+ }
+}
+
+/**
+ * vema_set_veswport_info -- Set veswport info
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to respose mad
+ *
+ * This function gets the port class infor for vnic
+ */
+static void vema_set_veswport_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_vnic_ctrl_port *cport = port->cport;
+ struct opa_veswport_info *port_info;
+ struct opa_vnic_adapter *adapter;
+ u8 vport_num;
+
+ vport_num = vema_get_vport_num(recvd_mad);
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ adapter = vema_add_vport(port, vport_num);
+ if (IS_ERR(adapter)) {
+ c_err("failed to add vport %d: %ld\n",
+ vport_num, PTR_ERR(adapter));
+ goto err_exit;
+ }
+ }
+
+ port_info = (struct opa_veswport_info *)recvd_mad->data;
+ opa_vnic_set_vesw_info(adapter, &port_info->vesw);
+ opa_vnic_set_per_veswport_info(adapter, &port_info->vport);
+
+ /* Process the new config settings */
+ opa_vnic_process_vema_config(adapter);
+
+ vema_get_veswport_info(port, recvd_mad, rsp_mad);
+ return;
+
+err_exit:
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+}
+
+/**
+ * vema_get_mac_entries -- Get MAC entries in VNIC MAC table
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to respose mad
+ *
+ * This function gets the MAC entries that are programmed into
+ * the VNIC MAC forwarding table. It checks for the validity of
+ * the index into the MAC table and the number of entries that
+ * are to be retrieved.
+ */
+static void vema_get_mac_entries(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_mactable *mac_tbl_in, *mac_tbl_out;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ mac_tbl_in = (struct opa_veswport_mactable *)recvd_mad->data;
+ mac_tbl_out = (struct opa_veswport_mactable *)rsp_mad->data;
+
+ if (vema_mac_tbl_req_ok(mac_tbl_in)) {
+ mac_tbl_out->offset = mac_tbl_in->offset;
+ mac_tbl_out->num_entries = mac_tbl_in->num_entries;
+ opa_vnic_query_mac_tbl(adapter, mac_tbl_out);
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ }
+}
+
+/**
+ * vema_set_mac_entries -- Set MAC entries in VNIC MAC table
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to respose mad
+ *
+ * This function sets the MAC entries in the VNIC forwarding table
+ * It checks for the validity of the index and the number of forwarding
+ * table entries to be programmed.
+ */
+static void vema_set_mac_entries(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_mactable *mac_tbl;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ mac_tbl = (struct opa_veswport_mactable *)recvd_mad->data;
+ if (vema_mac_tbl_req_ok(mac_tbl)) {
+ if (opa_vnic_update_mac_tbl(adapter, mac_tbl))
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ }
+ vema_get_mac_entries(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_set_delete_vesw -- Reset VESW info to POD values
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to respose mad
+ *
+ * This function clears all the fields of veswport info for the requested vesw
+ * and sets them back to the power-on default values. It does not delete the
+ * vesw.
+ */
+static void vema_set_delete_vesw(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_info *port_info =
+ (struct opa_veswport_info *)rsp_mad->data;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ vema_get_pod_values(port_info);
+ opa_vnic_set_vesw_info(adapter, &port_info->vesw);
+ opa_vnic_set_per_veswport_info(adapter, &port_info->vport);
+
+ /* Process the new config settings */
+ opa_vnic_process_vema_config(adapter);
+
+ opa_vnic_release_mac_tbl(adapter);
+
+ vema_get_veswport_info(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_get_mac_list -- Get the unicast/multicast macs.
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ * @attr_id: Attribute ID indicating multicast or unicast mac list
+ */
+static void vema_get_mac_list(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad,
+ u16 attr_id)
+{
+ struct opa_veswport_iface_macs *macs_in, *macs_out;
+ int max_entries = (OPA_VNIC_EMA_DATA - sizeof(*macs_out)) / ETH_ALEN;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ macs_in = (struct opa_veswport_iface_macs *)recvd_mad->data;
+ macs_out = (struct opa_veswport_iface_macs *)rsp_mad->data;
+
+ macs_out->start_idx = macs_in->start_idx;
+ if (macs_in->num_macs_in_msg)
+ macs_out->num_macs_in_msg = macs_in->num_macs_in_msg;
+ else
+ macs_out->num_macs_in_msg = cpu_to_be16(max_entries);
+
+ if (attr_id == OPA_EM_ATTR_IFACE_MCAST_MACS)
+ opa_vnic_query_mcast_macs(adapter, macs_out);
+ else
+ opa_vnic_query_ucast_macs(adapter, macs_out);
+}
+
+/**
+ * vema_get_summary_counters -- Gets summary counters.
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_get_summary_counters(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_summary_counters *cntrs;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (adapter) {
+ cntrs = (struct opa_veswport_summary_counters *)rsp_mad->data;
+ opa_vnic_get_summary_counters(adapter, cntrs);
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ }
+}
+
+/**
+ * vema_get_error_counters -- Gets summary counters.
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_get_error_counters(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_error_counters *cntrs;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (adapter) {
+ cntrs = (struct opa_veswport_error_counters *)rsp_mad->data;
+ opa_vnic_get_error_counters(adapter, cntrs);
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ }
+}
+
+/**
+ * vema_get -- Process received get MAD
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_get(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id);
+
+ switch (attr_id) {
+ case OPA_EM_ATTR_CLASS_PORT_INFO:
+ vema_get_class_port_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_INFO:
+ vema_get_veswport_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES:
+ vema_get_mac_entries(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_IFACE_UCAST_MACS:
+ /* fall through */
+ case OPA_EM_ATTR_IFACE_MCAST_MACS:
+ vema_get_mac_list(port, recvd_mad, rsp_mad, attr_id);
+ break;
+ case OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS:
+ vema_get_summary_counters(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS:
+ vema_get_error_counters(port, recvd_mad, rsp_mad);
+ break;
+ default:
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ break;
+ }
+}
+
+/**
+ * vema_set -- Process received set MAD
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_set(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id);
+
+ switch (attr_id) {
+ case OPA_EM_ATTR_CLASS_PORT_INFO:
+ vema_set_class_port_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_INFO:
+ vema_set_veswport_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES:
+ vema_set_mac_entries(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_DELETE_VESW:
+ vema_set_delete_vesw(port, recvd_mad, rsp_mad);
+ break;
+ default:
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ break;
+ }
+}
+
+/**
+ * vema_send -- Send handler for VEMA MAD agent
+ * @mad_agent: pointer to the mad agent
+ * @mad_wc: pointer to mad send work completion information
+ *
+ * Free all the data structures associated with the sent MAD
+ */
+static void vema_send(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_wc *mad_wc)
+{
+ rdma_destroy_ah(mad_wc->send_buf->ah);
+ ib_free_send_mad(mad_wc->send_buf);
+}
+
+/**
+ * vema_recv -- Recv handler for VEMA MAD agent
+ * @mad_agent: pointer to the mad agent
+ * @send_buf: Send buffer if found, else NULL
+ * @mad_wc: pointer to mad send work completion information
+ *
+ * Handle only set and get methods and respond to other methods
+ * as unsupported. Allocate response buffer and address handle
+ * for the response MAD.
+ */
+static void vema_recv(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
+ struct ib_mad_recv_wc *mad_wc)
+{
+ struct opa_vnic_vema_port *port;
+ struct ib_ah *ah;
+ struct ib_mad_send_buf *rsp;
+ struct opa_vnic_vema_mad *vema_mad;
+
+ if (!mad_wc || !mad_wc->recv_buf.mad)
+ return;
+
+ port = mad_agent->context;
+ ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
+ mad_wc->recv_buf.grh, mad_agent->port_num);
+ if (IS_ERR(ah))
+ goto free_recv_mad;
+
+ rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
+ mad_wc->wc->pkey_index, 0,
+ IB_MGMT_VENDOR_HDR, OPA_VNIC_EMA_DATA,
+ GFP_KERNEL, OPA_MGMT_BASE_VERSION);
+ if (IS_ERR(rsp))
+ goto err_rsp;
+
+ rsp->ah = ah;
+ vema_mad = rsp->mad;
+ memcpy(vema_mad, mad_wc->recv_buf.mad, IB_MGMT_VENDOR_HDR);
+ vema_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
+ vema_mad->mad_hdr.status = 0;
+
+ /* Lock ensures network adapter is not removed */
+ mutex_lock(&port->lock);
+
+ switch (mad_wc->recv_buf.mad->mad_hdr.method) {
+ case IB_MGMT_METHOD_GET:
+ vema_get(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad,
+ vema_mad);
+ break;
+ case IB_MGMT_METHOD_SET:
+ vema_set(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad,
+ vema_mad);
+ break;
+ default:
+ vema_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ break;
+ }
+ mutex_unlock(&port->lock);
+
+ if (!ib_post_send_mad(rsp, NULL)) {
+ /*
+ * with post send successful ah and send mad
+ * will be destroyed in send handler
+ */
+ goto free_recv_mad;
+ }
+
+ ib_free_send_mad(rsp);
+
+err_rsp:
+ rdma_destroy_ah(ah);
+free_recv_mad:
+ ib_free_recv_mad(mad_wc);
+}
+
+/**
+ * vema_get_port -- Gets the opa_vnic_vema_port
+ * @cport: pointer to control dev
+ * @port_num: Port number
+ *
+ * This function loops through the ports and returns
+ * the opa_vnic_vema port structure that is associated
+ * with the OPA port number
+ *
+ * Return: ptr to requested opa_vnic_vema_port strucure
+ * if success, NULL if not
+ */
+static struct opa_vnic_vema_port *
+vema_get_port(struct opa_vnic_ctrl_port *cport, u8 port_num)
+{
+ struct opa_vnic_vema_port *port = (void *)cport + sizeof(*cport);
+
+ if (port_num > cport->num_ports)
+ return NULL;
+
+ return port + (port_num - 1);
+}
+
+/**
+ * opa_vnic_vema_send_trap -- This function sends a trap to the EM
+ * @cport: pointer to vnic control port
+ * @data: pointer to trap data filled by calling function
+ * @lid: issuers lid (encap_slid from vesw_port_info)
+ *
+ * This function is called from the VNIC driver to send a trap if there
+ * is somethng the EM should be notified about. These events currently
+ * are
+ * 1) UNICAST INTERFACE MACADDRESS changes
+ * 2) MULTICAST INTERFACE MACADDRESS changes
+ * 3) ETHERNET LINK STATUS changes
+ * While allocating the send mad the remote site qpn used is 1
+ * as this is the well known QP.
+ *
+ */
+void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter,
+ struct __opa_veswport_trap *data, u32 lid)
+{
+ struct opa_vnic_ctrl_port *cport = adapter->cport;
+ struct ib_mad_send_buf *send_buf;
+ struct opa_vnic_vema_port *port;
+ struct ib_device *ibp;
+ struct opa_vnic_vema_mad_trap *trap_mad;
+ struct opa_class_port_info *class;
+ struct rdma_ah_attr ah_attr;
+ struct ib_ah *ah;
+ struct opa_veswport_trap *trap;
+ u32 trap_lid;
+ u16 pkey_idx;
+
+ if (!cport)
+ goto err_exit;
+ ibp = cport->ibdev;
+ port = vema_get_port(cport, data->opaportnum);
+ if (!port || !port->mad_agent)
+ goto err_exit;
+
+ if (time_before(jiffies, adapter->trap_timeout)) {
+ if (adapter->trap_count == OPA_VNIC_TRAP_BURST_LIMIT) {
+ v_warn("Trap rate exceeded\n");
+ goto err_exit;
+ } else {
+ adapter->trap_count++;
+ }
+ } else {
+ adapter->trap_count = 0;
+ }
+
+ class = &port->class_port_info;
+ /* Set up address handle */
+ memset(&ah_attr, 0, sizeof(ah_attr));
+ ah_attr.type = rdma_ah_find_type(ibp, port->port_num);
+ rdma_ah_set_sl(&ah_attr,
+ GET_TRAP_SL_FROM_CLASS_PORT_INFO(class->trap_sl_rsvd));
+ rdma_ah_set_port_num(&ah_attr, port->port_num);
+ trap_lid = be32_to_cpu(class->trap_lid);
+ /*
+ * check for trap lid validity, must not be zero
+ * The trap sink could change after we fashion the MAD but since traps
+ * are not guaranteed we won't use a lock as anyway the change will take
+ * place even with locking.
+ */
+ if (!trap_lid) {
+ c_err("%s: Invalid dlid\n", __func__);
+ goto err_exit;
+ }
+
+ rdma_ah_set_dlid(&ah_attr, trap_lid);
+ ah = rdma_create_ah(port->mad_agent->qp->pd, &ah_attr);
+ if (IS_ERR(ah)) {
+ c_err("%s:Couldn't create new AH = %p\n", __func__, ah);
+ c_err("%s:dlid = %d, sl = %d, port = %d\n", __func__,
+ rdma_ah_get_dlid(&ah_attr), rdma_ah_get_sl(&ah_attr),
+ rdma_ah_get_port_num(&ah_attr));
+ goto err_exit;
+ }
+
+ if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_FULL,
+ &pkey_idx) < 0) {
+ c_err("%s:full key not found, defaulting to partial\n",
+ __func__);
+ if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_PARTIAL,
+ &pkey_idx) < 0)
+ pkey_idx = 1;
+ }
+
+ send_buf = ib_create_send_mad(port->mad_agent, 1, pkey_idx, 0,
+ IB_MGMT_VENDOR_HDR, IB_MGMT_MAD_DATA,
+ GFP_KERNEL, OPA_MGMT_BASE_VERSION);
+ if (IS_ERR(send_buf)) {
+ c_err("%s:Couldn't allocate send buf\n", __func__);
+ goto err_sndbuf;
+ }
+
+ send_buf->ah = ah;
+
+ /* Set up common MAD hdr */
+ trap_mad = send_buf->mad;
+ trap_mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION;
+ trap_mad->mad_hdr.mgmt_class = OPA_MGMT_CLASS_INTEL_EMA;
+ trap_mad->mad_hdr.class_version = OPA_EMA_CLASS_VERSION;
+ trap_mad->mad_hdr.method = IB_MGMT_METHOD_TRAP;
+ port->tid++;
+ trap_mad->mad_hdr.tid = cpu_to_be64(port->tid);
+ trap_mad->mad_hdr.attr_id = IB_SMP_ATTR_NOTICE;
+
+ /* Set up vendor OUI */
+ trap_mad->oui[0] = INTEL_OUI_1;
+ trap_mad->oui[1] = INTEL_OUI_2;
+ trap_mad->oui[2] = INTEL_OUI_3;
+
+ /* Setup notice attribute portion */
+ trap_mad->notice.gen_type = OPA_INTEL_EMA_NOTICE_TYPE_INFO << 1;
+ trap_mad->notice.oui_1 = INTEL_OUI_1;
+ trap_mad->notice.oui_2 = INTEL_OUI_2;
+ trap_mad->notice.oui_3 = INTEL_OUI_3;
+ trap_mad->notice.issuer_lid = cpu_to_be32(lid);
+
+ /* copy the actual trap data */
+ trap = (struct opa_veswport_trap *)trap_mad->notice.raw_data;
+ trap->fabric_id = cpu_to_be16(data->fabric_id);
+ trap->veswid = cpu_to_be16(data->veswid);
+ trap->veswportnum = cpu_to_be32(data->veswportnum);
+ trap->opaportnum = cpu_to_be16(data->opaportnum);
+ trap->veswportindex = data->veswportindex;
+ trap->opcode = data->opcode;
+
+ /* If successful send set up rate limit timeout else bail */
+ if (ib_post_send_mad(send_buf, NULL)) {
+ ib_free_send_mad(send_buf);
+ } else {
+ if (adapter->trap_count)
+ return;
+ adapter->trap_timeout = jiffies +
+ usecs_to_jiffies(OPA_VNIC_TRAP_TIMEOUT);
+ return;
+ }
+
+err_sndbuf:
+ rdma_destroy_ah(ah);
+err_exit:
+ v_err("Aborting trap\n");
+}
+
+static int vema_rem_vport(int id, void *p, void *data)
+{
+ struct opa_vnic_adapter *adapter = p;
+
+ opa_vnic_rem_netdev(adapter);
+ return 0;
+}
+
+static int vema_enable_vport(int id, void *p, void *data)
+{
+ struct opa_vnic_adapter *adapter = p;
+
+ netif_carrier_on(adapter->netdev);
+ return 0;
+}
+
+static int vema_disable_vport(int id, void *p, void *data)
+{
+ struct opa_vnic_adapter *adapter = p;
+
+ netif_carrier_off(adapter->netdev);
+ return 0;
+}
+
+static void opa_vnic_event(struct ib_event_handler *handler,
+ struct ib_event *record)
+{
+ struct opa_vnic_vema_port *port =
+ container_of(handler, struct opa_vnic_vema_port, event_handler);
+ struct opa_vnic_ctrl_port *cport = port->cport;
+
+ if (record->element.port_num != port->port_num)
+ return;
+
+ c_dbg("OPA_VNIC received event %d on device %s port %d\n",
+ record->event, record->device->name, record->element.port_num);
+
+ if (record->event == IB_EVENT_PORT_ERR)
+ idr_for_each(&port->vport_idr, vema_disable_vport, NULL);
+ if (record->event == IB_EVENT_PORT_ACTIVE)
+ idr_for_each(&port->vport_idr, vema_enable_vport, NULL);
+}
+
+/**
+ * vema_unregister -- Unregisters agent
+ * @cport: pointer to control port
+ *
+ * This deletes the registration by VEMA for MADs
+ */
+static void vema_unregister(struct opa_vnic_ctrl_port *cport)
+{
+ int i;
+
+ for (i = 1; i <= cport->num_ports; i++) {
+ struct opa_vnic_vema_port *port = vema_get_port(cport, i);
+
+ if (!port->mad_agent)
+ continue;
+
+ /* Lock ensures no MAD is being processed */
+ mutex_lock(&port->lock);
+ idr_for_each(&port->vport_idr, vema_rem_vport, NULL);
+ mutex_unlock(&port->lock);
+
+ ib_unregister_mad_agent(port->mad_agent);
+ port->mad_agent = NULL;
+ mutex_destroy(&port->lock);
+ idr_destroy(&port->vport_idr);
+ ib_unregister_event_handler(&port->event_handler);
+ }
+}
+
+/**
+ * vema_register -- Registers agent
+ * @cport: pointer to control port
+ *
+ * This function registers the handlers for the VEMA MADs
+ *
+ * Return: returns 0 on success. non zero otherwise
+ */
+static int vema_register(struct opa_vnic_ctrl_port *cport)
+{
+ struct ib_mad_reg_req reg_req = {
+ .mgmt_class = OPA_MGMT_CLASS_INTEL_EMA,
+ .mgmt_class_version = OPA_MGMT_BASE_VERSION,
+ .oui = { INTEL_OUI_1, INTEL_OUI_2, INTEL_OUI_3 }
+ };
+ int i;
+
+ set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
+ set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);
+
+ /* register ib event handler and mad agent for each port on dev */
+ for (i = 1; i <= cport->num_ports; i++) {
+ struct opa_vnic_vema_port *port = vema_get_port(cport, i);
+ int ret;
+
+ port->cport = cport;
+ port->port_num = i;
+
+ INIT_IB_EVENT_HANDLER(&port->event_handler,
+ cport->ibdev, opa_vnic_event);
+ ret = ib_register_event_handler(&port->event_handler);
+ if (ret) {
+ c_err("port %d: event handler register failed\n", i);
+ vema_unregister(cport);
+ return ret;
+ }
+
+ idr_init(&port->vport_idr);
+ mutex_init(&port->lock);
+ port->mad_agent = ib_register_mad_agent(cport->ibdev, i,
+ IB_QPT_GSI, &reg_req,
+ IB_MGMT_RMPP_VERSION,
+ vema_send, vema_recv,
+ port, 0);
+ if (IS_ERR(port->mad_agent)) {
+ ret = PTR_ERR(port->mad_agent);
+ port->mad_agent = NULL;
+ mutex_destroy(&port->lock);
+ idr_destroy(&port->vport_idr);
+ vema_unregister(cport);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * opa_vnic_vema_add_one -- Handle new ib device
+ * @device: ib device pointer
+ *
+ * Allocate the vnic control port and initialize it.
+ */
+static void opa_vnic_vema_add_one(struct ib_device *device)
+{
+ struct opa_vnic_ctrl_port *cport;
+ int rc, size = sizeof(*cport);
+
+ if (!rdma_cap_opa_vnic(device))
+ return;
+
+ size += device->phys_port_cnt * sizeof(struct opa_vnic_vema_port);
+ cport = kzalloc(size, GFP_KERNEL);
+ if (!cport)
+ return;
+
+ cport->num_ports = device->phys_port_cnt;
+ cport->ibdev = device;
+
+ /* Initialize opa vnic management agent (vema) */
+ rc = vema_register(cport);
+ if (!rc)
+ c_info("VNIC client initialized\n");
+
+ ib_set_client_data(device, &opa_vnic_client, cport);
+}
+
+/**
+ * opa_vnic_vema_rem_one -- Handle ib device removal
+ * @device: ib device pointer
+ * @client_data: ib client data
+ *
+ * Uninitialize and free the vnic control port.
+ */
+static void opa_vnic_vema_rem_one(struct ib_device *device,
+ void *client_data)
+{
+ struct opa_vnic_ctrl_port *cport = client_data;
+
+ if (!cport)
+ return;
+
+ c_info("removing VNIC client\n");
+ vema_unregister(cport);
+ kfree(cport);
+}
+
+static int __init opa_vnic_init(void)
+{
+ int rc;
+
+ pr_info("OPA Virtual Network Driver - v%s\n",
+ opa_vnic_driver_version);
+
+ rc = ib_register_client(&opa_vnic_client);
+ if (rc)
+ pr_err("VNIC driver register failed %d\n", rc);
+
+ return rc;
+}
+module_init(opa_vnic_init);
+
+static void opa_vnic_deinit(void)
+{
+ ib_unregister_client(&opa_vnic_client);
+}
+module_exit(opa_vnic_deinit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel OPA Virtual Network driver");
+MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
new file mode 100644
index 000000000000..a51bf977f4d6
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC EMA Interface functions.
+ */
+
+#include "opa_vnic_internal.h"
+
+/**
+ * opa_vnic_vema_report_event - sent trap to report the specified event
+ * @adapter: vnic port adapter
+ * @event: event to be reported
+ *
+ * This function calls vema api to sent a trap for the given event.
+ */
+void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct __opa_veswport_trap trap_data;
+
+ trap_data.fabric_id = info->vesw.fabric_id;
+ trap_data.veswid = info->vesw.vesw_id;
+ trap_data.veswportnum = info->vport.port_num;
+ trap_data.opaportnum = adapter->port_num;
+ trap_data.veswportindex = adapter->vport_num;
+ trap_data.opcode = event;
+
+ opa_vnic_vema_send_trap(adapter, &trap_data, info->vport.encap_slid);
+}
+
+/**
+ * opa_vnic_get_error_counters - get summary counters
+ * @adapter: vnic port adapter
+ * @cntrs: pointer to destination summary counters structure
+ *
+ * This function populates the summary counters that is maintained by the
+ * given adapter to destination address provided.
+ */
+void opa_vnic_get_summary_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_summary_counters *cntrs)
+{
+ struct opa_vnic_stats vstats;
+ __be64 *dst;
+ u64 *src;
+
+ memset(&vstats, 0, sizeof(vstats));
+ mutex_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(adapter->netdev, &vstats.netstats);
+ mutex_unlock(&adapter->stats_lock);
+
+ cntrs->vp_instance = cpu_to_be16(adapter->vport_num);
+ cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id);
+ cntrs->veswport_num = cpu_to_be32(adapter->port_num);
+
+ cntrs->tx_errors = cpu_to_be64(vstats.netstats.tx_errors);
+ cntrs->rx_errors = cpu_to_be64(vstats.netstats.rx_errors);
+ cntrs->tx_packets = cpu_to_be64(vstats.netstats.tx_packets);
+ cntrs->rx_packets = cpu_to_be64(vstats.netstats.rx_packets);
+ cntrs->tx_bytes = cpu_to_be64(vstats.netstats.tx_bytes);
+ cntrs->rx_bytes = cpu_to_be64(vstats.netstats.rx_bytes);
+
+ /*
+ * This loop depends on layout of
+ * opa_veswport_summary_counters opa_vnic_stats structures.
+ */
+ for (dst = &cntrs->tx_unicast, src = &vstats.tx_grp.unicast;
+ dst < &cntrs->reserved[0]; dst++, src++) {
+ *dst = cpu_to_be64(*src);
+ }
+}
+
+/**
+ * opa_vnic_get_error_counters - get error counters
+ * @adapter: vnic port adapter
+ * @cntrs: pointer to destination error counters structure
+ *
+ * This function populates the error counters that is maintained by the
+ * given adapter to destination address provided.
+ */
+void opa_vnic_get_error_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_error_counters *cntrs)
+{
+ struct opa_vnic_stats vstats;
+
+ memset(&vstats, 0, sizeof(vstats));
+ mutex_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(adapter->netdev, &vstats.netstats);
+ mutex_unlock(&adapter->stats_lock);
+
+ cntrs->vp_instance = cpu_to_be16(adapter->vport_num);
+ cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id);
+ cntrs->veswport_num = cpu_to_be32(adapter->port_num);
+
+ cntrs->tx_errors = cpu_to_be64(vstats.netstats.tx_errors);
+ cntrs->rx_errors = cpu_to_be64(vstats.netstats.rx_errors);
+ cntrs->tx_dlid_zero = cpu_to_be64(vstats.tx_dlid_zero);
+ cntrs->tx_drop_state = cpu_to_be64(vstats.tx_drop_state);
+ cntrs->tx_logic = cpu_to_be64(vstats.netstats.tx_fifo_errors +
+ vstats.netstats.tx_carrier_errors);
+
+ cntrs->rx_bad_veswid = cpu_to_be64(vstats.netstats.rx_nohandler);
+ cntrs->rx_runt = cpu_to_be64(vstats.rx_runt);
+ cntrs->rx_oversize = cpu_to_be64(vstats.rx_oversize);
+ cntrs->rx_drop_state = cpu_to_be64(vstats.rx_drop_state);
+ cntrs->rx_logic = cpu_to_be64(vstats.netstats.rx_fifo_errors);
+}
+
+/**
+ * opa_vnic_get_vesw_info -- Get the vesw information
+ * @adapter: vnic port adapter
+ * @info: pointer to destination vesw info structure
+ *
+ * This function copies the vesw info that is maintained by the
+ * given adapter to destination address provided.
+ */
+void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info)
+{
+ struct __opa_vesw_info *src = &adapter->info.vesw;
+ int i;
+
+ info->fabric_id = cpu_to_be16(src->fabric_id);
+ info->vesw_id = cpu_to_be16(src->vesw_id);
+ memcpy(info->rsvd0, src->rsvd0, ARRAY_SIZE(src->rsvd0));
+ info->def_port_mask = cpu_to_be16(src->def_port_mask);
+ memcpy(info->rsvd1, src->rsvd1, ARRAY_SIZE(src->rsvd1));
+ info->pkey = cpu_to_be16(src->pkey);
+
+ memcpy(info->rsvd2, src->rsvd2, ARRAY_SIZE(src->rsvd2));
+ info->u_mcast_dlid = cpu_to_be32(src->u_mcast_dlid);
+ for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++)
+ info->u_ucast_dlid[i] = cpu_to_be32(src->u_ucast_dlid[i]);
+
+ memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3));
+ for (i = 0; i < OPA_VNIC_MAX_NUM_PCP; i++)
+ info->eth_mtu[i] = cpu_to_be16(src->eth_mtu[i]);
+
+ info->eth_mtu_non_vlan = cpu_to_be16(src->eth_mtu_non_vlan);
+ memcpy(info->rsvd4, src->rsvd4, ARRAY_SIZE(src->rsvd4));
+}
+
+/**
+ * opa_vnic_set_vesw_info -- Set the vesw information
+ * @adapter: vnic port adapter
+ * @info: pointer to vesw info structure
+ *
+ * This function updates the vesw info that is maintained by the
+ * given adapter with vesw info provided. Reserved fields are stored
+ * and returned back to EM as is.
+ */
+void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info)
+{
+ struct __opa_vesw_info *dst = &adapter->info.vesw;
+ int i;
+
+ dst->fabric_id = be16_to_cpu(info->fabric_id);
+ dst->vesw_id = be16_to_cpu(info->vesw_id);
+ memcpy(dst->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0));
+ dst->def_port_mask = be16_to_cpu(info->def_port_mask);
+ memcpy(dst->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1));
+ dst->pkey = be16_to_cpu(info->pkey);
+
+ memcpy(dst->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2));
+ dst->u_mcast_dlid = be32_to_cpu(info->u_mcast_dlid);
+ for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++)
+ dst->u_ucast_dlid[i] = be32_to_cpu(info->u_ucast_dlid[i]);
+
+ memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3));
+ for (i = 0; i < OPA_VNIC_MAX_NUM_PCP; i++)
+ dst->eth_mtu[i] = be16_to_cpu(info->eth_mtu[i]);
+
+ dst->eth_mtu_non_vlan = be16_to_cpu(info->eth_mtu_non_vlan);
+ memcpy(dst->rsvd4, info->rsvd4, ARRAY_SIZE(info->rsvd4));
+}
+
+/**
+ * opa_vnic_get_per_veswport_info -- Get the vesw per port information
+ * @adapter: vnic port adapter
+ * @info: pointer to destination vport info structure
+ *
+ * This function copies the vesw per port info that is maintained by the
+ * given adapter to destination address provided.
+ * Note that the read only fields are not copied.
+ */
+void opa_vnic_get_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info)
+{
+ struct __opa_per_veswport_info *src = &adapter->info.vport;
+
+ info->port_num = cpu_to_be32(src->port_num);
+ info->eth_link_status = src->eth_link_status;
+ memcpy(info->rsvd0, src->rsvd0, ARRAY_SIZE(src->rsvd0));
+
+ memcpy(info->base_mac_addr, src->base_mac_addr,
+ ARRAY_SIZE(info->base_mac_addr));
+ info->config_state = src->config_state;
+ info->oper_state = src->oper_state;
+ info->max_mac_tbl_ent = cpu_to_be16(src->max_mac_tbl_ent);
+ info->max_smac_ent = cpu_to_be16(src->max_smac_ent);
+ info->mac_tbl_digest = cpu_to_be32(src->mac_tbl_digest);
+ memcpy(info->rsvd1, src->rsvd1, ARRAY_SIZE(src->rsvd1));
+
+ info->encap_slid = cpu_to_be32(src->encap_slid);
+ memcpy(info->pcp_to_sc_uc, src->pcp_to_sc_uc,
+ ARRAY_SIZE(info->pcp_to_sc_uc));
+ memcpy(info->pcp_to_vl_uc, src->pcp_to_vl_uc,
+ ARRAY_SIZE(info->pcp_to_vl_uc));
+ memcpy(info->pcp_to_sc_mc, src->pcp_to_sc_mc,
+ ARRAY_SIZE(info->pcp_to_sc_mc));
+ memcpy(info->pcp_to_vl_mc, src->pcp_to_vl_mc,
+ ARRAY_SIZE(info->pcp_to_vl_mc));
+ info->non_vlan_sc_uc = src->non_vlan_sc_uc;
+ info->non_vlan_vl_uc = src->non_vlan_vl_uc;
+ info->non_vlan_sc_mc = src->non_vlan_sc_mc;
+ info->non_vlan_vl_mc = src->non_vlan_vl_mc;
+ memcpy(info->rsvd2, src->rsvd2, ARRAY_SIZE(src->rsvd2));
+
+ info->uc_macs_gen_count = cpu_to_be16(src->uc_macs_gen_count);
+ info->mc_macs_gen_count = cpu_to_be16(src->mc_macs_gen_count);
+ memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3));
+}
+
+/**
+ * opa_vnic_set_per_veswport_info -- Set vesw per port information
+ * @adapter: vnic port adapter
+ * @info: pointer to vport info structure
+ *
+ * This function updates the vesw per port info that is maintained by the
+ * given adapter with vesw per port info provided. Reserved fields are
+ * stored and returned back to EM as is.
+ */
+void opa_vnic_set_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info)
+{
+ struct __opa_per_veswport_info *dst = &adapter->info.vport;
+
+ dst->port_num = be32_to_cpu(info->port_num);
+ memcpy(dst->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0));
+
+ memcpy(dst->base_mac_addr, info->base_mac_addr,
+ ARRAY_SIZE(dst->base_mac_addr));
+ dst->config_state = info->config_state;
+ memcpy(dst->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1));
+
+ dst->encap_slid = be32_to_cpu(info->encap_slid);
+ memcpy(dst->pcp_to_sc_uc, info->pcp_to_sc_uc,
+ ARRAY_SIZE(dst->pcp_to_sc_uc));
+ memcpy(dst->pcp_to_vl_uc, info->pcp_to_vl_uc,
+ ARRAY_SIZE(dst->pcp_to_vl_uc));
+ memcpy(dst->pcp_to_sc_mc, info->pcp_to_sc_mc,
+ ARRAY_SIZE(dst->pcp_to_sc_mc));
+ memcpy(dst->pcp_to_vl_mc, info->pcp_to_vl_mc,
+ ARRAY_SIZE(dst->pcp_to_vl_mc));
+ dst->non_vlan_sc_uc = info->non_vlan_sc_uc;
+ dst->non_vlan_vl_uc = info->non_vlan_vl_uc;
+ dst->non_vlan_sc_mc = info->non_vlan_sc_mc;
+ dst->non_vlan_vl_mc = info->non_vlan_vl_mc;
+ memcpy(dst->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2));
+ memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3));
+}
+
+/**
+ * opa_vnic_query_mcast_macs - query multicast mac list
+ * @adapter: vnic port adapter
+ * @macs: pointer mac list
+ *
+ * This function populates the provided mac list with the configured
+ * multicast addresses in the adapter.
+ */
+void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs)
+{
+ u16 start_idx, num_macs, idx = 0, count = 0;
+ struct netdev_hw_addr *ha;
+
+ start_idx = be16_to_cpu(macs->start_idx);
+ num_macs = be16_to_cpu(macs->num_macs_in_msg);
+ netdev_for_each_mc_addr(ha, adapter->netdev) {
+ struct opa_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+ if (start_idx > idx++)
+ continue;
+ else if (num_macs == count)
+ break;
+ memcpy(entry, ha->addr, sizeof(*entry));
+ count++;
+ }
+
+ macs->tot_macs_in_lst = cpu_to_be16(netdev_mc_count(adapter->netdev));
+ macs->num_macs_in_msg = cpu_to_be16(count);
+ macs->gen_count = cpu_to_be16(adapter->info.vport.mc_macs_gen_count);
+}
+
+/**
+ * opa_vnic_query_ucast_macs - query unicast mac list
+ * @adapter: vnic port adapter
+ * @macs: pointer mac list
+ *
+ * This function populates the provided mac list with the configured
+ * unicast addresses in the adapter.
+ */
+void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs)
+{
+ u16 start_idx, tot_macs, num_macs, idx = 0, count = 0;
+ struct netdev_hw_addr *ha;
+
+ start_idx = be16_to_cpu(macs->start_idx);
+ num_macs = be16_to_cpu(macs->num_macs_in_msg);
+ /* loop through dev_addrs list first */
+ for_each_dev_addr(adapter->netdev, ha) {
+ struct opa_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+ /* Do not include EM specified MAC address */
+ if (!memcmp(adapter->info.vport.base_mac_addr, ha->addr,
+ ARRAY_SIZE(adapter->info.vport.base_mac_addr)))
+ continue;
+
+ if (start_idx > idx++)
+ continue;
+ else if (num_macs == count)
+ break;
+ memcpy(entry, ha->addr, sizeof(*entry));
+ count++;
+ }
+
+ /* loop through uc list */
+ netdev_for_each_uc_addr(ha, adapter->netdev) {
+ struct opa_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+ if (start_idx > idx++)
+ continue;
+ else if (num_macs == count)
+ break;
+ memcpy(entry, ha->addr, sizeof(*entry));
+ count++;
+ }
+
+ tot_macs = netdev_hw_addr_list_count(&adapter->netdev->dev_addrs) +
+ netdev_uc_count(adapter->netdev);
+ macs->tot_macs_in_lst = cpu_to_be16(tot_macs);
+ macs->num_macs_in_msg = cpu_to_be16(count);
+ macs->gen_count = cpu_to_be16(adapter->info.vport.uc_macs_gen_count);
+}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index cee46266f434..def723a5df29 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -312,10 +312,15 @@ static int srp_new_cm_id(struct srp_rdma_ch *ch)
if (ch->cm_id)
ib_destroy_cm_id(ch->cm_id);
ch->cm_id = new_cm_id;
+ if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
+ target->srp_host->port))
+ ch->path.rec_type = SA_PATH_REC_TYPE_OPA;
+ else
+ ch->path.rec_type = SA_PATH_REC_TYPE_IB;
ch->path.sgid = target->sgid;
ch->path.dgid = target->orig_dgid;
ch->path.pkey = target->pkey;
- ch->path.service_id = target->service_id;
+ sa_path_set_service_id(&ch->path, target->service_id);
return 0;
}
@@ -643,7 +648,7 @@ static void srp_free_ch_ib(struct srp_target_port *target,
}
static void srp_path_rec_completion(int status,
- struct ib_sa_path_rec *pathrec,
+ struct sa_path_rec *pathrec,
void *ch_ptr)
{
struct srp_rdma_ch *ch = ch_ptr;
@@ -2399,12 +2404,12 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
switch (event->param.rej_rcvd.reason) {
case IB_CM_REJ_PORT_CM_REDIRECT:
cpi = event->param.rej_rcvd.ari;
- ch->path.dlid = cpi->redirect_lid;
+ sa_path_set_dlid(&ch->path, htonl(ntohs(cpi->redirect_lid)));
ch->path.pkey = cpi->redirect_pkey;
cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
- ch->status = ch->path.dlid ?
+ ch->status = sa_path_get_dlid(&ch->path) ?
SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
break;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 32ed40db3ca2..ab9077b81d5a 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -152,7 +152,7 @@ struct srp_rdma_ch {
struct completion done;
int status;
- struct ib_sa_path_rec path;
+ struct sa_path_rec path;
struct ib_sa_query *path_query;
int path_query_id;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 7e314c2f2071..7d6c199de2d6 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -417,7 +417,7 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_wc)
{
- ib_destroy_ah(mad_wc->send_buf->ah);
+ rdma_destroy_ah(mad_wc->send_buf->ah);
ib_free_send_mad(mad_wc->send_buf);
}
@@ -481,7 +481,7 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
ib_free_send_mad(rsp);
err_rsp:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
err:
ib_free_recv_mad(mad_wc);
}