From c183a4c33528d17cde0dcb093ae4248d8cb8f649 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:20 -0700 Subject: [PATCH] IB: Update FMR functions Change some functions to return void rather than an int since they are always returning 0, thus making checking return values rather pointless. Signed-off-by: Tom Duffy Signed-off-by: Libor Michalek Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/fmr_pool.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 328feae2a5be..7763b31abba7 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -29,7 +30,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: fmr_pool.c 1349 2004-12-16 21:09:43Z roland $ + * $Id: fmr_pool.c 2730 2005-06-28 16:43:03Z sean.hefty $ */ #include @@ -329,7 +330,7 @@ EXPORT_SYMBOL(ib_create_fmr_pool); * * Destroy an FMR pool and free all associated resources. */ -int ib_destroy_fmr_pool(struct ib_fmr_pool *pool) +void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) { struct ib_pool_fmr *fmr; struct ib_pool_fmr *tmp; @@ -352,8 +353,6 @@ int ib_destroy_fmr_pool(struct ib_fmr_pool *pool) kfree(pool->cache_bucket); kfree(pool); - - return 0; } EXPORT_SYMBOL(ib_destroy_fmr_pool); -- cgit v1.2.3-55-g7522 From b82cab6b331b51d82f90d2207f3bbfdf09361ac9 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:22 -0700 Subject: [PATCH] IB: Update MAD client API Automatically allocate a MR when registering a MAD agent. MAD clients are modified to use this updated API. 
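For illustration only (this sketch is not part of the patch), a sender now takes the lkey for its gather entry straight from the agent, so the per-client ib_get_dma_mr()/ib_dereg_mr() bookkeeping removed below is no longer needed. The helper name and its arguments are hypothetical; only the mad_agent->mr field comes from this change.

static void example_fill_gather(struct ib_mad_agent *mad_agent,
                                struct ib_mad *mad, struct ib_sge *gather)
{
        /* Map the MAD for sending and describe it with the agent's MR. */
        gather->addr   = dma_map_single(mad_agent->device->dma_device,
                                        mad, sizeof *mad, DMA_TO_DEVICE);
        gather->length = sizeof *mad;
        gather->lkey   = mad_agent->mr->lkey;   /* was port_priv->mr->lkey */
}
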
Signed-off-by: Sean Hefty Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/agent.c | 14 +------------- drivers/infiniband/core/agent_priv.h | 3 +-- drivers/infiniband/core/mad.c | 31 +++++++++++++++++++------------ drivers/infiniband/core/sa_query.c | 15 ++------------- drivers/infiniband/include/ib_mad.h | 1 + 5 files changed, 24 insertions(+), 40 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 23d1957c4b29..dde25ee81b65 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -134,7 +134,7 @@ static int agent_mad_send(struct ib_mad_agent *mad_agent, sizeof(mad_priv->mad), DMA_TO_DEVICE); gather_list.length = sizeof(mad_priv->mad); - gather_list.lkey = (*port_priv->mr).lkey; + gather_list.lkey = mad_agent->mr->lkey; send_wr.next = NULL; send_wr.opcode = IB_WR_SEND; @@ -322,22 +322,12 @@ int ib_agent_port_open(struct ib_device *device, int port_num) goto error3; } - port_priv->mr = ib_get_dma_mr(port_priv->smp_agent->qp->pd, - IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(port_priv->mr)) { - printk(KERN_ERR SPFX "Couldn't get DMA MR\n"); - ret = PTR_ERR(port_priv->mr); - goto error4; - } - spin_lock_irqsave(&ib_agent_port_list_lock, flags); list_add_tail(&port_priv->port_list, &ib_agent_port_list); spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); return 0; -error4: - ib_unregister_mad_agent(port_priv->perf_mgmt_agent); error3: ib_unregister_mad_agent(port_priv->smp_agent); error2: @@ -361,8 +351,6 @@ int ib_agent_port_close(struct ib_device *device, int port_num) list_del(&port_priv->port_list); spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); - ib_dereg_mr(port_priv->mr); - ib_unregister_mad_agent(port_priv->perf_mgmt_agent); ib_unregister_mad_agent(port_priv->smp_agent); kfree(port_priv); diff --git a/drivers/infiniband/core/agent_priv.h b/drivers/infiniband/core/agent_priv.h index 17a0cce5813c..17435af1e914 100644 --- a/drivers/infiniband/core/agent_priv.h +++ b/drivers/infiniband/core/agent_priv.h @@ -33,7 +33,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* - * $Id: agent_priv.h 1389 2004-12-27 22:56:47Z roland $ + * $Id: agent_priv.h 1640 2005-01-24 22:39:02Z halr $ */ #ifndef __IB_AGENT_PRIV_H__ @@ -57,7 +57,6 @@ struct ib_agent_port_private { int port_num; struct ib_mad_agent *smp_agent; /* SM class */ struct ib_mad_agent *perf_mgmt_agent; /* PerfMgmt class */ - struct ib_mr *mr; }; #endif /* __IB_AGENT_PRIV_H__ */ diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 23628c622a50..52748b0f7685 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -261,19 +261,26 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, ret = ERR_PTR(-ENOMEM); goto error1; } + memset(mad_agent_priv, 0, sizeof *mad_agent_priv); + + mad_agent_priv->agent.mr = ib_get_dma_mr(port_priv->qp_info[qpn].qp->pd, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(mad_agent_priv->agent.mr)) { + ret = ERR_PTR(-ENOMEM); + goto error2; + } if (mad_reg_req) { reg_req = kmalloc(sizeof *reg_req, GFP_KERNEL); if (!reg_req) { ret = ERR_PTR(-ENOMEM); - goto error2; + goto error3; } /* Make a copy of the MAD registration request */ memcpy(reg_req, mad_reg_req, sizeof *reg_req); } /* Now, fill in the various structures */ - memset(mad_agent_priv, 0, sizeof *mad_agent_priv); mad_agent_priv->qp_info = &port_priv->qp_info[qpn]; mad_agent_priv->reg_req = reg_req; mad_agent_priv->rmpp_version = rmpp_version; @@ -301,7 +308,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (method) { if (method_in_use(&method, mad_reg_req)) - goto error3; + goto error4; } } ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv, @@ -317,14 +324,14 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (is_vendor_method_in_use( vendor_class, mad_reg_req)) - goto error3; + goto error4; } } ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv); } if (ret2) { ret = ERR_PTR(ret2); - goto error3; + goto error4; } } @@ -346,11 +353,13 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, return &mad_agent_priv->agent; -error3: +error4: spin_unlock_irqrestore(&port_priv->reg_lock, flags); kfree(reg_req); -error2: +error3: kfree(mad_agent_priv); +error2: + ib_dereg_mr(mad_agent_priv->agent.mr); error1: return ret; } @@ -487,18 +496,15 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) * MADs, preventing us from queuing additional work */ cancel_mads(mad_agent_priv); - port_priv = mad_agent_priv->qp_info->port_priv; - cancel_delayed_work(&mad_agent_priv->timed_work); - flush_workqueue(port_priv->wq); spin_lock_irqsave(&port_priv->reg_lock, flags); remove_mad_reg_req(mad_agent_priv); list_del(&mad_agent_priv->agent_list); spin_unlock_irqrestore(&port_priv->reg_lock, flags); - /* XXX: Cleanup pending RMPP receives for this agent */ + flush_workqueue(port_priv->wq); atomic_dec(&mad_agent_priv->refcount); wait_event(mad_agent_priv->wait, @@ -506,6 +512,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) if (mad_agent_priv->reg_req) kfree(mad_agent_priv->reg_req); + ib_dereg_mr(mad_agent_priv->agent.mr); kfree(mad_agent_priv); } @@ -750,7 +757,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, list_add_tail(&local->completion_list, &mad_agent_priv->local_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); queue_work(mad_agent_priv->qp_info->port_priv->wq, - &mad_agent_priv->local_work); + &mad_agent_priv->local_work); ret = 1; out: return ret; diff --git a/drivers/infiniband/core/sa_query.c 
b/drivers/infiniband/core/sa_query.c index 5a08e81fa827..649824e33253 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -77,7 +77,6 @@ struct ib_sa_sm_ah { struct ib_sa_port { struct ib_mad_agent *agent; - struct ib_mr *mr; struct ib_sa_sm_ah *sm_ah; struct work_struct update_task; spinlock_t ah_lock; @@ -492,7 +491,7 @@ retry: sizeof (struct ib_sa_mad), DMA_TO_DEVICE); gather_list.length = sizeof (struct ib_sa_mad); - gather_list.lkey = port->mr->lkey; + gather_list.lkey = port->agent->mr->lkey; pci_unmap_addr_set(query, mapping, gather_list.addr); ret = ib_post_send_mad(port->agent, &wr, &bad_wr); @@ -780,7 +779,6 @@ static void ib_sa_add_one(struct ib_device *device) sa_dev->end_port = e; for (i = 0; i <= e - s; ++i) { - sa_dev->port[i].mr = NULL; sa_dev->port[i].sm_ah = NULL; sa_dev->port[i].port_num = i + s; spin_lock_init(&sa_dev->port[i].ah_lock); @@ -792,13 +790,6 @@ static void ib_sa_add_one(struct ib_device *device) if (IS_ERR(sa_dev->port[i].agent)) goto err; - sa_dev->port[i].mr = ib_get_dma_mr(sa_dev->port[i].agent->qp->pd, - IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(sa_dev->port[i].mr)) { - ib_unregister_mad_agent(sa_dev->port[i].agent); - goto err; - } - INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah, &sa_dev->port[i]); } @@ -822,10 +813,8 @@ static void ib_sa_add_one(struct ib_device *device) return; err: - while (--i >= 0) { - ib_dereg_mr(sa_dev->port[i].mr); + while (--i >= 0) ib_unregister_mad_agent(sa_dev->port[i].agent); - } kfree(sa_dev); diff --git a/drivers/infiniband/include/ib_mad.h b/drivers/infiniband/include/ib_mad.h index 4a6bf6763a97..60378c1a9ccf 100644 --- a/drivers/infiniband/include/ib_mad.h +++ b/drivers/infiniband/include/ib_mad.h @@ -180,6 +180,7 @@ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent, struct ib_mad_agent { struct ib_device *device; struct ib_qp *qp; + struct ib_mr *mr; ib_mad_recv_handler recv_handler; ib_mad_send_handler send_handler; ib_mad_snoop_handler snoop_handler; -- cgit v1.2.3-55-g7522 From 824c8ae7d05bb4d21af707832c5bfa45d5494ec8 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:23 -0700 Subject: [PATCH] IB: Add MAD helper functions Add new helper routines for allocating MADs for sending and formatting a send WR. 
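As a usage illustration (a hedged sketch, not taken from the patch), a client can allocate, fill, and post a MAD with the new helpers roughly as below. The function name is hypothetical, remote_qpn/pkey_index/ah are assumed to come from the caller, and the prototype is the one introduced here; a later patch in this series adds an rmpp_active argument to ib_create_send_mad().

static int example_send_mad(struct ib_mad_agent *agent, u32 remote_qpn,
                            u16 pkey_index, struct ib_ah *ah)
{
        struct ib_mad_send_buf *send_buf;
        struct ib_send_wr *bad_wr;
        int ret;

        /* Allocate a full MAD: common header plus padded data payload. */
        send_buf = ib_create_send_mad(agent, remote_qpn, pkey_index, ah,
                                      sizeof (struct ib_mad_hdr),
                                      sizeof (struct ib_mad) -
                                      sizeof (struct ib_mad_hdr),
                                      GFP_KERNEL);
        if (IS_ERR(send_buf))
                return PTR_ERR(send_buf);

        /* Caller fills in the MAD contents through send_buf->mad. */
        send_buf->mad->mad_hdr.method = IB_MGMT_METHOD_GET;

        ret = ib_post_send_mad(agent, &send_buf->send_wr, &bad_wr);
        if (ret)
                ib_free_send_mad(send_buf);
        return ret;
}
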
Signed-off-by: Sean Hefty Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/mad.c | 76 +++++++++++++++++++++++++++++++++++++ drivers/infiniband/include/ib_mad.h | 60 +++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 52748b0f7685..d66ecf8243ec 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -763,6 +763,82 @@ out: return ret; } +static int get_buf_length(int hdr_len, int data_len) +{ + int seg_size, pad; + + seg_size = sizeof(struct ib_mad) - hdr_len; + if (data_len && seg_size) { + pad = seg_size - data_len % seg_size; + if (pad == seg_size) + pad = 0; + } else + pad = seg_size; + return hdr_len + data_len + pad; +} + +struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, + u32 remote_qpn, u16 pkey_index, + struct ib_ah *ah, + int hdr_len, int data_len, + unsigned int __nocast gfp_mask) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_buf *send_buf; + int buf_size; + void *buf; + + mad_agent_priv = container_of(mad_agent, + struct ib_mad_agent_private, agent); + buf_size = get_buf_length(hdr_len, data_len); + + buf = kmalloc(sizeof *send_buf + buf_size, gfp_mask); + if (!buf) + return ERR_PTR(-ENOMEM); + + send_buf = buf + buf_size; + memset(send_buf, 0, sizeof *send_buf); + send_buf->mad = buf; + + send_buf->sge.addr = dma_map_single(mad_agent->device->dma_device, + buf, buf_size, DMA_TO_DEVICE); + pci_unmap_addr_set(send_buf, mapping, send_buf->sge.addr); + send_buf->sge.length = buf_size; + send_buf->sge.lkey = mad_agent->mr->lkey; + + send_buf->send_wr.wr_id = (unsigned long) send_buf; + send_buf->send_wr.sg_list = &send_buf->sge; + send_buf->send_wr.num_sge = 1; + send_buf->send_wr.opcode = IB_WR_SEND; + send_buf->send_wr.send_flags = IB_SEND_SIGNALED; + send_buf->send_wr.wr.ud.ah = ah; + send_buf->send_wr.wr.ud.mad_hdr = &send_buf->mad->mad_hdr; + send_buf->send_wr.wr.ud.remote_qpn = remote_qpn; + send_buf->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; + send_buf->send_wr.wr.ud.pkey_index = pkey_index; + send_buf->mad_agent = mad_agent; + atomic_inc(&mad_agent_priv->refcount); + return send_buf; +} +EXPORT_SYMBOL(ib_create_send_mad); + +void ib_free_send_mad(struct ib_mad_send_buf *send_buf) +{ + struct ib_mad_agent_private *mad_agent_priv; + + mad_agent_priv = container_of(send_buf->mad_agent, + struct ib_mad_agent_private, agent); + + dma_unmap_single(send_buf->mad_agent->device->dma_device, + pci_unmap_addr(send_buf, mapping), + send_buf->sge.length, DMA_TO_DEVICE); + kfree(send_buf->mad); + + if (atomic_dec_and_test(&mad_agent_priv->refcount)) + wake_up(&mad_agent_priv->wait); +} +EXPORT_SYMBOL(ib_free_send_mad); + static int ib_send_mad(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_send_wr_private *mad_send_wr) { diff --git a/drivers/infiniband/include/ib_mad.h b/drivers/infiniband/include/ib_mad.h index 60378c1a9ccf..a6f06b8c4acf 100644 --- a/drivers/infiniband/include/ib_mad.h +++ b/drivers/infiniband/include/ib_mad.h @@ -39,6 +39,8 @@ #if !defined( IB_MAD_H ) #define IB_MAD_H +#include + #include /* Management base version */ @@ -73,6 +75,7 @@ #define IB_QP0 0 #define IB_QP1 __constant_htonl(1) #define IB_QP1_QKEY 0x80010000 +#define IB_QP_SET_QKEY 0x80000000 struct ib_grh { u32 version_tclass_flow; @@ -124,6 +127,30 @@ struct ib_vendor_mad { u8 data[216]; } __attribute__ 
((packed)); +/** + * ib_mad_send_buf - MAD data buffer and work request for sends. + * @mad: References an allocated MAD data buffer. The size of the data + * buffer is specified in the @send_wr.length field. + * @mapping: DMA mapping information. + * @mad_agent: MAD agent that allocated the buffer. + * @context: User-controlled context fields. + * @send_wr: An initialized work request structure used when sending the MAD. + * The wr_id field of the work request is initialized to reference this + * data structure. + * @sge: A scatter-gather list referenced by the work request. + * + * Users are responsible for initializing the MAD buffer itself, with the + * exception of specifying the payload length field in any RMPP MAD. + */ +struct ib_mad_send_buf { + struct ib_mad *mad; + DECLARE_PCI_UNMAP_ADDR(mapping) + struct ib_mad_agent *mad_agent; + void *context[2]; + struct ib_send_wr send_wr; + struct ib_sge sge; +}; + struct ib_mad_agent; struct ib_mad_send_wc; struct ib_mad_recv_wc; @@ -402,4 +429,37 @@ struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, int ib_process_mad_wc(struct ib_mad_agent *mad_agent, struct ib_wc *wc); +/** + * ib_create_send_mad - Allocate and initialize a data buffer and work request + * for sending a MAD. + * @mad_agent: Specifies the registered MAD service to associate with the MAD. + * @remote_qpn: Specifies the QPN of the receiving node. + * @pkey_index: Specifies which PKey the MAD will be sent using. This field + * is valid only if the remote_qpn is QP 1. + * @ah: References the address handle used to transfer to the remote node. + * @hdr_len: Indicates the size of the data header of the MAD. This length + * should include the common MAD header, RMPP header, plus any class + * specific header. + * @data_len: Indicates the size of any user-transfered data. The call will + * automatically adjust the allocated buffer size to account for any + * additional padding that may be necessary. + * @gfp_mask: GFP mask used for the memory allocation. + * + * This is a helper routine that may be used to allocate a MAD. Users are + * not required to allocate outbound MADs using this call. The returned + * MAD send buffer will reference a data buffer usable for sending a MAD, along + * with an intialized work request structure. + */ +struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, + u32 remote_qpn, u16 pkey_index, + struct ib_ah *ah, + int hdr_len, int data_len, + unsigned int __nocast gfp_mask); + +/** + * ib_free_send_mad - Returns data buffers used to send a MAD. + * @send_buf: Previously allocated send data buffer. + */ +void ib_free_send_mad(struct ib_mad_send_buf *send_buf); + #endif /* IB_MAD_H */ -- cgit v1.2.3-55-g7522 From 4a0754fae8fb5162d1cf4f738d48bb1e8190c09f Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:24 -0700 Subject: [PATCH] IB: Combine some MAD routines Combine response_mad() and solicited_mad() routines into a single function and simplify/encapsulate its usage. 
Signed-off-by: Sean Hefty Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/mad.c | 105 +++++++++++------------------------------- 1 file changed, 27 insertions(+), 78 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index d66ecf8243ec..ebe8c3a45410 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -58,7 +58,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method, static void remove_mad_reg_req(struct ib_mad_agent_private *priv); static struct ib_mad_agent_private *find_mad_agent( struct ib_mad_port_private *port_priv, - struct ib_mad *mad, int solicited); + struct ib_mad *mad); static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_private *mad); static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv); @@ -67,7 +67,6 @@ static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, static void timeout_sends(void *data); static void cancel_sends(void *data); static void local_completions(void *data); -static int solicited_mad(struct ib_mad *mad); static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, struct ib_mad_agent_private *agent_priv, u8 mgmt_class); @@ -558,6 +557,13 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) } EXPORT_SYMBOL(ib_unregister_mad_agent); +static inline int response_mad(struct ib_mad *mad) +{ + /* Trap represses are responses although response bit is reset */ + return ((mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) || + (mad->mad_hdr.method & IB_MGMT_METHOD_RESP)); +} + static void dequeue_mad(struct ib_mad_list_head *mad_list) { struct ib_mad_queue *mad_queue; @@ -650,7 +656,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_smp *smp, struct ib_send_wr *send_wr) { - int ret, solicited; + int ret; unsigned long flags; struct ib_mad_local_private *local; struct ib_mad_private *mad_priv; @@ -696,11 +702,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, switch (ret) { case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: - /* - * See if response is solicited and - * there is a recv handler - */ - if (solicited_mad(&mad_priv->mad.mad) && + if (response_mad(&mad_priv->mad.mad) && mad_agent_priv->agent.recv_handler) { local->mad_priv = mad_priv; local->recv_mad_agent = mad_agent_priv; @@ -717,15 +719,13 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, break; case IB_MAD_RESULT_SUCCESS: /* Treat like an incoming receive MAD */ - solicited = solicited_mad(&mad_priv->mad.mad); port_priv = ib_get_mad_port(mad_agent_priv->agent.device, mad_agent_priv->agent.port_num); if (port_priv) { mad_priv->mad.mad.mad_hdr.tid = ((struct ib_mad *)smp)->mad_hdr.tid; recv_mad_agent = find_mad_agent(port_priv, - &mad_priv->mad.mad, - solicited); + &mad_priv->mad.mad); } if (!port_priv || !recv_mad_agent) { kmem_cache_free(ib_mad_cache, mad_priv); @@ -1421,42 +1421,15 @@ out: return; } -static int response_mad(struct ib_mad *mad) -{ - /* Trap represses are responses although response bit is reset */ - return ((mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) || - (mad->mad_hdr.method & IB_MGMT_METHOD_RESP)); -} - -static int solicited_mad(struct ib_mad *mad) -{ - /* CM MADs are never solicited */ - if (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CM) { - return 0; - } - - /* XXX: Determine whether 
MAD is using RMPP */ - - /* Not using RMPP */ - /* Is this MAD a response to a previous MAD ? */ - return response_mad(mad); -} - static struct ib_mad_agent_private * find_mad_agent(struct ib_mad_port_private *port_priv, - struct ib_mad *mad, - int solicited) + struct ib_mad *mad) { struct ib_mad_agent_private *mad_agent = NULL; unsigned long flags; spin_lock_irqsave(&port_priv->reg_lock, flags); - - /* - * Whether MAD was solicited determines type of routing to - * MAD client. - */ - if (solicited) { + if (response_mad(mad)) { u32 hi_tid; struct ib_mad_agent_private *entry; @@ -1560,18 +1533,6 @@ out: return valid; } -/* - * Return start of fully reassembled MAD, or NULL, if MAD isn't assembled yet - */ -static struct ib_mad_private * -reassemble_recv(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_private *recv) -{ - /* Until we have RMPP, all receives are reassembled!... */ - INIT_LIST_HEAD(&recv->header.recv_wc.recv_buf.list); - return recv; -} - static struct ib_mad_send_wr_private* find_send_req(struct ib_mad_agent_private *mad_agent_priv, u64 tid) @@ -1600,29 +1561,22 @@ find_send_req(struct ib_mad_agent_private *mad_agent_priv, } static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_private *recv, - int solicited) + struct ib_mad_recv_wc *mad_recv_wc) { struct ib_mad_send_wr_private *mad_send_wr; struct ib_mad_send_wc mad_send_wc; unsigned long flags; + u64 tid; - /* Fully reassemble receive before processing */ - recv = reassemble_recv(mad_agent_priv, recv); - if (!recv) { - if (atomic_dec_and_test(&mad_agent_priv->refcount)) - wake_up(&mad_agent_priv->wait); - return; - } - + INIT_LIST_HEAD(&mad_recv_wc->recv_buf.list); /* Complete corresponding request */ - if (solicited) { + if (response_mad(mad_recv_wc->recv_buf.mad)) { + tid = mad_recv_wc->recv_buf.mad->mad_hdr.tid; spin_lock_irqsave(&mad_agent_priv->lock, flags); - mad_send_wr = find_send_req(mad_agent_priv, - recv->mad.mad.mad_hdr.tid); + mad_send_wr = find_send_req(mad_agent_priv, tid); if (!mad_send_wr) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - ib_free_recv_mad(&recv->header.recv_wc); + ib_free_recv_mad(mad_recv_wc); if (atomic_dec_and_test(&mad_agent_priv->refcount)) wake_up(&mad_agent_priv->wait); return; @@ -1632,10 +1586,9 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Defined behavior is to complete response before request */ - recv->header.recv_wc.wc->wr_id = mad_send_wr->wr_id; - mad_agent_priv->agent.recv_handler( - &mad_agent_priv->agent, - &recv->header.recv_wc); + mad_recv_wc->wc->wr_id = mad_send_wr->wr_id; + mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, + mad_recv_wc); atomic_dec(&mad_agent_priv->refcount); mad_send_wc.status = IB_WC_SUCCESS; @@ -1643,9 +1596,8 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, mad_send_wc.wr_id = mad_send_wr->wr_id; ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); } else { - mad_agent_priv->agent.recv_handler( - &mad_agent_priv->agent, - &recv->header.recv_wc); + mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, + mad_recv_wc); if (atomic_dec_and_test(&mad_agent_priv->refcount)) wake_up(&mad_agent_priv->wait); } @@ -1659,7 +1611,6 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, struct ib_mad_private *recv, *response; struct ib_mad_list_head *mad_list; struct ib_mad_agent_private *mad_agent; - int solicited; response = 
kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); if (!response) @@ -1745,11 +1696,9 @@ local: } } - /* Determine corresponding MAD agent for incoming receive MAD */ - solicited = solicited_mad(&recv->mad.mad); - mad_agent = find_mad_agent(port_priv, &recv->mad.mad, solicited); + mad_agent = find_mad_agent(port_priv, &recv->mad.mad); if (mad_agent) { - ib_mad_complete_recv(mad_agent, recv, solicited); + ib_mad_complete_recv(mad_agent, &recv->header.recv_wc); /* * recv is freed up in error cases in ib_mad_complete_recv * or via recv_handler in ib_mad_complete_recv() -- cgit v1.2.3-55-g7522 From f8197a4ed1bba8c80ed6ddf4535ded80cb4152cf Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:24 -0700 Subject: [PATCH] IB: Change saving of user's send wr_id in MAD Move saving of user's send wr_id to better match layering of received response handling. Signed-off-by: Sean Hefty Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/mad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index ebe8c3a45410..5535a45a8548 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -847,9 +847,8 @@ static int ib_send_mad(struct ib_mad_agent_private *mad_agent_priv, unsigned long flags; int ret; - /* Replace user's WR ID with our own to find WR upon completion */ + /* Set WR ID to find mad_send_wr upon completion */ qp_info = mad_agent_priv->qp_info; - mad_send_wr->wr_id = mad_send_wr->send_wr.wr_id; mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list; mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; @@ -948,6 +947,7 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent, mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; memcpy(mad_send_wr->sg_list, send_wr->sg_list, sizeof *send_wr->sg_list * send_wr->num_sge); + mad_send_wr->wr_id = mad_send_wr->send_wr.wr_id; mad_send_wr->send_wr.next = NULL; mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid; mad_send_wr->agent = mad_agent; -- cgit v1.2.3-55-g7522 From d760ce8f71ec5336c4a750a1293f26c0eb938c8a Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:25 -0700 Subject: [PATCH] IB: Change ib_mad_send_wr_private struct Have ib_mad_send_wr_private reference the private agent structure directly, rather than the exposed agent definition. Remove unneeded parameters to functions and simplify code were possible from this change. 
Signed-off-by: Sean Hefty Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/mad.c | 22 ++++++++++------------ drivers/infiniband/core/mad_priv.h | 4 ++-- 2 files changed, 12 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 5535a45a8548..d1898b30c345 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -839,8 +839,7 @@ void ib_free_send_mad(struct ib_mad_send_buf *send_buf) } EXPORT_SYMBOL(ib_free_send_mad); -static int ib_send_mad(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_send_wr_private *mad_send_wr) +static int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_qp_info *qp_info; struct ib_send_wr *bad_send_wr; @@ -848,7 +847,7 @@ static int ib_send_mad(struct ib_mad_agent_private *mad_agent_priv, int ret; /* Set WR ID to find mad_send_wr upon completion */ - qp_info = mad_agent_priv->qp_info; + qp_info = mad_send_wr->mad_agent_priv->qp_info; mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list; mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; @@ -857,7 +856,7 @@ static int ib_send_mad(struct ib_mad_agent_private *mad_agent_priv, list_add_tail(&mad_send_wr->mad_list.list, &qp_info->send_queue.list); spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); - ret = ib_post_send(mad_agent_priv->agent.qp, + ret = ib_post_send(mad_send_wr->mad_agent_priv->agent.qp, &mad_send_wr->send_wr, &bad_send_wr); if (ret) { printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret); @@ -950,7 +949,7 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent, mad_send_wr->wr_id = mad_send_wr->send_wr.wr_id; mad_send_wr->send_wr.next = NULL; mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid; - mad_send_wr->agent = mad_agent; + mad_send_wr->mad_agent_priv = mad_agent_priv; /* Timeout will be updated after send completes */ mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr. 
ud.timeout_ms); @@ -966,7 +965,7 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent, &mad_agent_priv->send_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - ret = ib_send_mad(mad_agent_priv, mad_send_wr); + ret = ib_send_mad(mad_send_wr); if (ret) { /* Fail send request */ spin_lock_irqsave(&mad_agent_priv->lock, flags); @@ -1742,13 +1741,14 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv) } } -static void wait_for_response(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_send_wr_private *mad_send_wr ) +static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr) { + struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *temp_mad_send_wr; struct list_head *list_item; unsigned long delay; + mad_agent_priv = mad_send_wr->mad_agent_priv; list_del(&mad_send_wr->agent_list); delay = mad_send_wr->timeout; @@ -1781,9 +1781,7 @@ static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_agent_private *mad_agent_priv; unsigned long flags; - mad_agent_priv = container_of(mad_send_wr->agent, - struct ib_mad_agent_private, agent); - + mad_agent_priv = mad_send_wr->mad_agent_priv; spin_lock_irqsave(&mad_agent_priv->lock, flags); if (mad_send_wc->status != IB_WC_SUCCESS && mad_send_wr->status == IB_WC_SUCCESS) { @@ -1794,7 +1792,7 @@ static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, if (--mad_send_wr->refcount > 0) { if (mad_send_wr->refcount == 1 && mad_send_wr->timeout && mad_send_wr->status == IB_WC_SUCCESS) { - wait_for_response(mad_agent_priv, mad_send_wr); + wait_for_response(mad_send_wr); } spin_unlock_irqrestore(&mad_agent_priv->lock, flags); return; diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 008cbcb94b15..96f1b5b610c2 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -29,7 +29,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: mad_priv.h 1389 2004-12-27 22:56:47Z roland $ + * $Id: mad_priv.h 1980 2005-03-11 22:33:53Z sean.hefty $ */ #ifndef __IB_MAD_PRIV_H__ @@ -116,7 +116,7 @@ struct ib_mad_snoop_private { struct ib_mad_send_wr_private { struct ib_mad_list_head mad_list; struct list_head agent_list; - struct ib_mad_agent *agent; + struct ib_mad_agent_private *mad_agent_priv; struct ib_send_wr send_wr; struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; u64 wr_id; /* client WR ID */ -- cgit v1.2.3-55-g7522 From 6a0c435ef9e2473934442282054d0f58235d1de2 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:26 -0700 Subject: [PATCH] IB: Fix timeout/cancelled MAD handling Fixes an issue processing a sent MAD after it has timed out or been canceled. The race occurs when a response MAD matches with the send request. The request could time out or be canceled after the response MAD matches with the request, but before the request completion can be processed. 
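To spell out the window being closed, here is a condensed, editorial view of the receive-side hunk below (not new driver code), with the surrounding declarations elided:

        spin_lock_irqsave(&mad_agent_priv->lock, flags);
        mad_send_wr = find_send_req(mad_agent_priv, tid);
        if (mad_send_wr)
                /* timeout = 0; move to done_list if the send has completed */
                ib_mark_req_done(mad_send_wr);
        spin_unlock_irqrestore(&mad_agent_priv->lock, flags);

With the request parked on done_list under the same agent lock that the timeout and cancel paths take, its completion can no longer race with a timeout or cancellation.
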
Signed-off-by: Sean Hefty Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/mad.c | 14 ++++++++++++-- drivers/infiniband/core/mad_priv.h | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index d1898b30c345..7af8f7f87849 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -341,6 +341,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, spin_lock_init(&mad_agent_priv->lock); INIT_LIST_HEAD(&mad_agent_priv->send_list); INIT_LIST_HEAD(&mad_agent_priv->wait_list); + INIT_LIST_HEAD(&mad_agent_priv->done_list); INIT_WORK(&mad_agent_priv->timed_work, timeout_sends, mad_agent_priv); INIT_LIST_HEAD(&mad_agent_priv->local_list); INIT_WORK(&mad_agent_priv->local_work, local_completions, @@ -1559,6 +1560,16 @@ find_send_req(struct ib_mad_agent_private *mad_agent_priv, return NULL; } +static void ib_mark_req_done(struct ib_mad_send_wr_private *mad_send_wr) +{ + mad_send_wr->timeout = 0; + if (mad_send_wr->refcount == 1) { + list_del(&mad_send_wr->agent_list); + list_add_tail(&mad_send_wr->agent_list, + &mad_send_wr->mad_agent_priv->done_list); + } +} + static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_recv_wc *mad_recv_wc) { @@ -1580,8 +1591,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, wake_up(&mad_agent_priv->wait); return; } - /* Timeout = 0 means that we won't wait for a response */ - mad_send_wr->timeout = 0; + ib_mark_req_done(mad_send_wr); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Defined behavior is to complete response before request */ diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 96f1b5b610c2..6fcab0009bb9 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -92,6 +92,7 @@ struct ib_mad_agent_private { spinlock_t lock; struct list_head send_list; struct list_head wait_list; + struct list_head done_list; struct work_struct timed_work; unsigned long timeout; struct list_head local_list; -- cgit v1.2.3-55-g7522 From f68bcc2df8115b4ea45bfa4f8de22ec7232562b5 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:27 -0700 Subject: [PATCH] IB: Minor cleanup during MAD startup and shutdown Minor cleanup during startup and shutdown Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/mad.c | 44 +++++++++---------------------------------- 1 file changed, 9 insertions(+), 35 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 7af8f7f87849..9719fa6c14f7 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2487,14 +2487,6 @@ static int ib_mad_port_open(struct ib_device *device, unsigned long flags; char name[sizeof "ib_mad123"]; - /* First, check if port already open at MAD layer */ - port_priv = ib_get_mad_port(device, port_num); - if (port_priv) { - printk(KERN_DEBUG PFX "%s port %d already open\n", - device->name, port_num); - return 0; - } - /* Create new device info */ port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL); if (!port_priv) { @@ -2619,7 +2611,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) static void ib_mad_init_device(struct 
ib_device *device) { - int ret, num_ports, cur_port, i, ret2; + int num_ports, cur_port, i; if (device->node_type == IB_NODE_SWITCH) { num_ports = 1; @@ -2629,47 +2621,37 @@ static void ib_mad_init_device(struct ib_device *device) cur_port = 1; } for (i = 0; i < num_ports; i++, cur_port++) { - ret = ib_mad_port_open(device, cur_port); - if (ret) { + if (ib_mad_port_open(device, cur_port)) { printk(KERN_ERR PFX "Couldn't open %s port %d\n", device->name, cur_port); goto error_device_open; } - ret = ib_agent_port_open(device, cur_port); - if (ret) { + if (ib_agent_port_open(device, cur_port)) { printk(KERN_ERR PFX "Couldn't open %s port %d " "for agents\n", device->name, cur_port); goto error_device_open; } } - - goto error_device_query; + return; error_device_open: while (i > 0) { cur_port--; - ret2 = ib_agent_port_close(device, cur_port); - if (ret2) { + if (ib_agent_port_close(device, cur_port)) printk(KERN_ERR PFX "Couldn't close %s port %d " "for agents\n", device->name, cur_port); - } - ret2 = ib_mad_port_close(device, cur_port); - if (ret2) { + if (ib_mad_port_close(device, cur_port)) printk(KERN_ERR PFX "Couldn't close %s port %d\n", device->name, cur_port); - } i--; } - -error_device_query: - return; } static void ib_mad_remove_device(struct ib_device *device) { - int ret = 0, i, num_ports, cur_port, ret2; + int i, num_ports, cur_port; if (device->node_type == IB_NODE_SWITCH) { num_ports = 1; @@ -2679,21 +2661,13 @@ static void ib_mad_remove_device(struct ib_device *device) cur_port = 1; } for (i = 0; i < num_ports; i++, cur_port++) { - ret2 = ib_agent_port_close(device, cur_port); - if (ret2) { + if (ib_agent_port_close(device, cur_port)) printk(KERN_ERR PFX "Couldn't close %s port %d " "for agents\n", device->name, cur_port); - if (!ret) - ret = ret2; - } - ret2 = ib_mad_port_close(device, cur_port); - if (ret2) { + if (ib_mad_port_close(device, cur_port)) printk(KERN_ERR PFX "Couldn't close %s port %d\n", device->name, cur_port); - if (!ret) - ret = ret2; - } } } -- cgit v1.2.3-55-g7522 From df9f9ead746e9607099d7024f312133944173609 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:28 -0700 Subject: [PATCH] IB: Add ib_coalesce_recv_mad to MAD Add implementation for ib_coalesce_recv_mad. Also, clear allocated MAD data buffer in ib_create_send_mad. 
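For context, a hedged sketch of how a receive handler might use ib_coalesce_recv_mad() as documented in ib_mad.h (editorial, not from the patch). The handler name is hypothetical and the mad_len field used to size the buffer is an assumption about struct ib_mad_recv_wc; error handling is abbreviated.

static void example_recv_handler(struct ib_mad_agent *mad_agent,
                                 struct ib_mad_recv_wc *mad_recv_wc)
{
        /* Flatten a (possibly RMPP-segmented) receive into one buffer. */
        void *buf = kmalloc(mad_recv_wc->mad_len, GFP_KERNEL);

        if (buf) {
                ib_coalesce_recv_mad(mad_recv_wc, buf);
                /* ... process the coalesced MAD in buf ... */
                kfree(buf);
        }
        ib_free_recv_mad(mad_recv_wc);
}
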
Signed-off-by: Sean Hefty Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/mad.c | 9 +-------- drivers/infiniband/include/ib_mad.h | 3 +-- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 9719fa6c14f7..430a6ee89877 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -796,9 +796,9 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, buf = kmalloc(sizeof *send_buf + buf_size, gfp_mask); if (!buf) return ERR_PTR(-ENOMEM); + memset(buf, 0, sizeof *send_buf + buf_size); send_buf = buf + buf_size; - memset(send_buf, 0, sizeof *send_buf); send_buf->mad = buf; send_buf->sge.addr = dma_map_single(mad_agent->device->dma_device, @@ -1021,13 +1021,6 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) } EXPORT_SYMBOL(ib_free_recv_mad); -void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, - void *buf) -{ - printk(KERN_ERR PFX "ib_coalesce_recv_mad() not implemented yet\n"); -} -EXPORT_SYMBOL(ib_coalesce_recv_mad); - struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, u8 rmpp_version, ib_mad_send_handler send_handler, diff --git a/drivers/infiniband/include/ib_mad.h b/drivers/infiniband/include/ib_mad.h index a6f06b8c4acf..e8a122122cba 100644 --- a/drivers/infiniband/include/ib_mad.h +++ b/drivers/infiniband/include/ib_mad.h @@ -365,8 +365,7 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent, * This call copies a chain of received RMPP MADs into a single data buffer, * removing duplicated headers. */ -void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, - void *buf); +void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf); /** * ib_free_recv_mad - Returns data buffers used to receive a MAD to the -- cgit v1.2.3-55-g7522 From f75b7a5294949cd1b7bc301e3087c7bb78e22520 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:29 -0700 Subject: [PATCH] IB: Add automatic retries to MAD layer Add automatic retries to MAD layer. Signed-off-by: Sean Hefty Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/mad.c | 26 +++++++++++++++++++++++++- drivers/infiniband/core/mad_priv.h | 2 ++ drivers/infiniband/core/sa_query.c | 3 ++- drivers/infiniband/core/user_mad.c | 1 + drivers/infiniband/include/ib_verbs.h | 1 + 5 files changed, 31 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 430a6ee89877..04f88d337388 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -954,7 +954,7 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent, /* Timeout will be updated after send completes */ mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr. ud.timeout_ms); - mad_send_wr->retry = 0; + mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries; /* One reference for each work request to QP + response */ mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); mad_send_wr->status = IB_WC_SUCCESS; @@ -2174,6 +2174,27 @@ local_send_completion: spin_unlock_irqrestore(&mad_agent_priv->lock, flags); } +static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) +{ + int ret; + + if (!mad_send_wr->retries--) + return -ETIMEDOUT; + + mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_wr. 
+ wr.ud.timeout_ms); + + ret = ib_send_mad(mad_send_wr); + + if (!ret) { + mad_send_wr->refcount++; + list_del(&mad_send_wr->agent_list); + list_add_tail(&mad_send_wr->agent_list, + &mad_send_wr->mad_agent_priv->send_list); + } + return ret; +} + static void timeout_sends(void *data) { struct ib_mad_agent_private *mad_agent_priv; @@ -2202,6 +2223,9 @@ static void timeout_sends(void *data) break; } + if (!retry_send(mad_send_wr)) + continue; + list_del(&mad_send_wr->agent_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 6fcab0009bb9..8a61dd921d29 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -123,6 +123,7 @@ struct ib_mad_send_wr_private { u64 wr_id; /* client WR ID */ u64 tid; unsigned long timeout; + int retries; int retry; int refcount; enum ib_wc_status status; @@ -136,6 +137,7 @@ struct ib_mad_local_private { struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; u64 wr_id; /* client WR ID */ u64 tid; + int retries; }; struct ib_mad_mgmt_method_table { diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 649824e33253..9ef8fe0163dd 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -462,7 +462,8 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms) .mad_hdr = &query->mad->mad_hdr, .remote_qpn = 1, .remote_qkey = IB_QP1_QKEY, - .timeout_ms = timeout_ms + .timeout_ms = timeout_ms, + .retries = 0 } } }; diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 9d912d6877ff..088bb1f0f514 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -322,6 +322,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, wr.wr.ud.remote_qpn = be32_to_cpu(packet->mad.qpn); wr.wr.ud.remote_qkey = be32_to_cpu(packet->mad.qkey); wr.wr.ud.timeout_ms = packet->mad.timeout_ms; + wr.wr.ud.retries = 0; wr.wr_id = (unsigned long) packet; diff --git a/drivers/infiniband/include/ib_verbs.h b/drivers/infiniband/include/ib_verbs.h index e5bd9a10c201..b6107c4b683a 100644 --- a/drivers/infiniband/include/ib_verbs.h +++ b/drivers/infiniband/include/ib_verbs.h @@ -566,6 +566,7 @@ struct ib_send_wr { u32 remote_qpn; u32 remote_qkey; int timeout_ms; /* valid for MADs only */ + int retries; /* valid for MADs only */ u16 pkey_index; /* valid for GSI only */ u8 port_num; /* valid for DR SMPs on switch only */ } ud; -- cgit v1.2.3-55-g7522 From dbf9227bd3dff71c3c2f540cc3e96098d2ab41e7 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:30 -0700 Subject: [PATCH] IB: Simplify calling of list_del in MAD Simplify calling of list_del. 
Signed-off-by: Sean Hefty Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/mad.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 04f88d337388..e96ca278c90e 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2188,7 +2188,6 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) if (!ret) { mad_send_wr->refcount++; - list_del(&mad_send_wr->agent_list); list_add_tail(&mad_send_wr->agent_list, &mad_send_wr->mad_agent_priv->send_list); } @@ -2223,10 +2222,10 @@ static void timeout_sends(void *data) break; } + list_del(&mad_send_wr->agent_list); if (!retry_send(mad_send_wr)) continue; - list_del(&mad_send_wr->agent_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); mad_send_wc.wr_id = mad_send_wr->wr_id; -- cgit v1.2.3-55-g7522 From 2c153b934dca08d58e0aafde18a182e0891aa201 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:31 -0700 Subject: [PATCH] IB: Eliminate MAD cache leak associated with local completions Eliminate MAD cache leak associated with local completions. Also, when canceling MAD, empty local completion list as well. Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/mad.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index e96ca278c90e..8948f6f300a4 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1994,6 +1994,8 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) /* Empty wait list to prevent receives from finding a request */ list_splice_init(&mad_agent_priv->wait_list, &cancel_list); + /* Empty local completion list as well */ + list_splice_init(&mad_agent_priv->local_list, &cancel_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Report all cancelled requests */ @@ -2108,6 +2110,7 @@ static void local_completions(void *data) struct ib_mad_local_private *local; struct ib_mad_agent_private *recv_mad_agent; unsigned long flags; + int recv = 0; struct ib_wc wc; struct ib_mad_send_wc mad_send_wc; @@ -2123,10 +2126,10 @@ static void local_completions(void *data) recv_mad_agent = local->recv_mad_agent; if (!recv_mad_agent) { printk(KERN_ERR PFX "No receive MAD agent for local completion\n"); - kmem_cache_free(ib_mad_cache, local->mad_priv); goto local_send_completion; } + recv = 1; /* * Defined behavior is to complete response * before request @@ -2169,6 +2172,8 @@ local_send_completion: spin_lock_irqsave(&mad_agent_priv->lock, flags); list_del(&local->completion_list); atomic_dec(&mad_agent_priv->refcount); + if (!recv) + kmem_cache_free(ib_mad_cache, local->mad_priv); kfree(local); } spin_unlock_irqrestore(&mad_agent_priv->lock, flags); -- cgit v1.2.3-55-g7522 From 03b61ad2f29295f019e095d0f490f30a4d678d3f Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:32 -0700 Subject: [PATCH] IB: Add ib_modify_mad API to MAD Add new MAD layer call to modify (ib_modify_mad) the timeout of a sent MAD, and simplify cancel code. 
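A brief sketch of the new call (editorial, not from the patch), using only the prototypes added below; wr_id is whatever identifier the caller used when posting the send, and the helper name is hypothetical.

static void example_adjust_send(struct ib_mad_agent *agent, u64 wr_id,
                                int give_up)
{
        if (give_up) {
                /* Equivalent to ib_modify_mad(agent, wr_id, 0): the send is
                 * flushed back through the send handler with
                 * IB_WC_WR_FLUSH_ERR.
                 */
                ib_cancel_mad(agent, wr_id);
                return;
        }

        /* Otherwise give the remote node another five seconds to answer. */
        if (ib_modify_mad(agent, wr_id, 5000))
                printk(KERN_DEBUG "MAD 0x%llx is no longer outstanding\n",
                       (unsigned long long) wr_id);
}
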
Signed-off-by: Sean Hefty Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/mad.c | 83 +++++++++++++------------------------ drivers/infiniband/core/mad_priv.h | 2 - drivers/infiniband/include/ib_mad.h | 14 ++++++- 3 files changed, 40 insertions(+), 59 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 8948f6f300a4..7af72d4ae6c8 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -65,7 +65,6 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv); static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc); static void timeout_sends(void *data); -static void cancel_sends(void *data); static void local_completions(void *data); static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, struct ib_mad_agent_private *agent_priv, @@ -346,8 +345,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, INIT_LIST_HEAD(&mad_agent_priv->local_list); INIT_WORK(&mad_agent_priv->local_work, local_completions, mad_agent_priv); - INIT_LIST_HEAD(&mad_agent_priv->canceled_list); - INIT_WORK(&mad_agent_priv->canceled_work, cancel_sends, mad_agent_priv); atomic_set(&mad_agent_priv->refcount, 1); init_waitqueue_head(&mad_agent_priv->wait); @@ -1775,6 +1772,13 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr) } } +void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, + int timeout_ms) +{ + mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); + wait_for_response(mad_send_wr); +} + /* * Process a send work completion */ @@ -2034,41 +2038,7 @@ find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, return NULL; } -void cancel_sends(void *data) -{ - struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_wr_private *mad_send_wr; - struct ib_mad_send_wc mad_send_wc; - unsigned long flags; - - mad_agent_priv = data; - - mad_send_wc.status = IB_WC_WR_FLUSH_ERR; - mad_send_wc.vendor_err = 0; - - spin_lock_irqsave(&mad_agent_priv->lock, flags); - while (!list_empty(&mad_agent_priv->canceled_list)) { - mad_send_wr = list_entry(mad_agent_priv->canceled_list.next, - struct ib_mad_send_wr_private, - agent_list); - - list_del(&mad_send_wr->agent_list); - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - - mad_send_wc.wr_id = mad_send_wr->wr_id; - mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, - &mad_send_wc); - - kfree(mad_send_wr); - if (atomic_dec_and_test(&mad_agent_priv->refcount)) - wake_up(&mad_agent_priv->wait); - spin_lock_irqsave(&mad_agent_priv->lock, flags); - } - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); -} - -void ib_cancel_mad(struct ib_mad_agent *mad_agent, - u64 wr_id) +int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; @@ -2078,29 +2048,30 @@ void ib_cancel_mad(struct ib_mad_agent *mad_agent, agent); spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr = find_send_by_wr_id(mad_agent_priv, wr_id); - if (!mad_send_wr) { + if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - goto out; + return -EINVAL; } - if (mad_send_wr->status == IB_WC_SUCCESS) - mad_send_wr->refcount -= (mad_send_wr->timeout > 0); - - if (mad_send_wr->refcount != 0) { + if 
(!timeout_ms) { mad_send_wr->status = IB_WC_WR_FLUSH_ERR; - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - goto out; + mad_send_wr->refcount -= (mad_send_wr->timeout > 0); } - list_del(&mad_send_wr->agent_list); - list_add_tail(&mad_send_wr->agent_list, &mad_agent_priv->canceled_list); - adjust_timeout(mad_agent_priv); + mad_send_wr->send_wr.wr.ud.timeout_ms = timeout_ms; + if (!mad_send_wr->timeout || mad_send_wr->refcount > 1) + mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); + else + ib_reset_mad_timeout(mad_send_wr, timeout_ms); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + return 0; +} +EXPORT_SYMBOL(ib_modify_mad); - queue_work(mad_agent_priv->qp_info->port_priv->wq, - &mad_agent_priv->canceled_work); -out: - return; +void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id) +{ + ib_modify_mad(mad_agent, wr_id, 0); } EXPORT_SYMBOL(ib_cancel_mad); @@ -2207,8 +2178,6 @@ static void timeout_sends(void *data) unsigned long flags, delay; mad_agent_priv = (struct ib_mad_agent_private *)data; - - mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; mad_send_wc.vendor_err = 0; spin_lock_irqsave(&mad_agent_priv->lock, flags); @@ -2233,6 +2202,10 @@ static void timeout_sends(void *data) spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + if (mad_send_wr->status == IB_WC_SUCCESS) + mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; + else + mad_send_wc.status = mad_send_wr->status; mad_send_wc.wr_id = mad_send_wr->wr_id; mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 8a61dd921d29..e5e37b5be387 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -97,8 +97,6 @@ struct ib_mad_agent_private { unsigned long timeout; struct list_head local_list; struct work_struct local_work; - struct list_head canceled_list; - struct work_struct canceled_work; atomic_t refcount; wait_queue_head_t wait; diff --git a/drivers/infiniband/include/ib_mad.h b/drivers/infiniband/include/ib_mad.h index e8a122122cba..c5f3170c59ef 100644 --- a/drivers/infiniband/include/ib_mad.h +++ b/drivers/infiniband/include/ib_mad.h @@ -385,8 +385,18 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc); * MADs will be returned to the user through the corresponding * ib_mad_send_handler. */ -void ib_cancel_mad(struct ib_mad_agent *mad_agent, - u64 wr_id); +void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id); + +/** + * ib_modify_mad - Modifies an outstanding send MAD operation. + * @mad_agent: Specifies the registration associated with sent MAD. + * @wr_id: Indicates the work request identifier of the MAD to modify. + * @timeout_ms: New timeout value for sent MAD. + * + * This call will reset the timeout value for a sent MAD to the specified + * value. + */ +int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms); /** * ib_redirect_mad_qp - Registers a QP for MAD services. -- cgit v1.2.3-55-g7522 From 29bb33dd87dbe8db07c2b19df3fb453d999c96de Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:32 -0700 Subject: [PATCH] IB: Optimize canceling a MAD Optimize canceling a MAD. - Eliminate searching timeout list in cancel case. - Remove duplicate calls to queue work item. - Eliminate resending a MAD before MAD is completed. 
Signed-off-by: Sean Hefty Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/mad.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 7af72d4ae6c8..1d8f26f54ec9 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1754,14 +1754,18 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr) delay = mad_send_wr->timeout; mad_send_wr->timeout += jiffies; - list_for_each_prev(list_item, &mad_agent_priv->wait_list) { - temp_mad_send_wr = list_entry(list_item, - struct ib_mad_send_wr_private, - agent_list); - if (time_after(mad_send_wr->timeout, - temp_mad_send_wr->timeout)) - break; + if (delay) { + list_for_each_prev(list_item, &mad_agent_priv->wait_list) { + temp_mad_send_wr = list_entry(list_item, + struct ib_mad_send_wr_private, + agent_list); + if (time_after(mad_send_wr->timeout, + temp_mad_send_wr->timeout)) + break; + } } + else + list_item = &mad_agent_priv->wait_list; list_add(&mad_send_wr->agent_list, list_item); /* Reschedule a work item if we have a shorter timeout */ @@ -2197,7 +2201,8 @@ static void timeout_sends(void *data) } list_del(&mad_send_wr->agent_list); - if (!retry_send(mad_send_wr)) + if (mad_send_wr->status == IB_WC_SUCCESS && + !retry_send(mad_send_wr)) continue; spin_unlock_irqrestore(&mad_agent_priv->lock, flags); -- cgit v1.2.3-55-g7522 From cabe3cbcbb3b09637b9e706c49eadb180fca057e Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:33 -0700 Subject: [PATCH] IB: Fix a couple of MAD code paths Fixed locking to handle error posting MAD send work requests. Fixed handling canceling a MAD with an active work request. 
Signed-off-by: Sean Hefty Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/mad.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 1d8f26f54ec9..8216af0ba783 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -841,6 +841,7 @@ static int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_qp_info *qp_info; struct ib_send_wr *bad_send_wr; + struct list_head *list; unsigned long flags; int ret; @@ -850,22 +851,20 @@ static int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; spin_lock_irqsave(&qp_info->send_queue.lock, flags); - if (qp_info->send_queue.count++ < qp_info->send_queue.max_active) { - list_add_tail(&mad_send_wr->mad_list.list, - &qp_info->send_queue.list); - spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); + if (qp_info->send_queue.count < qp_info->send_queue.max_active) { ret = ib_post_send(mad_send_wr->mad_agent_priv->agent.qp, &mad_send_wr->send_wr, &bad_send_wr); - if (ret) { - printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret); - dequeue_mad(&mad_send_wr->mad_list); - } + list = &qp_info->send_queue.list; } else { - list_add_tail(&mad_send_wr->mad_list.list, - &qp_info->overflow_list); - spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); ret = 0; + list = &qp_info->overflow_list; } + + if (!ret) { + qp_info->send_queue.count++; + list_add_tail(&mad_send_wr->mad_list.list, list); + } + spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); return ret; } @@ -2023,8 +2022,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) } static struct ib_mad_send_wr_private* -find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, - u64 wr_id) +find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, u64 wr_id) { struct ib_mad_send_wr_private *mad_send_wr; @@ -2047,6 +2045,7 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; unsigned long flags; + int active; mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); @@ -2057,13 +2056,14 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) return -EINVAL; } + active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1); if (!timeout_ms) { mad_send_wr->status = IB_WC_WR_FLUSH_ERR; mad_send_wr->refcount -= (mad_send_wr->timeout > 0); } mad_send_wr->send_wr.wr.ud.timeout_ms = timeout_ms; - if (!mad_send_wr->timeout || mad_send_wr->refcount > 1) + if (active) mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); else ib_reset_mad_timeout(mad_send_wr, timeout_ms); -- cgit v1.2.3-55-g7522 From 513789ed995fb2ba72ba2a5bee53ea11d1170580 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:34 -0700 Subject: [PATCH] IB: Add ib_create_ah_from_wc to IB verbs Added new call: ib_create_ah_from_wc. Call will allocate an address handle given work completion information, including any received GRH. 
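As a usage sketch (editorial, not from the patch), a MAD service answering a received request could build the reply's address handle directly from the work completion. The recv_buf.grh pointer used here is an assumption about the receive buffer layout; the helper itself only consults the GRH when the completion's IB_WC_GRH flag is set.

static struct ib_ah *example_reply_ah(struct ib_pd *pd, u8 port_num,
                                      struct ib_mad_recv_wc *mad_recv_wc)
{
        /* The new verb extracts the SLID, SL, path bits and any GRH fields. */
        return ib_create_ah_from_wc(pd, mad_recv_wc->wc,
                                    mad_recv_wc->recv_buf.grh, port_num);
}
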
Signed-off-by: Sean Hefty Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/verbs.c | 35 +++++++++++++++++++++++++++++++++++ drivers/infiniband/include/ib_mad.h | 9 --------- drivers/infiniband/include/ib_verbs.h | 24 ++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 2516f9646515..62951594eec6 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -41,6 +41,7 @@ #include #include +#include /* Protection domains */ @@ -88,6 +89,40 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) } EXPORT_SYMBOL(ib_create_ah); +struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, + struct ib_grh *grh, u8 port_num) +{ + struct ib_ah_attr ah_attr; + u32 flow_class; + u16 gid_index; + int ret; + + memset(&ah_attr, 0, sizeof ah_attr); + ah_attr.dlid = wc->slid; + ah_attr.sl = wc->sl; + ah_attr.src_path_bits = wc->dlid_path_bits; + ah_attr.port_num = port_num; + + if (wc->wc_flags & IB_WC_GRH) { + ah_attr.ah_flags = IB_AH_GRH; + ah_attr.grh.dgid = grh->dgid; + + ret = ib_find_cached_gid(pd->device, &grh->sgid, &port_num, + &gid_index); + if (ret) + return ERR_PTR(ret); + + ah_attr.grh.sgid_index = (u8) gid_index; + flow_class = be32_to_cpu(&grh->version_tclass_flow); + ah_attr.grh.flow_label = flow_class & 0xFFFFF; + ah_attr.grh.traffic_class = (flow_class >> 20) & 0xFF; + ah_attr.grh.hop_limit = grh->hop_limit; + } + + return ib_create_ah(pd, &ah_attr); +} +EXPORT_SYMBOL(ib_create_ah_from_wc); + int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) { return ah->device->modify_ah ? diff --git a/drivers/infiniband/include/ib_mad.h b/drivers/infiniband/include/ib_mad.h index c5f3170c59ef..817e932c79c0 100644 --- a/drivers/infiniband/include/ib_mad.h +++ b/drivers/infiniband/include/ib_mad.h @@ -77,15 +77,6 @@ #define IB_QP1_QKEY 0x80010000 #define IB_QP_SET_QKEY 0x80000000 -struct ib_grh { - u32 version_tclass_flow; - u16 paylen; - u8 next_hdr; - u8 hop_limit; - union ib_gid sgid; - union ib_gid dgid; -} __attribute__ ((packed)); - struct ib_mad_hdr { u8 base_version; u8 mgmt_class; diff --git a/drivers/infiniband/include/ib_verbs.h b/drivers/infiniband/include/ib_verbs.h index b6107c4b683a..5d24edaa66e6 100644 --- a/drivers/infiniband/include/ib_verbs.h +++ b/drivers/infiniband/include/ib_verbs.h @@ -289,6 +289,15 @@ struct ib_global_route { u8 traffic_class; }; +struct ib_grh { + u32 version_tclass_flow; + u16 paylen; + u8 next_hdr; + u8 hop_limit; + union ib_gid sgid; + union ib_gid dgid; +}; + enum { IB_MULTICAST_QPN = 0xffffff }; @@ -990,6 +999,21 @@ int ib_dealloc_pd(struct ib_pd *pd); */ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +/** + * ib_create_ah_from_wc - Creates an address handle associated with the + * sender of the specified work completion. + * @pd: The protection domain associated with the address handle. + * @wc: Work completion information associated with a received message. + * @grh: References the received global route header. This parameter is + * ignored unless the work completion indicates that the GRH is valid. + * @port_num: The outbound port number to associate with the address. + * + * The address handle is used to reference a local or global destination + * in all UD QP post sends. 
+ */ +struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, + struct ib_grh *grh, u8 port_num); + /** * ib_modify_ah - Modifies the address vector associated with an address * handle. -- cgit v1.2.3-55-g7522 From 497677ab940e637a41351dca6610bc4320abc8f1 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:35 -0700 Subject: [PATCH] IB: A couple of IB core bug fixes Replace be32_to_cpup with be32_to_cpu and fix bug referencing pointer rather than value in ib_create_ah_from_wc(). Signed-off-by: Tom Duffy Signed-off-by: Sean Hefty Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/agent.c | 8 ++++---- drivers/infiniband/core/verbs.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index dde25ee81b65..729f0b0d983a 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -156,10 +156,10 @@ static int agent_mad_send(struct ib_mad_agent *mad_agent, /* Should sgid be looked up ? */ ah_attr.grh.sgid_index = 0; ah_attr.grh.hop_limit = grh->hop_limit; - ah_attr.grh.flow_label = be32_to_cpup( - &grh->version_tclass_flow) & 0xfffff; - ah_attr.grh.traffic_class = (be32_to_cpup( - &grh->version_tclass_flow) >> 20) & 0xff; + ah_attr.grh.flow_label = be32_to_cpu( + grh->version_tclass_flow) & 0xfffff; + ah_attr.grh.traffic_class = (be32_to_cpu( + grh->version_tclass_flow) >> 20) & 0xff; memcpy(ah_attr.grh.dgid.raw, grh->sgid.raw, sizeof(ah_attr.grh.dgid)); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 62951594eec6..506fdf1f2a26 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -113,7 +113,7 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, return ERR_PTR(ret); ah_attr.grh.sgid_index = (u8) gid_index; - flow_class = be32_to_cpu(&grh->version_tclass_flow); + flow_class = be32_to_cpu(grh->version_tclass_flow); ah_attr.grh.flow_label = flow_class & 0xFFFFF; ah_attr.grh.traffic_class = (flow_class >> 20) & 0xFF; ah_attr.grh.hop_limit = grh->hop_limit; -- cgit v1.2.3-55-g7522 From d2082ee516200095956bd66279be4f62f4a5843d Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:36 -0700 Subject: [PATCH] IB: Introduce RMPP APIs Introduce RMPP APIs Signed-off-by: Sean Hefty Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/mad.c | 4 +- drivers/infiniband/core/sa_query.c | 20 ------ drivers/infiniband/include/ib_mad.h | 132 +++++++++++++++++++++++++++++++++--- drivers/infiniband/include/ib_sa.h | 4 -- 4 files changed, 125 insertions(+), 35 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 8216af0ba783..26e2b59ce5a6 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -777,7 +777,7 @@ static int get_buf_length(int hdr_len, int data_len) struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, - struct ib_ah *ah, + struct ib_ah *ah, int rmpp_active, int hdr_len, int data_len, unsigned int __nocast gfp_mask) { @@ -786,6 +786,8 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, int buf_size; void *buf; + if (rmpp_active) + return ERR_PTR(-EINVAL); /* until RMPP 
implemented */ mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); buf_size = get_buf_length(hdr_len, data_len); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 9ef8fe0163dd..4ec80443702f 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -50,26 +50,6 @@ MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand subnet administration query support"); MODULE_LICENSE("Dual BSD/GPL"); -/* - * These two structures must be packed because they have 64-bit fields - * that are only 32-bit aligned. 64-bit architectures will lay them - * out wrong otherwise. (And unfortunately they are sent on the wire - * so we can't change the layout) - */ -struct ib_sa_hdr { - u64 sm_key; - u16 attr_offset; - u16 reserved; - ib_sa_comp_mask comp_mask; -} __attribute__ ((packed)); - -struct ib_sa_mad { - struct ib_mad_hdr mad_hdr; - struct ib_rmpp_hdr rmpp_hdr; - struct ib_sa_hdr sa_hdr; - u8 data[200]; -} __attribute__ ((packed)); - struct ib_sa_sm_ah { struct ib_ah *ah; struct kref ref; diff --git a/drivers/infiniband/include/ib_mad.h b/drivers/infiniband/include/ib_mad.h index 817e932c79c0..491b6f25b3b8 100644 --- a/drivers/infiniband/include/ib_mad.h +++ b/drivers/infiniband/include/ib_mad.h @@ -33,7 +33,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: ib_mad.h 1389 2004-12-27 22:56:47Z roland $ + * $Id: ib_mad.h 2775 2005-07-02 13:42:12Z halr $ */ #if !defined( IB_MAD_H ) @@ -58,6 +58,8 @@ #define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30 #define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F +#define IB_OPENIB_OUI (0x001405) + /* Management methods */ #define IB_MGMT_METHOD_GET 0x01 #define IB_MGMT_METHOD_SET 0x02 @@ -72,6 +74,33 @@ #define IB_MGMT_MAX_METHODS 128 +/* RMPP information */ +#define IB_MGMT_RMPP_VERSION 1 + +#define IB_MGMT_RMPP_TYPE_DATA 1 +#define IB_MGMT_RMPP_TYPE_ACK 2 +#define IB_MGMT_RMPP_TYPE_STOP 3 +#define IB_MGMT_RMPP_TYPE_ABORT 4 + +#define IB_MGMT_RMPP_FLAG_ACTIVE 1 +#define IB_MGMT_RMPP_FLAG_FIRST (1<<1) +#define IB_MGMT_RMPP_FLAG_LAST (1<<2) + +#define IB_MGMT_RMPP_NO_RESPTIME 0x1F + +#define IB_MGMT_RMPP_STATUS_SUCCESS 0 +#define IB_MGMT_RMPP_STATUS_RESX 1 +#define IB_MGMT_RMPP_STATUS_T2L 118 +#define IB_MGMT_RMPP_STATUS_BAD_LEN 119 +#define IB_MGMT_RMPP_STATUS_BAD_SEG 120 +#define IB_MGMT_RMPP_STATUS_BADT 121 +#define IB_MGMT_RMPP_STATUS_W2S 122 +#define IB_MGMT_RMPP_STATUS_S2B 123 +#define IB_MGMT_RMPP_STATUS_BAD_STATUS 124 +#define IB_MGMT_RMPP_STATUS_UNV 125 +#define IB_MGMT_RMPP_STATUS_TMR 126 +#define IB_MGMT_RMPP_STATUS_UNSPEC 127 + #define IB_QP0 0 #define IB_QP1 __constant_htonl(1) #define IB_QP1_QKEY 0x80010000 @@ -88,7 +117,7 @@ struct ib_mad_hdr { u16 attr_id; u16 resv; u32 attr_mod; -} __attribute__ ((packed)); +}; struct ib_rmpp_hdr { u8 rmpp_version; @@ -97,17 +126,41 @@ struct ib_rmpp_hdr { u8 rmpp_status; u32 seg_num; u32 paylen_newwin; +}; + +typedef u64 __bitwise ib_sa_comp_mask; + +#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << n)) + +/* + * ib_sa_hdr and ib_sa_mad structures must be packed because they have + * 64-bit fields that are only 32-bit aligned. 64-bit architectures will + * lay them out wrong otherwise. 
(And unfortunately they are sent on + * the wire so we can't change the layout) + */ +struct ib_sa_hdr { + u64 sm_key; + u16 attr_offset; + u16 reserved; + ib_sa_comp_mask comp_mask; } __attribute__ ((packed)); struct ib_mad { struct ib_mad_hdr mad_hdr; u8 data[232]; -} __attribute__ ((packed)); +}; struct ib_rmpp_mad { struct ib_mad_hdr mad_hdr; struct ib_rmpp_hdr rmpp_hdr; u8 data[220]; +}; + +struct ib_sa_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + struct ib_sa_hdr sa_hdr; + u8 data[200]; } __attribute__ ((packed)); struct ib_vendor_mad { @@ -116,7 +169,7 @@ struct ib_vendor_mad { u8 reserved; u8 oui[3]; u8 data[216]; -} __attribute__ ((packed)); +}; /** * ib_mad_send_buf - MAD data buffer and work request for sends. @@ -142,6 +195,45 @@ struct ib_mad_send_buf { struct ib_sge sge; }; +/** + * ib_get_rmpp_resptime - Returns the RMPP response time. + * @rmpp_hdr: An RMPP header. + */ +static inline u8 ib_get_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr) +{ + return rmpp_hdr->rmpp_rtime_flags >> 3; +} + +/** + * ib_get_rmpp_flags - Returns the RMPP flags. + * @rmpp_hdr: An RMPP header. + */ +static inline u8 ib_get_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr) +{ + return rmpp_hdr->rmpp_rtime_flags & 0x7; +} + +/** + * ib_set_rmpp_resptime - Sets the response time in an RMPP header. + * @rmpp_hdr: An RMPP header. + * @rtime: The response time to set. + */ +static inline void ib_set_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr, u8 rtime) +{ + rmpp_hdr->rmpp_rtime_flags = ib_get_rmpp_flags(rmpp_hdr) | (rtime << 3); +} + +/** + * ib_set_rmpp_flags - Sets the flags in an RMPP header. + * @rmpp_hdr: An RMPP header. + * @flags: The flags to set. + */ +static inline void ib_set_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr, u8 flags) +{ + rmpp_hdr->rmpp_rtime_flags = (rmpp_hdr->rmpp_rtime_flags & 0xF1) | + (flags & 0x7); +} + struct ib_mad_agent; struct ib_mad_send_wc; struct ib_mad_recv_wc; @@ -186,6 +278,7 @@ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent, * ib_mad_agent - Used to track MAD registration with the access layer. * @device: Reference to device registration is on. * @qp: Reference to QP used for sending and receiving MADs. + * @mr: Memory region for system memory usable for DMA. * @recv_handler: Callback handler for a received MAD. * @send_handler: Callback handler for a sent MAD. * @snoop_handler: Callback handler for snooped sent MADs. @@ -194,6 +287,7 @@ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent, * Unsolicited MADs sent by this client will have the upper 32-bits * of their TID set to this value. * @port_num: Port number on which QP is registered + * @rmpp_version: If set, indicates the RMPP version used by this agent. */ struct ib_mad_agent { struct ib_device *device; @@ -205,6 +299,7 @@ struct ib_mad_agent { void *context; u32 hi_tid; u8 port_num; + u8 rmpp_version; }; /** @@ -238,6 +333,7 @@ struct ib_mad_recv_buf { * ib_mad_recv_wc - received MAD information. * @wc: Completion information for the received data. * @recv_buf: Specifies the location of the received data buffer(s). + * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers. * @mad_len: The length of the received MAD, without duplicated headers. 
* * For received response, the wr_id field of the wc is set to the wr_id @@ -246,6 +342,7 @@ struct ib_mad_recv_buf { struct ib_mad_recv_wc { struct ib_wc *wc; struct ib_mad_recv_buf recv_buf; + struct list_head rmpp_list; int mad_len; }; @@ -341,6 +438,16 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); * @bad_send_wr: Specifies the MAD on which an error was encountered. * * Sent MADs are not guaranteed to complete in the order that they were posted. + * + * If the MAD requires RMPP, the data buffer should contain a single copy + * of the common MAD, RMPP, and class specific headers, followed by the class + * defined data. If the class defined data would not divide evenly into + * RMPP segments, then space must be allocated at the end of the referenced + * buffer for any required padding. To indicate the amount of class defined + * data being transferred, the paylen_newwin field in the RMPP header should + * be set to the size of the class specific header plus the amount of class + * defined data being transferred. The paylen_newwin field should be + * specified in network-byte order. */ int ib_post_send_mad(struct ib_mad_agent *mad_agent, struct ib_send_wr *send_wr, @@ -353,14 +460,13 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent, * referenced buffer should be at least the size of the mad_len specified * by @mad_recv_wc. * - * This call copies a chain of received RMPP MADs into a single data buffer, + * This call copies a chain of received MAD segments into a single data buffer, * removing duplicated headers. */ void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf); /** - * ib_free_recv_mad - Returns data buffers used to receive a MAD to the - * access layer. + * ib_free_recv_mad - Returns data buffers used to receive a MAD. * @mad_recv_wc: Work completion information for a received MAD. * * Clients receiving MADs through their ib_mad_recv_handler must call this @@ -437,10 +543,11 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, * @pkey_index: Specifies which PKey the MAD will be sent using. This field * is valid only if the remote_qpn is QP 1. * @ah: References the address handle used to transfer to the remote node. + * @rmpp_active: Indicates if the send will enable RMPP. * @hdr_len: Indicates the size of the data header of the MAD. This length * should include the common MAD header, RMPP header, plus any class * specific header. - * @data_len: Indicates the size of any user-transfered data. The call will + * @data_len: Indicates the size of any user-transferred data. The call will * automatically adjust the allocated buffer size to account for any * additional padding that may be necessary. * @gfp_mask: GFP mask used for the memory allocation. @@ -448,11 +555,16 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, * This is a helper routine that may be used to allocate a MAD. Users are * not required to allocate outbound MADs using this call. The returned * MAD send buffer will reference a data buffer usable for sending a MAD, along - * with an intialized work request structure. + * with an initialized work request structure. Users may modify the returned + * MAD data buffer or work request before posting the send. + * + * The returned data buffer will be cleared. Users are responsible for + * initializing the common MAD and any class specific headers. If @rmpp_active + * is set, the RMPP header will be initialized for sending. 
*/ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, - struct ib_ah *ah, + struct ib_ah *ah, int rmpp_active, int hdr_len, int data_len, unsigned int __nocast gfp_mask); diff --git a/drivers/infiniband/include/ib_sa.h b/drivers/infiniband/include/ib_sa.h index 00222285eb9a..49a95ca2b8f6 100644 --- a/drivers/infiniband/include/ib_sa.h +++ b/drivers/infiniband/include/ib_sa.h @@ -87,10 +87,6 @@ static inline int ib_sa_rate_enum_to_int(enum ib_sa_rate rate) } } -typedef u64 __bitwise ib_sa_comp_mask; - -#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << n)) - /* * Structures for SA records are named "struct ib_sa_xxx_rec." No * attempt is made to pack structures to match the physical layout of -- cgit v1.2.3-55-g7522 From fa619a77046bef30478697aba0553991033afb8e Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:37 -0700 Subject: [PATCH] IB: Add RMPP implementation Add RMPP implementation. Signed-off-by: Sean Hefty Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/mad.c | 163 +++++--- drivers/infiniband/core/mad_priv.h | 28 +- drivers/infiniband/core/mad_rmpp.c | 765 +++++++++++++++++++++++++++++++++++++ drivers/infiniband/core/mad_rmpp.h | 58 +++ 5 files changed, 966 insertions(+), 50 deletions(-) create mode 100644 drivers/infiniband/core/mad_rmpp.c create mode 100644 drivers/infiniband/core/mad_rmpp.h (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index e1a7cf3e8636..96b8eba95849 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_INFINIBAND_USER_VERBS) += ib_uverbs.o ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ device.o fmr_pool.o cache.o -ib_mad-y := mad.o smi.o agent.o +ib_mad-y := mad.o smi.o agent.o mad_rmpp.o ib_sa-y := sa_query.o diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 26e2b59ce5a6..b97e210ce9c8 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -29,12 +31,12 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* - * $Id: mad.c 1389 2004-12-27 22:56:47Z roland $ + * $Id: mad.c 2817 2005-07-07 11:29:26Z halr $ */ - #include #include "mad_priv.h" +#include "mad_rmpp.h" #include "smi.h" #include "agent.h" @@ -45,6 +47,7 @@ MODULE_AUTHOR("Sean Hefty"); kmem_cache_t *ib_mad_cache; + static struct list_head ib_mad_port_list; static u32 ib_mad_client_id = 0; @@ -62,8 +65,6 @@ static struct ib_mad_agent_private *find_mad_agent( static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_private *mad); static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv); -static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, - struct ib_mad_send_wc *mad_send_wc); static void timeout_sends(void *data); static void local_completions(void *data); static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, @@ -195,8 +196,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (qpn == -1) goto error1; - if (rmpp_version) - goto error1; /* XXX: until RMPP implemented */ + if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) + goto error1; /* Validate MAD registration request if supplied */ if (mad_reg_req) { @@ -281,7 +282,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, /* Now, fill in the various structures */ mad_agent_priv->qp_info = &port_priv->qp_info[qpn]; mad_agent_priv->reg_req = reg_req; - mad_agent_priv->rmpp_version = rmpp_version; + mad_agent_priv->agent.rmpp_version = rmpp_version; mad_agent_priv->agent.device = device; mad_agent_priv->agent.recv_handler = recv_handler; mad_agent_priv->agent.send_handler = send_handler; @@ -341,6 +342,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, INIT_LIST_HEAD(&mad_agent_priv->send_list); INIT_LIST_HEAD(&mad_agent_priv->wait_list); INIT_LIST_HEAD(&mad_agent_priv->done_list); + INIT_LIST_HEAD(&mad_agent_priv->rmpp_list); INIT_WORK(&mad_agent_priv->timed_work, timeout_sends, mad_agent_priv); INIT_LIST_HEAD(&mad_agent_priv->local_list); INIT_WORK(&mad_agent_priv->local_work, local_completions, @@ -502,6 +504,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) spin_unlock_irqrestore(&port_priv->reg_lock, flags); flush_workqueue(port_priv->wq); + ib_cancel_rmpp_recvs(mad_agent_priv); atomic_dec(&mad_agent_priv->refcount); wait_event(mad_agent_priv->wait, @@ -786,12 +789,15 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, int buf_size; void *buf; - if (rmpp_active) - return ERR_PTR(-EINVAL); /* until RMPP implemented */ mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); buf_size = get_buf_length(hdr_len, data_len); + if ((!mad_agent->rmpp_version && + (rmpp_active || buf_size > sizeof(struct ib_mad))) || + (!rmpp_active && buf_size > sizeof(struct ib_mad))) + return ERR_PTR(-EINVAL); + buf = kmalloc(sizeof *send_buf + buf_size, gfp_mask); if (!buf) return ERR_PTR(-ENOMEM); @@ -816,6 +822,18 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, send_buf->send_wr.wr.ud.remote_qpn = remote_qpn; send_buf->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; send_buf->send_wr.wr.ud.pkey_index = pkey_index; + + if (rmpp_active) { + struct ib_rmpp_mad *rmpp_mad; + rmpp_mad = (struct ib_rmpp_mad *)send_buf->mad; + rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len - + offsetof(struct ib_rmpp_mad, data) + data_len); + rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version; + rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; + 
ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, + IB_MGMT_RMPP_FLAG_ACTIVE); + } + send_buf->mad_agent = mad_agent; atomic_inc(&mad_agent_priv->refcount); return send_buf; @@ -839,7 +857,7 @@ void ib_free_send_mad(struct ib_mad_send_buf *send_buf) } EXPORT_SYMBOL(ib_free_send_mad); -static int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) +int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_qp_info *qp_info; struct ib_send_wr *bad_send_wr; @@ -940,13 +958,13 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent, ret = -ENOMEM; goto error2; } + memset(mad_send_wr, 0, sizeof *mad_send_wr); mad_send_wr->send_wr = *send_wr; mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; memcpy(mad_send_wr->sg_list, send_wr->sg_list, sizeof *send_wr->sg_list * send_wr->num_sge); - mad_send_wr->wr_id = mad_send_wr->send_wr.wr_id; - mad_send_wr->send_wr.next = NULL; + mad_send_wr->wr_id = send_wr->wr_id; mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid; mad_send_wr->mad_agent_priv = mad_agent_priv; /* Timeout will be updated after send completes */ @@ -964,8 +982,13 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent, &mad_agent_priv->send_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - ret = ib_send_mad(mad_send_wr); - if (ret) { + if (mad_agent_priv->agent.rmpp_version) { + ret = ib_send_rmpp_mad(mad_send_wr); + if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED) + ret = ib_send_mad(mad_send_wr); + } else + ret = ib_send_mad(mad_send_wr); + if (ret < 0) { /* Fail send request */ spin_lock_irqsave(&mad_agent_priv->lock, flags); list_del(&mad_send_wr->agent_list); @@ -991,31 +1014,25 @@ EXPORT_SYMBOL(ib_post_send_mad); */ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) { - struct ib_mad_recv_buf *entry; + struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf; struct ib_mad_private_header *mad_priv_hdr; struct ib_mad_private *priv; + struct list_head free_list; - mad_priv_hdr = container_of(mad_recv_wc, - struct ib_mad_private_header, - recv_wc); - priv = container_of(mad_priv_hdr, struct ib_mad_private, header); + INIT_LIST_HEAD(&free_list); + list_splice_init(&mad_recv_wc->rmpp_list, &free_list); - /* - * Walk receive buffer list associated with this WC - * No need to remove them from list of receive buffers - */ - list_for_each_entry(entry, &mad_recv_wc->recv_buf.list, list) { - /* Free previous receive buffer */ - kmem_cache_free(ib_mad_cache, priv); + list_for_each_entry_safe(mad_recv_buf, temp_recv_buf, + &free_list, list) { + mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc, + recv_buf); mad_priv_hdr = container_of(mad_recv_wc, struct ib_mad_private_header, recv_wc); priv = container_of(mad_priv_hdr, struct ib_mad_private, header); + kmem_cache_free(ib_mad_cache, priv); } - - /* Free last buffer */ - kmem_cache_free(ib_mad_cache, priv); } EXPORT_SYMBOL(ib_free_recv_mad); @@ -1524,9 +1541,20 @@ out: return valid; } -static struct ib_mad_send_wr_private* -find_send_req(struct ib_mad_agent_private *mad_agent_priv, - u64 tid) +static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_hdr *mad_hdr) +{ + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *)mad_hdr; + return !mad_agent_priv->agent.rmpp_version || + !(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE) || + (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA); +} + +struct ib_mad_send_wr_private* +ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, u64 tid) { struct ib_mad_send_wr_private *mad_send_wr; @@ -1542,7 
+1570,9 @@ find_send_req(struct ib_mad_agent_private *mad_agent_priv, */ list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { - if (mad_send_wr->tid == tid && mad_send_wr->timeout) { + if (is_data_mad(mad_agent_priv, + mad_send_wr->send_wr.wr.ud.mad_hdr) && + mad_send_wr->tid == tid && mad_send_wr->timeout) { /* Verify request has not been canceled */ return (mad_send_wr->status == IB_WC_SUCCESS) ? mad_send_wr : NULL; @@ -1551,7 +1581,7 @@ find_send_req(struct ib_mad_agent_private *mad_agent_priv, return NULL; } -static void ib_mark_req_done(struct ib_mad_send_wr_private *mad_send_wr) +void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr) { mad_send_wr->timeout = 0; if (mad_send_wr->refcount == 1) { @@ -1569,12 +1599,23 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, unsigned long flags; u64 tid; - INIT_LIST_HEAD(&mad_recv_wc->recv_buf.list); + INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); + list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list); + if (mad_agent_priv->agent.rmpp_version) { + mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv, + mad_recv_wc); + if (!mad_recv_wc) { + if (atomic_dec_and_test(&mad_agent_priv->refcount)) + wake_up(&mad_agent_priv->wait); + return; + } + } + /* Complete corresponding request */ if (response_mad(mad_recv_wc->recv_buf.mad)) { tid = mad_recv_wc->recv_buf.mad->mad_hdr.tid; spin_lock_irqsave(&mad_agent_priv->lock, flags); - mad_send_wr = find_send_req(mad_agent_priv, tid); + mad_send_wr = ib_find_send_mad(mad_agent_priv, tid); if (!mad_send_wr) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); ib_free_recv_mad(mad_recv_wc); @@ -1582,7 +1623,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, wake_up(&mad_agent_priv->wait); return; } - ib_mark_req_done(mad_send_wr); + ib_mark_mad_done(mad_send_wr); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Defined behavior is to complete response before request */ @@ -1787,14 +1828,22 @@ void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, /* * Process a send work completion */ -static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, - struct ib_mad_send_wc *mad_send_wc) +void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_send_wc *mad_send_wc) { struct ib_mad_agent_private *mad_agent_priv; unsigned long flags; + int ret; mad_agent_priv = mad_send_wr->mad_agent_priv; spin_lock_irqsave(&mad_agent_priv->lock, flags); + if (mad_agent_priv->agent.rmpp_version) { + ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc); + if (ret == IB_RMPP_RESULT_CONSUMED) + goto done; + } else + ret = IB_RMPP_RESULT_UNHANDLED; + if (mad_send_wc->status != IB_WC_SUCCESS && mad_send_wr->status == IB_WC_SUCCESS) { mad_send_wr->status = mad_send_wc->status; @@ -1806,8 +1855,7 @@ static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, mad_send_wr->status == IB_WC_SUCCESS) { wait_for_response(mad_send_wr); } - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - return; + goto done; } /* Remove send from MAD agent and notify client of completion */ @@ -1817,14 +1865,18 @@ static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, if (mad_send_wr->status != IB_WC_SUCCESS ) mad_send_wc->status = mad_send_wr->status; - mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, - mad_send_wc); + if (ret != IB_RMPP_RESULT_INTERNAL) + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + 
mad_send_wc); /* Release reference on agent taken when sending */ if (atomic_dec_and_test(&mad_agent_priv->refcount)) wake_up(&mad_agent_priv->wait); kfree(mad_send_wr); + return; +done: + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); } static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, @@ -2036,7 +2088,9 @@ find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, u64 wr_id) list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { - if (mad_send_wr->wr_id == wr_id) + if (is_data_mad(mad_agent_priv, + mad_send_wr->send_wr.wr.ud.mad_hdr) && + mad_send_wr->wr_id == wr_id) return mad_send_wr; } return NULL; @@ -2118,7 +2172,9 @@ static void local_completions(void *data) local->mad_priv->header.recv_wc.wc = &wc; local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad); - INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.recv_buf.list); + INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list); + list_add(&local->mad_priv->header.recv_wc.recv_buf.list, + &local->mad_priv->header.recv_wc.rmpp_list); local->mad_priv->header.recv_wc.recv_buf.grh = NULL; local->mad_priv->header.recv_wc.recv_buf.mad = &local->mad_priv->mad.mad; @@ -2166,7 +2222,21 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_wr. wr.ud.timeout_ms); - ret = ib_send_mad(mad_send_wr); + if (mad_send_wr->mad_agent_priv->agent.rmpp_version) { + ret = ib_retry_rmpp(mad_send_wr); + switch (ret) { + case IB_RMPP_RESULT_UNHANDLED: + ret = ib_send_mad(mad_send_wr); + break; + case IB_RMPP_RESULT_CONSUMED: + ret = 0; + break; + default: + ret = -ECOMM; + break; + } + } else + ret = ib_send_mad(mad_send_wr); if (!ret) { mad_send_wr->refcount++; @@ -2724,3 +2794,4 @@ static void __exit ib_mad_cleanup_module(void) module_init(ib_mad_init_module); module_exit(ib_mad_cleanup_module); + diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index e5e37b5be387..568da10b05ab 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005, Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -29,7 +31,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* - * $Id: mad_priv.h 1980 2005-03-11 22:33:53Z sean.hefty $ + * $Id: mad_priv.h 2730 2005-06-28 16:43:03Z sean.hefty $ */ #ifndef __IB_MAD_PRIV_H__ @@ -97,10 +99,10 @@ struct ib_mad_agent_private { unsigned long timeout; struct list_head local_list; struct work_struct local_work; + struct list_head rmpp_list; atomic_t refcount; wait_queue_head_t wait; - u8 rmpp_version; }; struct ib_mad_snoop_private { @@ -125,6 +127,14 @@ struct ib_mad_send_wr_private { int retry; int refcount; enum ib_wc_status status; + + /* RMPP control */ + int last_ack; + int seg_num; + int newwin; + int total_seg; + int data_offset; + int pad; }; struct ib_mad_local_private { @@ -135,7 +145,6 @@ struct ib_mad_local_private { struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; u64 wr_id; /* client WR ID */ u64 tid; - int retries; }; struct ib_mad_mgmt_method_table { @@ -198,4 +207,17 @@ struct ib_mad_port_private { extern kmem_cache_t *ib_mad_cache; +int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr); + +struct ib_mad_send_wr_private * +ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, u64 tid); + +void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_send_wc *mad_send_wc); + +void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr); + +void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, + int timeout_ms); + #endif /* __IB_MAD_PRIV_H__ */ diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c new file mode 100644 index 000000000000..8f1eb80e421f --- /dev/null +++ b/drivers/infiniband/core/mad_rmpp.c @@ -0,0 +1,765 @@ +/* + * Copyright (c) 2005 Intel Inc. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $ + */ + +#include + +#include "mad_priv.h" +#include "mad_rmpp.h" + +enum rmpp_state { + RMPP_STATE_ACTIVE, + RMPP_STATE_TIMEOUT, + RMPP_STATE_COMPLETE +}; + +struct mad_rmpp_recv { + struct ib_mad_agent_private *agent; + struct list_head list; + struct work_struct timeout_work; + struct work_struct cleanup_work; + wait_queue_head_t wait; + enum rmpp_state state; + spinlock_t lock; + atomic_t refcount; + + struct ib_ah *ah; + struct ib_mad_recv_wc *rmpp_wc; + struct ib_mad_recv_buf *cur_seg_buf; + int last_ack; + int seg_num; + int newwin; + + u64 tid; + u32 src_qp; + u16 slid; + u8 mgmt_class; + u8 class_version; + u8 method; +}; + +static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) +{ + atomic_dec(&rmpp_recv->refcount); + wait_event(rmpp_recv->wait, !atomic_read(&rmpp_recv->refcount)); + ib_destroy_ah(rmpp_recv->ah); + kfree(rmpp_recv); +} + +void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent) +{ + struct mad_rmpp_recv *rmpp_recv, *temp_rmpp_recv; + unsigned long flags; + + spin_lock_irqsave(&agent->lock, flags); + list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { + cancel_delayed_work(&rmpp_recv->timeout_work); + cancel_delayed_work(&rmpp_recv->cleanup_work); + } + spin_unlock_irqrestore(&agent->lock, flags); + + flush_workqueue(agent->qp_info->port_priv->wq); + + list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv, + &agent->rmpp_list, list) { + list_del(&rmpp_recv->list); + if (rmpp_recv->state != RMPP_STATE_COMPLETE) + ib_free_recv_mad(rmpp_recv->rmpp_wc); + destroy_rmpp_recv(rmpp_recv); + } +} + +static void recv_timeout_handler(void *data) +{ + struct mad_rmpp_recv *rmpp_recv = data; + struct ib_mad_recv_wc *rmpp_wc; + unsigned long flags; + + spin_lock_irqsave(&rmpp_recv->agent->lock, flags); + if (rmpp_recv->state != RMPP_STATE_ACTIVE) { + spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); + return; + } + rmpp_recv->state = RMPP_STATE_TIMEOUT; + list_del(&rmpp_recv->list); + spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); + + /* TODO: send abort. 
*/ + rmpp_wc = rmpp_recv->rmpp_wc; + destroy_rmpp_recv(rmpp_recv); + ib_free_recv_mad(rmpp_wc); +} + +static void recv_cleanup_handler(void *data) +{ + struct mad_rmpp_recv *rmpp_recv = data; + unsigned long flags; + + spin_lock_irqsave(&rmpp_recv->agent->lock, flags); + list_del(&rmpp_recv->list); + spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); + destroy_rmpp_recv(rmpp_recv); +} + +static struct mad_rmpp_recv * +create_rmpp_recv(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct mad_rmpp_recv *rmpp_recv; + struct ib_mad_hdr *mad_hdr; + + rmpp_recv = kmalloc(sizeof *rmpp_recv, GFP_KERNEL); + if (!rmpp_recv) + return NULL; + + rmpp_recv->ah = ib_create_ah_from_wc(agent->agent.qp->pd, + mad_recv_wc->wc, + mad_recv_wc->recv_buf.grh, + agent->agent.port_num); + if (IS_ERR(rmpp_recv->ah)) + goto error; + + rmpp_recv->agent = agent; + init_waitqueue_head(&rmpp_recv->wait); + INIT_WORK(&rmpp_recv->timeout_work, recv_timeout_handler, rmpp_recv); + INIT_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler, rmpp_recv); + spin_lock_init(&rmpp_recv->lock); + rmpp_recv->state = RMPP_STATE_ACTIVE; + atomic_set(&rmpp_recv->refcount, 1); + + rmpp_recv->rmpp_wc = mad_recv_wc; + rmpp_recv->cur_seg_buf = &mad_recv_wc->recv_buf; + rmpp_recv->newwin = 1; + rmpp_recv->seg_num = 1; + rmpp_recv->last_ack = 0; + + mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr; + rmpp_recv->tid = mad_hdr->tid; + rmpp_recv->src_qp = mad_recv_wc->wc->src_qp; + rmpp_recv->slid = mad_recv_wc->wc->slid; + rmpp_recv->mgmt_class = mad_hdr->mgmt_class; + rmpp_recv->class_version = mad_hdr->class_version; + rmpp_recv->method = mad_hdr->method; + return rmpp_recv; + +error: kfree(rmpp_recv); + return NULL; +} + +static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) +{ + if (atomic_dec_and_test(&rmpp_recv->refcount)) + wake_up(&rmpp_recv->wait); +} + +static struct mad_rmpp_recv * +find_rmpp_recv(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct mad_rmpp_recv *rmpp_recv; + struct ib_mad_hdr *mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr; + + list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { + if (rmpp_recv->tid == mad_hdr->tid && + rmpp_recv->src_qp == mad_recv_wc->wc->src_qp && + rmpp_recv->slid == mad_recv_wc->wc->slid && + rmpp_recv->mgmt_class == mad_hdr->mgmt_class && + rmpp_recv->class_version == mad_hdr->class_version && + rmpp_recv->method == mad_hdr->method) + return rmpp_recv; + } + return NULL; +} + +static struct mad_rmpp_recv * +acquire_rmpp_recv(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct mad_rmpp_recv *rmpp_recv; + unsigned long flags; + + spin_lock_irqsave(&agent->lock, flags); + rmpp_recv = find_rmpp_recv(agent, mad_recv_wc); + if (rmpp_recv) + atomic_inc(&rmpp_recv->refcount); + spin_unlock_irqrestore(&agent->lock, flags); + return rmpp_recv; +} + +static struct mad_rmpp_recv * +insert_rmpp_recv(struct ib_mad_agent_private *agent, + struct mad_rmpp_recv *rmpp_recv) +{ + struct mad_rmpp_recv *cur_rmpp_recv; + + cur_rmpp_recv = find_rmpp_recv(agent, rmpp_recv->rmpp_wc); + if (!cur_rmpp_recv) + list_add_tail(&rmpp_recv->list, &agent->rmpp_list); + + return cur_rmpp_recv; +} + +static int data_offset(u8 mgmt_class) +{ + if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) + return offsetof(struct ib_sa_mad, data); + else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && + (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) + return offsetof(struct ib_vendor_mad, data); + else + return 
offsetof(struct ib_rmpp_mad, data); +} + +static void format_ack(struct ib_rmpp_mad *ack, + struct ib_rmpp_mad *data, + struct mad_rmpp_recv *rmpp_recv) +{ + unsigned long flags; + + memcpy(&ack->mad_hdr, &data->mad_hdr, + data_offset(data->mad_hdr.mgmt_class)); + + ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP; + ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK; + ib_set_rmpp_flags(&ack->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + + spin_lock_irqsave(&rmpp_recv->lock, flags); + rmpp_recv->last_ack = rmpp_recv->seg_num; + ack->rmpp_hdr.seg_num = cpu_to_be32(rmpp_recv->seg_num); + ack->rmpp_hdr.paylen_newwin = cpu_to_be32(rmpp_recv->newwin); + spin_unlock_irqrestore(&rmpp_recv->lock, flags); +} + +static void ack_recv(struct mad_rmpp_recv *rmpp_recv, + struct ib_mad_recv_wc *recv_wc) +{ + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + int hdr_len, ret; + + hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr); + msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, + recv_wc->wc->pkey_index, rmpp_recv->ah, 1, + hdr_len, sizeof(struct ib_rmpp_mad) - hdr_len, + GFP_KERNEL); + if (!msg) + return; + + format_ack((struct ib_rmpp_mad *) msg->mad, + (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv); + ret = ib_post_send_mad(&rmpp_recv->agent->agent, &msg->send_wr, + &bad_send_wr); + if (ret) + ib_free_send_mad(msg); +} + +static inline int get_last_flag(struct ib_mad_recv_buf *seg) +{ + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *) seg->mad; + return ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_LAST; +} + +static inline int get_seg_num(struct ib_mad_recv_buf *seg) +{ + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *) seg->mad; + return be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num); +} + +static inline struct ib_mad_recv_buf * get_next_seg(struct list_head *rmpp_list, + struct ib_mad_recv_buf *seg) +{ + if (seg->list.next == rmpp_list) + return NULL; + + return container_of(seg->list.next, struct ib_mad_recv_buf, list); +} + +static inline int window_size(struct ib_mad_agent_private *agent) +{ + return max(agent->qp_info->recv_queue.max_active >> 3, 1); +} + +static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list, + int seg_num) +{ + struct ib_mad_recv_buf *seg_buf; + int cur_seg_num; + + list_for_each_entry_reverse(seg_buf, rmpp_list, list) { + cur_seg_num = get_seg_num(seg_buf); + if (seg_num > cur_seg_num) + return seg_buf; + if (seg_num == cur_seg_num) + break; + } + return NULL; +} + +static void update_seg_num(struct mad_rmpp_recv *rmpp_recv, + struct ib_mad_recv_buf *new_buf) +{ + struct list_head *rmpp_list = &rmpp_recv->rmpp_wc->rmpp_list; + + while (new_buf && (get_seg_num(new_buf) == rmpp_recv->seg_num + 1)) { + rmpp_recv->cur_seg_buf = new_buf; + rmpp_recv->seg_num++; + new_buf = get_next_seg(rmpp_list, new_buf); + } +} + +static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv) +{ + struct ib_rmpp_mad *rmpp_mad; + int hdr_size, data_size, pad; + + rmpp_mad = (struct ib_rmpp_mad *)rmpp_recv->cur_seg_buf->mad; + + hdr_size = data_offset(rmpp_mad->mad_hdr.mgmt_class); + data_size = sizeof(struct ib_rmpp_mad) - hdr_size; + pad = data_size - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); + if (pad > data_size || pad < 0) + pad = 0; + + return hdr_size + rmpp_recv->seg_num * data_size - pad; +} + +static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv) +{ + struct ib_mad_recv_wc *rmpp_wc; + + ack_recv(rmpp_recv, rmpp_recv->rmpp_wc); + if 
(rmpp_recv->seg_num > 1) + cancel_delayed_work(&rmpp_recv->timeout_work); + + rmpp_wc = rmpp_recv->rmpp_wc; + rmpp_wc->mad_len = get_mad_len(rmpp_recv); + /* 10 seconds until we can find the packet lifetime */ + queue_delayed_work(rmpp_recv->agent->qp_info->port_priv->wq, + &rmpp_recv->cleanup_work, msecs_to_jiffies(10000)); + return rmpp_wc; +} + +void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf) +{ + struct ib_mad_recv_buf *seg_buf; + struct ib_rmpp_mad *rmpp_mad; + void *data; + int size, len, offset; + u8 flags; + + len = mad_recv_wc->mad_len; + if (len <= sizeof(struct ib_mad)) { + memcpy(buf, mad_recv_wc->recv_buf.mad, len); + return; + } + + offset = data_offset(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class); + + list_for_each_entry(seg_buf, &mad_recv_wc->rmpp_list, list) { + rmpp_mad = (struct ib_rmpp_mad *)seg_buf->mad; + flags = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr); + + if (flags & IB_MGMT_RMPP_FLAG_FIRST) { + data = rmpp_mad; + size = sizeof(*rmpp_mad); + } else { + data = (void *) rmpp_mad + offset; + if (flags & IB_MGMT_RMPP_FLAG_LAST) + size = len; + else + size = sizeof(*rmpp_mad) - offset; + } + + memcpy(buf, data, size); + len -= size; + buf += size; + } +} +EXPORT_SYMBOL(ib_coalesce_recv_mad); + +static struct ib_mad_recv_wc * +continue_rmpp(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct mad_rmpp_recv *rmpp_recv; + struct ib_mad_recv_buf *prev_buf; + struct ib_mad_recv_wc *done_wc; + int seg_num; + unsigned long flags; + + rmpp_recv = acquire_rmpp_recv(agent, mad_recv_wc); + if (!rmpp_recv) + goto drop1; + + seg_num = get_seg_num(&mad_recv_wc->recv_buf); + + spin_lock_irqsave(&rmpp_recv->lock, flags); + if ((rmpp_recv->state == RMPP_STATE_TIMEOUT) || + (seg_num > rmpp_recv->newwin)) + goto drop3; + + if ((seg_num <= rmpp_recv->last_ack) || + (rmpp_recv->state == RMPP_STATE_COMPLETE)) { + spin_unlock_irqrestore(&rmpp_recv->lock, flags); + ack_recv(rmpp_recv, mad_recv_wc); + goto drop2; + } + + prev_buf = find_seg_location(&rmpp_recv->rmpp_wc->rmpp_list, seg_num); + if (!prev_buf) + goto drop3; + + done_wc = NULL; + list_add(&mad_recv_wc->recv_buf.list, &prev_buf->list); + if (rmpp_recv->cur_seg_buf == prev_buf) { + update_seg_num(rmpp_recv, &mad_recv_wc->recv_buf); + if (get_last_flag(rmpp_recv->cur_seg_buf)) { + rmpp_recv->state = RMPP_STATE_COMPLETE; + spin_unlock_irqrestore(&rmpp_recv->lock, flags); + done_wc = complete_rmpp(rmpp_recv); + goto out; + } else if (rmpp_recv->seg_num == rmpp_recv->newwin) { + rmpp_recv->newwin += window_size(agent); + spin_unlock_irqrestore(&rmpp_recv->lock, flags); + ack_recv(rmpp_recv, mad_recv_wc); + goto out; + } + } + spin_unlock_irqrestore(&rmpp_recv->lock, flags); +out: + deref_rmpp_recv(rmpp_recv); + return done_wc; + +drop3: spin_unlock_irqrestore(&rmpp_recv->lock, flags); +drop2: deref_rmpp_recv(rmpp_recv); +drop1: ib_free_recv_mad(mad_recv_wc); + return NULL; +} + +static struct ib_mad_recv_wc * +start_rmpp(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct mad_rmpp_recv *rmpp_recv; + unsigned long flags; + + rmpp_recv = create_rmpp_recv(agent, mad_recv_wc); + if (!rmpp_recv) { + ib_free_recv_mad(mad_recv_wc); + return NULL; + } + + spin_lock_irqsave(&agent->lock, flags); + if (insert_rmpp_recv(agent, rmpp_recv)) { + spin_unlock_irqrestore(&agent->lock, flags); + /* duplicate first MAD */ + destroy_rmpp_recv(rmpp_recv); + return continue_rmpp(agent, mad_recv_wc); + } + atomic_inc(&rmpp_recv->refcount); + + if 
(get_last_flag(&mad_recv_wc->recv_buf)) { + rmpp_recv->state = RMPP_STATE_COMPLETE; + spin_unlock_irqrestore(&agent->lock, flags); + complete_rmpp(rmpp_recv); + } else { + spin_unlock_irqrestore(&agent->lock, flags); + /* 40 seconds until we can find the packet lifetimes */ + queue_delayed_work(agent->qp_info->port_priv->wq, + &rmpp_recv->timeout_work, + msecs_to_jiffies(40000)); + rmpp_recv->newwin += window_size(agent); + ack_recv(rmpp_recv, mad_recv_wc); + mad_recv_wc = NULL; + } + deref_rmpp_recv(rmpp_recv); + return mad_recv_wc; +} + +static inline u64 get_seg_addr(struct ib_mad_send_wr_private *mad_send_wr) +{ + return mad_send_wr->sg_list[0].addr + mad_send_wr->data_offset + + (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset) * + (mad_send_wr->seg_num - 1); +} + +static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_rmpp_mad *rmpp_mad; + int timeout; + + rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num); + + if (mad_send_wr->seg_num == 1) { + rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST; + rmpp_mad->rmpp_hdr.paylen_newwin = + cpu_to_be32(mad_send_wr->total_seg * + (sizeof(struct ib_rmpp_mad) - + offsetof(struct ib_rmpp_mad, data))); + mad_send_wr->sg_list[0].length = sizeof(struct ib_rmpp_mad); + } else { + mad_send_wr->send_wr.num_sge = 2; + mad_send_wr->sg_list[0].length = mad_send_wr->data_offset; + mad_send_wr->sg_list[1].addr = get_seg_addr(mad_send_wr); + mad_send_wr->sg_list[1].length = sizeof(struct ib_rmpp_mad) - + mad_send_wr->data_offset; + mad_send_wr->sg_list[1].lkey = mad_send_wr->sg_list[0].lkey; + } + + if (mad_send_wr->seg_num == mad_send_wr->total_seg) { + rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST; + rmpp_mad->rmpp_hdr.paylen_newwin = + cpu_to_be32(sizeof(struct ib_rmpp_mad) - + offsetof(struct ib_rmpp_mad, data) - + mad_send_wr->pad); + } + + /* 2 seconds for an ACK until we can find the packet lifetime */ + timeout = mad_send_wr->send_wr.wr.ud.timeout_ms; + if (!timeout || timeout > 2000) + mad_send_wr->timeout = msecs_to_jiffies(2000); + mad_send_wr->seg_num++; + return ib_send_mad(mad_send_wr); +} + +static void process_rmpp_ack(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_rmpp_mad *rmpp_mad; + unsigned long flags; + int seg_num, newwin, ret; + + rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; + if (rmpp_mad->rmpp_hdr.rmpp_status) + return; + + seg_num = be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num); + newwin = be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); + + spin_lock_irqsave(&agent->lock, flags); + mad_send_wr = ib_find_send_mad(agent, rmpp_mad->mad_hdr.tid); + if (!mad_send_wr) + goto out; /* Unmatched ACK */ + + if ((mad_send_wr->last_ack == mad_send_wr->total_seg) || + (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) + goto out; /* Send is already done */ + + if (seg_num > mad_send_wr->total_seg) + goto out; /* Bad ACK */ + + if (newwin < mad_send_wr->newwin || seg_num < mad_send_wr->last_ack) + goto out; /* Old ACK */ + + if (seg_num > mad_send_wr->last_ack) { + mad_send_wr->last_ack = seg_num; + mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries; + } + mad_send_wr->newwin = newwin; + if (mad_send_wr->last_ack == mad_send_wr->total_seg) { + /* If no response is expected, the ACK completes the send */ + if 
(!mad_send_wr->send_wr.wr.ud.timeout_ms) { + struct ib_mad_send_wc wc; + + ib_mark_mad_done(mad_send_wr); + spin_unlock_irqrestore(&agent->lock, flags); + + wc.status = IB_WC_SUCCESS; + wc.vendor_err = 0; + wc.wr_id = mad_send_wr->wr_id; + ib_mad_complete_send_wr(mad_send_wr, &wc); + return; + } + if (mad_send_wr->refcount == 1) + ib_reset_mad_timeout(mad_send_wr, mad_send_wr-> + send_wr.wr.ud.timeout_ms); + } else if (mad_send_wr->refcount == 1 && + mad_send_wr->seg_num < mad_send_wr->newwin && + mad_send_wr->seg_num <= mad_send_wr->total_seg) { + /* Send failure will just result in a timeout/retry */ + ret = send_next_seg(mad_send_wr); + if (ret) + goto out; + + mad_send_wr->refcount++; + list_del(&mad_send_wr->agent_list); + list_add_tail(&mad_send_wr->agent_list, + &mad_send_wr->mad_agent_priv->send_list); + } +out: + spin_unlock_irqrestore(&agent->lock, flags); +} + +struct ib_mad_recv_wc * +ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; + if (!(rmpp_mad->rmpp_hdr.rmpp_rtime_flags & IB_MGMT_RMPP_FLAG_ACTIVE)) + return mad_recv_wc; + + if (rmpp_mad->rmpp_hdr.rmpp_version != IB_MGMT_RMPP_VERSION) + goto out; + + switch (rmpp_mad->rmpp_hdr.rmpp_type) { + case IB_MGMT_RMPP_TYPE_DATA: + if (rmpp_mad->rmpp_hdr.seg_num == __constant_htonl(1)) + return start_rmpp(agent, mad_recv_wc); + else + return continue_rmpp(agent, mad_recv_wc); + case IB_MGMT_RMPP_TYPE_ACK: + process_rmpp_ack(agent, mad_recv_wc); + break; + case IB_MGMT_RMPP_TYPE_STOP: + case IB_MGMT_RMPP_TYPE_ABORT: + /* TODO: process_rmpp_nack(agent, mad_recv_wc); */ + break; + default: + break; + } +out: + ib_free_recv_mad(mad_recv_wc); + return NULL; +} + +int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_rmpp_mad *rmpp_mad; + int i, total_len, ret; + + rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE)) + return IB_RMPP_RESULT_UNHANDLED; + + if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) + return IB_RMPP_RESULT_INTERNAL; + + if (mad_send_wr->send_wr.num_sge > 1) + return -EINVAL; /* TODO: support num_sge > 1 */ + + mad_send_wr->seg_num = 1; + mad_send_wr->newwin = 1; + mad_send_wr->data_offset = data_offset(rmpp_mad->mad_hdr.mgmt_class); + + total_len = 0; + for (i = 0; i < mad_send_wr->send_wr.num_sge; i++) + total_len += mad_send_wr->send_wr.sg_list[i].length; + + mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) / + (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset); + mad_send_wr->pad = total_len - offsetof(struct ib_rmpp_mad, data) - + be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); + + /* We need to wait for the final ACK even if there isn't a response */ + mad_send_wr->refcount += (mad_send_wr->timeout == 0); + ret = send_next_seg(mad_send_wr); + if (!ret) + return IB_RMPP_RESULT_CONSUMED; + return ret; +} + +int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_send_wc *mad_send_wc) +{ + struct ib_rmpp_mad *rmpp_mad; + struct ib_mad_send_buf *msg; + int ret; + + rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE)) + return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ + + if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) { + msg = (struct ib_mad_send_buf *) (unsigned long) + 
mad_send_wc->wr_id; + ib_free_send_mad(msg); + return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */ + } + + if (mad_send_wc->status != IB_WC_SUCCESS || + mad_send_wr->status != IB_WC_SUCCESS) + return IB_RMPP_RESULT_PROCESSED; /* Canceled or send error */ + + if (!mad_send_wr->timeout) + return IB_RMPP_RESULT_PROCESSED; /* Response received */ + + if (mad_send_wr->last_ack == mad_send_wr->total_seg) { + mad_send_wr->timeout = + msecs_to_jiffies(mad_send_wr->send_wr.wr.ud.timeout_ms); + return IB_RMPP_RESULT_PROCESSED; /* Send done */ + } + + if (mad_send_wr->seg_num > mad_send_wr->newwin || + mad_send_wr->seg_num > mad_send_wr->total_seg) + return IB_RMPP_RESULT_PROCESSED; /* Wait for ACK */ + + ret = send_next_seg(mad_send_wr); + if (ret) { + mad_send_wc->status = IB_WC_GENERAL_ERR; + return IB_RMPP_RESULT_PROCESSED; + } + return IB_RMPP_RESULT_CONSUMED; +} + +int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_rmpp_mad *rmpp_mad; + int ret; + + rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE)) + return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ + + if (mad_send_wr->last_ack == mad_send_wr->total_seg) + return IB_RMPP_RESULT_PROCESSED; + + mad_send_wr->seg_num = mad_send_wr->last_ack + 1; + ret = send_next_seg(mad_send_wr); + if (ret) + return IB_RMPP_RESULT_PROCESSED; + + return IB_RMPP_RESULT_CONSUMED; +} diff --git a/drivers/infiniband/core/mad_rmpp.h b/drivers/infiniband/core/mad_rmpp.h new file mode 100644 index 000000000000..c4924dfb8e75 --- /dev/null +++ b/drivers/infiniband/core/mad_rmpp.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2005 Intel Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: mad_rmpp.h 1921 2005-02-25 22:58:44Z sean.hefty $ + */ + +#ifndef __MAD_RMPP_H__ +#define __MAD_RMPP_H__ + +enum { + IB_RMPP_RESULT_PROCESSED, + IB_RMPP_RESULT_CONSUMED, + IB_RMPP_RESULT_INTERNAL, + IB_RMPP_RESULT_UNHANDLED +}; + +int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr); + +struct ib_mad_recv_wc * +ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc); + +int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_send_wc *mad_send_wc); + +void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent); + +int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr); + +#endif /* __MAD_RMPP_H__ */ -- cgit v1.2.3-55-g7522 From cbae32c56314fa3032f92db36caab49f08ab0601 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:38 -0700 Subject: [PATCH] IB: Add Service Record support to SA client Add Service Record support to SA client Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/sa_query.c | 166 ++++++++++++++++++++++++++++++++++++- drivers/infiniband/include/ib_sa.h | 75 ++++++++++++++++- 2 files changed, 236 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 4ec80443702f..18caf61d8847 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -29,7 +30,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* - * $Id: sa_query.c 1389 2004-12-27 22:56:47Z roland $ + * $Id: sa_query.c 2811 2005-07-06 18:11:43Z halr $ */ #include @@ -79,6 +80,12 @@ struct ib_sa_query { int id; }; +struct ib_sa_service_query { + void (*callback)(int, struct ib_sa_service_rec *, void *); + void *context; + struct ib_sa_query sa_query; +}; + struct ib_sa_path_query { void (*callback)(int, struct ib_sa_path_rec *, void *); void *context; @@ -320,6 +327,54 @@ static const struct ib_field mcmember_rec_table[] = { .size_bits = 23 }, }; +#define SERVICE_REC_FIELD(field) \ + .struct_offset_bytes = offsetof(struct ib_sa_service_rec, field), \ + .struct_size_bytes = sizeof ((struct ib_sa_service_rec *) 0)->field, \ + .field_name = "sa_service_rec:" #field + +static const struct ib_field service_rec_table[] = { + { SERVICE_REC_FIELD(id), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 64 }, + { SERVICE_REC_FIELD(gid), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 128 }, + { SERVICE_REC_FIELD(pkey), + .offset_words = 6, + .offset_bits = 0, + .size_bits = 16 }, + { SERVICE_REC_FIELD(lease), + .offset_words = 7, + .offset_bits = 0, + .size_bits = 32 }, + { SERVICE_REC_FIELD(key), + .offset_words = 8, + .offset_bits = 0, + .size_bits = 128 }, + { SERVICE_REC_FIELD(name), + .offset_words = 12, + .offset_bits = 0, + .size_bits = 64*8 }, + { SERVICE_REC_FIELD(data8), + .offset_words = 28, + .offset_bits = 0, + .size_bits = 16*8 }, + { SERVICE_REC_FIELD(data16), + .offset_words = 32, + .offset_bits = 0, + .size_bits = 8*16 }, + { SERVICE_REC_FIELD(data32), + .offset_words = 36, + .offset_bits = 0, + .size_bits = 4*32 }, + { SERVICE_REC_FIELD(data64), + .offset_words = 40, + .offset_bits = 0, + .size_bits = 2*64 }, +}; + static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); @@ -443,7 +498,6 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms) .remote_qpn = 1, .remote_qkey = IB_QP1_QKEY, .timeout_ms = timeout_ms, - .retries = 0 } } }; @@ -596,6 +650,114 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, } EXPORT_SYMBOL(ib_sa_path_rec_get); +static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query, + int status, + struct ib_sa_mad *mad) +{ + struct ib_sa_service_query *query = + container_of(sa_query, struct ib_sa_service_query, sa_query); + + if (mad) { + struct ib_sa_service_rec rec; + + ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table), + mad->data, &rec); + query->callback(status, &rec, query->context); + } else + query->callback(status, NULL, query->context); +} + +static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) +{ + kfree(sa_query->mad); + kfree(container_of(sa_query, struct ib_sa_service_query, sa_query)); +} + +/** + * ib_sa_service_rec_query - Start Service Record operation + * @device:device to send request on + * @port_num: port number to send request on + * @method:SA method - should be get, set, or delete + * @rec:Service Record to send in request + * @comp_mask:component mask to send in request + * @timeout_ms:time to wait for response + * @gfp_mask:GFP mask to use for internal allocations + * @callback:function called when request completes, times out or is + * canceled + * @context:opaque user context passed to callback + * @sa_query:request context, used to cancel request + * + * Send a Service Record set/get/delete to the SA to register, + * unregister or query a service record. 
+ * The callback function will be called when the request completes (or + * fails); status is 0 for a successful response, -EINTR if the query + * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error + * occurred sending the query. The resp parameter of the callback is + * only valid if status is 0. + * + * If the return value of ib_sa_service_rec_query() is negative, it is an + * error code. Otherwise it is a request ID that can be used to cancel + * the query. + */ +int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, + struct ib_sa_service_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, int gfp_mask, + void (*callback)(int status, + struct ib_sa_service_rec *resp, + void *context), + void *context, + struct ib_sa_query **sa_query) +{ + struct ib_sa_service_query *query; + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; + struct ib_mad_agent *agent = port->agent; + int ret; + + if (method != IB_MGMT_METHOD_GET && + method != IB_MGMT_METHOD_SET && + method != IB_SA_METHOD_DELETE) + return -EINVAL; + + query = kmalloc(sizeof *query, gfp_mask); + if (!query) + return -ENOMEM; + query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); + if (!query->sa_query.mad) { + kfree(query); + return -ENOMEM; + } + + query->callback = callback; + query->context = context; + + init_mad(query->sa_query.mad, agent); + + query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; + query->sa_query.release = ib_sa_service_rec_release; + query->sa_query.port = port; + query->sa_query.mad->mad_hdr.method = method; + query->sa_query.mad->mad_hdr.attr_id = + cpu_to_be16(IB_SA_ATTR_SERVICE_REC); + query->sa_query.mad->sa_hdr.comp_mask = comp_mask; + + ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table), + rec, query->sa_query.mad->data); + + *sa_query = &query->sa_query; + + ret = send_mad(&query->sa_query, timeout_ms); + if (ret < 0) { + *sa_query = NULL; + kfree(query->sa_query.mad); + kfree(query); + } + + return ret; +} +EXPORT_SYMBOL(ib_sa_service_rec_query); + static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) diff --git a/drivers/infiniband/include/ib_sa.h b/drivers/infiniband/include/ib_sa.h index 49a95ca2b8f6..62047b753dc0 100644 --- a/drivers/infiniband/include/ib_sa.h +++ b/drivers/infiniband/include/ib_sa.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -29,7 +30,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
* - * $Id: ib_sa.h 1389 2004-12-27 22:56:47Z roland $ + * $Id: ib_sa.h 2811 2005-07-06 18:11:43Z halr $ */ #ifndef IB_SA_H @@ -41,9 +42,11 @@ #include enum { - IB_SA_CLASS_VERSION = 2, /* IB spec version 1.1/1.2 */ + IB_SA_CLASS_VERSION = 2, /* IB spec version 1.1/1.2 */ - IB_SA_METHOD_DELETE = 0x15 + IB_SA_METHOD_GET_TABLE = 0x12, + IB_SA_METHOD_GET_TABLE_RESP = 0x92, + IB_SA_METHOD_DELETE = 0x15 }; enum ib_sa_selector { @@ -191,6 +194,61 @@ struct ib_sa_mcmember_rec { int proxy_join; }; +/* Service Record Component Mask Sec 15.2.5.14 Ver 1.1 */ +#define IB_SA_SERVICE_REC_SERVICE_ID IB_SA_COMP_MASK( 0) +#define IB_SA_SERVICE_REC_SERVICE_GID IB_SA_COMP_MASK( 1) +#define IB_SA_SERVICE_REC_SERVICE_PKEY IB_SA_COMP_MASK( 2) +/* reserved: 3 */ +#define IB_SA_SERVICE_REC_SERVICE_LEASE IB_SA_COMP_MASK( 4) +#define IB_SA_SERVICE_REC_SERVICE_KEY IB_SA_COMP_MASK( 5) +#define IB_SA_SERVICE_REC_SERVICE_NAME IB_SA_COMP_MASK( 6) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_0 IB_SA_COMP_MASK( 7) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_1 IB_SA_COMP_MASK( 8) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_2 IB_SA_COMP_MASK( 9) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_3 IB_SA_COMP_MASK(10) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_4 IB_SA_COMP_MASK(11) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_5 IB_SA_COMP_MASK(12) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_6 IB_SA_COMP_MASK(13) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_7 IB_SA_COMP_MASK(14) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_8 IB_SA_COMP_MASK(15) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_9 IB_SA_COMP_MASK(16) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_10 IB_SA_COMP_MASK(17) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_11 IB_SA_COMP_MASK(18) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_12 IB_SA_COMP_MASK(19) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_13 IB_SA_COMP_MASK(20) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_14 IB_SA_COMP_MASK(21) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_15 IB_SA_COMP_MASK(22) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_0 IB_SA_COMP_MASK(23) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_1 IB_SA_COMP_MASK(24) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_2 IB_SA_COMP_MASK(25) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_3 IB_SA_COMP_MASK(26) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_4 IB_SA_COMP_MASK(27) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_5 IB_SA_COMP_MASK(28) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_6 IB_SA_COMP_MASK(29) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_7 IB_SA_COMP_MASK(30) +#define IB_SA_SERVICE_REC_SERVICE_DATA32_0 IB_SA_COMP_MASK(31) +#define IB_SA_SERVICE_REC_SERVICE_DATA32_1 IB_SA_COMP_MASK(32) +#define IB_SA_SERVICE_REC_SERVICE_DATA32_2 IB_SA_COMP_MASK(33) +#define IB_SA_SERVICE_REC_SERVICE_DATA32_3 IB_SA_COMP_MASK(34) +#define IB_SA_SERVICE_REC_SERVICE_DATA64_0 IB_SA_COMP_MASK(35) +#define IB_SA_SERVICE_REC_SERVICE_DATA64_1 IB_SA_COMP_MASK(36) + +#define IB_DEFAULT_SERVICE_LEASE 0xFFFFFFFF + +struct ib_sa_service_rec { + u64 id; + union ib_gid gid; + u16 pkey; + /* reserved */ + u32 lease; + u8 key[16]; + u8 name[64]; + u8 data8[16]; + u16 data16[8]; + u32 data32[4]; + u64 data64[2]; +}; + struct ib_sa_query; void ib_sa_cancel_query(int id, struct ib_sa_query *query); @@ -216,6 +274,17 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, void *context, struct ib_sa_query **query); +int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, + u8 method, + struct ib_sa_service_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, int gfp_mask, + void (*callback)(int status, + struct ib_sa_service_rec *resp, + void 
*context), + void *context, + struct ib_sa_query **sa_query); + /** * ib_sa_mcmember_rec_set - Start an MCMember set query * @device:device to send query on -- cgit v1.2.3-55-g7522 From a977049dacdef6a9e69fb4872b42a68e93a69956 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:40 -0700 Subject: [PATCH] IB: Add the kernel CM implementation Add the kernel CM implementation Signed-off-by: Sean Hefty Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/Makefile | 5 +- drivers/infiniband/core/cm.c | 3324 +++++++++++++++++++++++++++++++++++++ drivers/infiniband/core/cm_msgs.h | 819 +++++++++ 3 files changed, 4147 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/core/cm.c create mode 100644 drivers/infiniband/core/cm_msgs.h (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 96b8eba95849..216cb281abdd 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,6 +1,7 @@ EXTRA_CFLAGS += -Idrivers/infiniband/include -obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o ib_umad.o +obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ + ib_cm.o ib_umad.o obj-$(CONFIG_INFINIBAND_USER_VERBS) += ib_uverbs.o ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ @@ -10,6 +11,8 @@ ib_mad-y := mad.o smi.o agent.o mad_rmpp.o ib_sa-y := sa_query.o +ib_cm-y := cm.o + ib_umad-y := user_mad.o ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c new file mode 100644 index 000000000000..403ed125d8f4 --- /dev/null +++ b/drivers/infiniband/core/cm.c @@ -0,0 +1,3324 @@ +/* + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: cm.c 2821 2005-07-08 17:07:28Z sean.hefty $ + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "cm_msgs.h" + +MODULE_AUTHOR("Sean Hefty"); +MODULE_DESCRIPTION("InfiniBand CM"); +MODULE_LICENSE("Dual BSD/GPL"); + +static void cm_add_one(struct ib_device *device); +static void cm_remove_one(struct ib_device *device); + +static struct ib_client cm_client = { + .name = "cm", + .add = cm_add_one, + .remove = cm_remove_one +}; + +static struct ib_cm { + spinlock_t lock; + struct list_head device_list; + rwlock_t device_lock; + struct rb_root listen_service_table; + u64 listen_service_id; + /* struct rb_root peer_service_table; todo: fix peer to peer */ + struct rb_root remote_qp_table; + struct rb_root remote_id_table; + struct rb_root remote_sidr_table; + struct idr local_id_table; + struct workqueue_struct *wq; +} cm; + +struct cm_port { + struct cm_device *cm_dev; + struct ib_mad_agent *mad_agent; + u8 port_num; +}; + +struct cm_device { + struct list_head list; + struct ib_device *device; + u64 ca_guid; + struct cm_port port[0]; +}; + +struct cm_av { + struct cm_port *port; + union ib_gid dgid; + struct ib_ah_attr ah_attr; + u16 pkey_index; + u8 packet_life_time; +}; + +struct cm_work { + struct work_struct work; + struct list_head list; + struct cm_port *port; + struct ib_mad_recv_wc *mad_recv_wc; /* Received MADs */ + u32 local_id; /* Established / timewait */ + u32 remote_id; + struct ib_cm_event cm_event; + struct ib_sa_path_rec path[0]; +}; + +struct cm_timewait_info { + struct cm_work work; /* Must be first. */ + struct rb_node remote_qp_node; + struct rb_node remote_id_node; + u64 remote_ca_guid; + u32 remote_qpn; + u8 inserted_remote_qp; + u8 inserted_remote_id; +}; + +struct cm_id_private { + struct ib_cm_id id; + + struct rb_node service_node; + struct rb_node sidr_id_node; + spinlock_t lock; + wait_queue_head_t wait; + atomic_t refcount; + + struct ib_mad_send_buf *msg; + struct cm_timewait_info *timewait_info; + /* todo: use alternate port on send failure */ + struct cm_av av; + struct cm_av alt_av; + + void *private_data; + u64 tid; + u32 local_qpn; + u32 remote_qpn; + u32 sq_psn; + u32 rq_psn; + int timeout_ms; + enum ib_mtu path_mtu; + u8 private_data_len; + u8 max_cm_retries; + u8 peer_to_peer; + u8 responder_resources; + u8 initiator_depth; + u8 local_ack_timeout; + u8 retry_count; + u8 rnr_retry_count; + u8 service_timeout; + + struct list_head work_list; + atomic_t work_count; +}; + +static void cm_work_handler(void *data); + +static inline void cm_deref_id(struct cm_id_private *cm_id_priv) +{ + if (atomic_dec_and_test(&cm_id_priv->refcount)) + wake_up(&cm_id_priv->wait); +} + +static int cm_alloc_msg(struct cm_id_private *cm_id_priv, + struct ib_mad_send_buf **msg) +{ + struct ib_mad_agent *mad_agent; + struct ib_mad_send_buf *m; + struct ib_ah *ah; + + mad_agent = cm_id_priv->av.port->mad_agent; + ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr); + if (IS_ERR(ah)) + return PTR_ERR(ah); + + m = ib_create_send_mad(mad_agent, 1, cm_id_priv->av.pkey_index, + ah, 0, sizeof(struct ib_mad_hdr), + sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr), + GFP_ATOMIC); + if (IS_ERR(m)) { + ib_destroy_ah(ah); + return PTR_ERR(m); + } + + /* Timeout set by caller if response is expected. 
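Only the retry count is set here, from max_cm_retries; the reference taken on cm_id_priv (stored in context[0]) is dropped again by cm_free_msg().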
*/ + m->send_wr.wr.ud.retries = cm_id_priv->max_cm_retries; + + atomic_inc(&cm_id_priv->refcount); + m->context[0] = cm_id_priv; + *msg = m; + return 0; +} + +static int cm_alloc_response_msg(struct cm_port *port, + struct ib_mad_recv_wc *mad_recv_wc, + struct ib_mad_send_buf **msg) +{ + struct ib_mad_send_buf *m; + struct ib_ah *ah; + + ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc, + mad_recv_wc->recv_buf.grh, port->port_num); + if (IS_ERR(ah)) + return PTR_ERR(ah); + + m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, + ah, 0, sizeof(struct ib_mad_hdr), + sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr), + GFP_ATOMIC); + if (IS_ERR(m)) { + ib_destroy_ah(ah); + return PTR_ERR(m); + } + *msg = m; + return 0; +} + +static void cm_free_msg(struct ib_mad_send_buf *msg) +{ + ib_destroy_ah(msg->send_wr.wr.ud.ah); + if (msg->context[0]) + cm_deref_id(msg->context[0]); + ib_free_send_mad(msg); +} + +static void * cm_copy_private_data(const void *private_data, + u8 private_data_len) +{ + void *data; + + if (!private_data || !private_data_len) + return NULL; + + data = kmalloc(private_data_len, GFP_KERNEL); + if (!data) + return ERR_PTR(-ENOMEM); + + memcpy(data, private_data, private_data_len); + return data; +} + +static void cm_set_private_data(struct cm_id_private *cm_id_priv, + void *private_data, u8 private_data_len) +{ + if (cm_id_priv->private_data && cm_id_priv->private_data_len) + kfree(cm_id_priv->private_data); + + cm_id_priv->private_data = private_data; + cm_id_priv->private_data_len = private_data_len; +} + +static void cm_set_ah_attr(struct ib_ah_attr *ah_attr, u8 port_num, + u16 dlid, u8 sl, u16 src_path_bits) +{ + memset(ah_attr, 0, sizeof ah_attr); + ah_attr->dlid = be16_to_cpu(dlid); + ah_attr->sl = sl; + ah_attr->src_path_bits = src_path_bits; + ah_attr->port_num = port_num; +} + +static void cm_init_av_for_response(struct cm_port *port, + struct ib_wc *wc, struct cm_av *av) +{ + av->port = port; + av->pkey_index = wc->pkey_index; + cm_set_ah_attr(&av->ah_attr, port->port_num, cpu_to_be16(wc->slid), + wc->sl, wc->dlid_path_bits); +} + +static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) +{ + struct cm_device *cm_dev; + struct cm_port *port = NULL; + unsigned long flags; + int ret; + u8 p; + + read_lock_irqsave(&cm.device_lock, flags); + list_for_each_entry(cm_dev, &cm.device_list, list) { + if (!ib_find_cached_gid(cm_dev->device, &path->sgid, + &p, NULL)) { + port = &cm_dev->port[p-1]; + break; + } + } + read_unlock_irqrestore(&cm.device_lock, flags); + + if (!port) + return -EINVAL; + + ret = ib_find_cached_pkey(cm_dev->device, port->port_num, + be16_to_cpu(path->pkey), &av->pkey_index); + if (ret) + return ret; + + av->port = port; + cm_set_ah_attr(&av->ah_attr, av->port->port_num, path->dlid, + path->sl, path->slid & 0x7F); + av->packet_life_time = path->packet_life_time; + return 0; +} + +static int cm_alloc_id(struct cm_id_private *cm_id_priv) +{ + unsigned long flags; + int ret; + + do { + spin_lock_irqsave(&cm.lock, flags); + ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, 1, + (int *) &cm_id_priv->id.local_id); + spin_unlock_irqrestore(&cm.lock, flags); + } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); + return ret; +} + +static void cm_free_id(u32 local_id) +{ + unsigned long flags; + + spin_lock_irqsave(&cm.lock, flags); + idr_remove(&cm.local_id_table, (int) local_id); + spin_unlock_irqrestore(&cm.lock, flags); +} + +static struct cm_id_private * cm_get_id(u32 
local_id, u32 remote_id) +{ + struct cm_id_private *cm_id_priv; + + cm_id_priv = idr_find(&cm.local_id_table, (int) local_id); + if (cm_id_priv) { + if (cm_id_priv->id.remote_id == remote_id) + atomic_inc(&cm_id_priv->refcount); + else + cm_id_priv = NULL; + } + + return cm_id_priv; +} + +static struct cm_id_private * cm_acquire_id(u32 local_id, u32 remote_id) +{ + struct cm_id_private *cm_id_priv; + unsigned long flags; + + spin_lock_irqsave(&cm.lock, flags); + cm_id_priv = cm_get_id(local_id, remote_id); + spin_unlock_irqrestore(&cm.lock, flags); + + return cm_id_priv; +} + +static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) +{ + struct rb_node **link = &cm.listen_service_table.rb_node; + struct rb_node *parent = NULL; + struct cm_id_private *cur_cm_id_priv; + u64 service_id = cm_id_priv->id.service_id; + u64 service_mask = cm_id_priv->id.service_mask; + + while (*link) { + parent = *link; + cur_cm_id_priv = rb_entry(parent, struct cm_id_private, + service_node); + if ((cur_cm_id_priv->id.service_mask & service_id) == + (service_mask & cur_cm_id_priv->id.service_id)) + return cm_id_priv; + if (service_id < cur_cm_id_priv->id.service_id) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + rb_link_node(&cm_id_priv->service_node, parent, link); + rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table); + return NULL; +} + +static struct cm_id_private * cm_find_listen(u64 service_id) +{ + struct rb_node *node = cm.listen_service_table.rb_node; + struct cm_id_private *cm_id_priv; + + while (node) { + cm_id_priv = rb_entry(node, struct cm_id_private, service_node); + if ((cm_id_priv->id.service_mask & service_id) == + (cm_id_priv->id.service_mask & cm_id_priv->id.service_id)) + return cm_id_priv; + if (service_id < cm_id_priv->id.service_id) + node = node->rb_left; + else + node = node->rb_right; + } + return NULL; +} + +static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info + *timewait_info) +{ + struct rb_node **link = &cm.remote_id_table.rb_node; + struct rb_node *parent = NULL; + struct cm_timewait_info *cur_timewait_info; + u64 remote_ca_guid = timewait_info->remote_ca_guid; + u32 remote_id = timewait_info->work.remote_id; + + while (*link) { + parent = *link; + cur_timewait_info = rb_entry(parent, struct cm_timewait_info, + remote_id_node); + if (remote_id < cur_timewait_info->work.remote_id) + link = &(*link)->rb_left; + else if (remote_id > cur_timewait_info->work.remote_id) + link = &(*link)->rb_right; + else if (remote_ca_guid < cur_timewait_info->remote_ca_guid) + link = &(*link)->rb_left; + else if (remote_ca_guid > cur_timewait_info->remote_ca_guid) + link = &(*link)->rb_right; + else + return cur_timewait_info; + } + timewait_info->inserted_remote_id = 1; + rb_link_node(&timewait_info->remote_id_node, parent, link); + rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table); + return NULL; +} + +static struct cm_timewait_info * cm_find_remote_id(u64 remote_ca_guid, + u32 remote_id) +{ + struct rb_node *node = cm.remote_id_table.rb_node; + struct cm_timewait_info *timewait_info; + + while (node) { + timewait_info = rb_entry(node, struct cm_timewait_info, + remote_id_node); + if (remote_id < timewait_info->work.remote_id) + node = node->rb_left; + else if (remote_id > timewait_info->work.remote_id) + node = node->rb_right; + else if (remote_ca_guid < timewait_info->remote_ca_guid) + node = node->rb_left; + else if (remote_ca_guid > timewait_info->remote_ca_guid) + node = node->rb_right; + 
else + return timewait_info; + } + return NULL; +} + +static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info + *timewait_info) +{ + struct rb_node **link = &cm.remote_qp_table.rb_node; + struct rb_node *parent = NULL; + struct cm_timewait_info *cur_timewait_info; + u64 remote_ca_guid = timewait_info->remote_ca_guid; + u32 remote_qpn = timewait_info->remote_qpn; + + while (*link) { + parent = *link; + cur_timewait_info = rb_entry(parent, struct cm_timewait_info, + remote_qp_node); + if (remote_qpn < cur_timewait_info->remote_qpn) + link = &(*link)->rb_left; + else if (remote_qpn > cur_timewait_info->remote_qpn) + link = &(*link)->rb_right; + else if (remote_ca_guid < cur_timewait_info->remote_ca_guid) + link = &(*link)->rb_left; + else if (remote_ca_guid > cur_timewait_info->remote_ca_guid) + link = &(*link)->rb_right; + else + return cur_timewait_info; + } + timewait_info->inserted_remote_qp = 1; + rb_link_node(&timewait_info->remote_qp_node, parent, link); + rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table); + return NULL; +} + +static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private + *cm_id_priv) +{ + struct rb_node **link = &cm.remote_sidr_table.rb_node; + struct rb_node *parent = NULL; + struct cm_id_private *cur_cm_id_priv; + union ib_gid *port_gid = &cm_id_priv->av.dgid; + u32 remote_id = cm_id_priv->id.remote_id; + + while (*link) { + parent = *link; + cur_cm_id_priv = rb_entry(parent, struct cm_id_private, + sidr_id_node); + if (remote_id < cur_cm_id_priv->id.remote_id) + link = &(*link)->rb_left; + else if (remote_id > cur_cm_id_priv->id.remote_id) + link = &(*link)->rb_right; + else { + int cmp; + cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid, + sizeof *port_gid); + if (cmp < 0) + link = &(*link)->rb_left; + else if (cmp > 0) + link = &(*link)->rb_right; + else + return cur_cm_id_priv; + } + } + rb_link_node(&cm_id_priv->sidr_id_node, parent, link); + rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); + return NULL; +} + +static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv, + enum ib_cm_sidr_status status) +{ + struct ib_cm_sidr_rep_param param; + + memset(¶m, 0, sizeof param); + param.status = status; + ib_send_cm_sidr_rep(&cm_id_priv->id, ¶m); +} + +struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, + void *context) +{ + struct cm_id_private *cm_id_priv; + int ret; + + cm_id_priv = kmalloc(sizeof *cm_id_priv, GFP_KERNEL); + if (!cm_id_priv) + return ERR_PTR(-ENOMEM); + + memset(cm_id_priv, 0, sizeof *cm_id_priv); + cm_id_priv->id.state = IB_CM_IDLE; + cm_id_priv->id.cm_handler = cm_handler; + cm_id_priv->id.context = context; + ret = cm_alloc_id(cm_id_priv); + if (ret) + goto error; + + spin_lock_init(&cm_id_priv->lock); + init_waitqueue_head(&cm_id_priv->wait); + INIT_LIST_HEAD(&cm_id_priv->work_list); + atomic_set(&cm_id_priv->work_count, -1); + atomic_set(&cm_id_priv->refcount, 1); + return &cm_id_priv->id; + +error: + kfree(cm_id_priv); + return ERR_PTR(-ENOMEM); +} +EXPORT_SYMBOL(ib_create_cm_id); + +static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv) +{ + struct cm_work *work; + + if (list_empty(&cm_id_priv->work_list)) + return NULL; + + work = list_entry(cm_id_priv->work_list.next, struct cm_work, list); + list_del(&work->list); + return work; +} + +static void cm_free_work(struct cm_work *work) +{ + if (work->mad_recv_wc) + ib_free_recv_mad(work->mad_recv_wc); + kfree(work); +} + +static inline int cm_convert_to_ms(int iba_time) +{ + /* approximate 
conversion to ms from 4.096us x 2^iba_time */ + return 1 << max(iba_time - 8, 0); +} + +static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) +{ + unsigned long flags; + + if (!timewait_info->inserted_remote_id && + !timewait_info->inserted_remote_qp) + return; + + spin_lock_irqsave(&cm.lock, flags); + if (timewait_info->inserted_remote_id) { + rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table); + timewait_info->inserted_remote_id = 0; + } + + if (timewait_info->inserted_remote_qp) { + rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table); + timewait_info->inserted_remote_qp = 0; + } + spin_unlock_irqrestore(&cm.lock, flags); +} + +static struct cm_timewait_info * cm_create_timewait_info(u32 local_id) +{ + struct cm_timewait_info *timewait_info; + + timewait_info = kmalloc(sizeof *timewait_info, GFP_KERNEL); + if (!timewait_info) + return ERR_PTR(-ENOMEM); + memset(timewait_info, 0, sizeof *timewait_info); + + timewait_info->work.local_id = local_id; + INIT_WORK(&timewait_info->work.work, cm_work_handler, + &timewait_info->work); + timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT; + return timewait_info; +} + +static void cm_enter_timewait(struct cm_id_private *cm_id_priv) +{ + int wait_time; + + /* + * The cm_id could be destroyed by the user before we exit timewait. + * To protect against this, we search for the cm_id after exiting + * timewait before notifying the user that we've exited timewait. + */ + cm_id_priv->id.state = IB_CM_TIMEWAIT; + wait_time = cm_convert_to_ms(cm_id_priv->local_ack_timeout); + queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, + msecs_to_jiffies(wait_time)); + cm_id_priv->timewait_info = NULL; +} + +static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) +{ + cm_id_priv->id.state = IB_CM_IDLE; + if (cm_id_priv->timewait_info) { + cm_cleanup_timewait(cm_id_priv->timewait_info); + kfree(cm_id_priv->timewait_info); + cm_id_priv->timewait_info = NULL; + } +} + +void ib_destroy_cm_id(struct ib_cm_id *cm_id) +{ + struct cm_id_private *cm_id_priv; + struct cm_work *work; + unsigned long flags; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); +retest: + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id->state) { + case IB_CM_LISTEN: + cm_id->state = IB_CM_IDLE; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_lock_irqsave(&cm.lock, flags); + rb_erase(&cm_id_priv->service_node, &cm.listen_service_table); + spin_unlock_irqrestore(&cm.lock, flags); + break; + case IB_CM_SIDR_REQ_SENT: + cm_id->state = IB_CM_IDLE; + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + break; + case IB_CM_SIDR_REQ_RCVD: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT); + break; + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + /* Fall through */ + case IB_CM_REQ_RCVD: + case IB_CM_MRA_REQ_SENT: + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT, + &cm_id_priv->av.port->cm_dev->ca_guid, + sizeof cm_id_priv->av.port->cm_dev->ca_guid, + NULL, 0); + break; + case IB_CM_ESTABLISHED: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ib_send_cm_dreq(cm_id, NULL, 0); + goto retest; + case IB_CM_DREQ_SENT: + 
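/* Cancel the outstanding DREQ and move the connection into timewait. */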
ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + cm_enter_timewait(cm_id_priv); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + break; + case IB_CM_DREQ_RCVD: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ib_send_cm_drep(cm_id, NULL, 0); + break; + default: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + break; + } + + cm_free_id(cm_id->local_id); + atomic_dec(&cm_id_priv->refcount); + wait_event(cm_id_priv->wait, !atomic_read(&cm_id_priv->refcount)); + while ((work = cm_dequeue_work(cm_id_priv)) != NULL) + cm_free_work(work); + if (cm_id_priv->private_data && cm_id_priv->private_data_len) + kfree(cm_id_priv->private_data); + kfree(cm_id_priv); +} +EXPORT_SYMBOL(ib_destroy_cm_id); + +int ib_cm_listen(struct ib_cm_id *cm_id, + u64 service_id, + u64 service_mask) +{ + struct cm_id_private *cm_id_priv, *cur_cm_id_priv; + unsigned long flags; + int ret = 0; + + service_mask = service_mask ? service_mask : ~0ULL; + service_id &= service_mask; + if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID && + (service_id != IB_CM_ASSIGN_SERVICE_ID)) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + BUG_ON(cm_id->state != IB_CM_IDLE); + + cm_id->state = IB_CM_LISTEN; + + spin_lock_irqsave(&cm.lock, flags); + if (service_id == IB_CM_ASSIGN_SERVICE_ID) { + cm_id->service_id = __cpu_to_be64(cm.listen_service_id++); + cm_id->service_mask = ~0ULL; + } else { + cm_id->service_id = service_id; + cm_id->service_mask = service_mask; + } + cur_cm_id_priv = cm_insert_listen(cm_id_priv); + spin_unlock_irqrestore(&cm.lock, flags); + + if (cur_cm_id_priv) { + cm_id->state = IB_CM_IDLE; + ret = -EBUSY; + } + return ret; +} +EXPORT_SYMBOL(ib_cm_listen); + +static u64 cm_form_tid(struct cm_id_private *cm_id_priv, + enum cm_msg_sequence msg_seq) +{ + u64 hi_tid, low_tid; + + hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32; + low_tid = (u64) (cm_id_priv->id.local_id | (msg_seq << 30)); + return cpu_to_be64(hi_tid | low_tid); +} + +static void cm_format_mad_hdr(struct ib_mad_hdr *hdr, + enum cm_msg_attr_id attr_id, u64 tid) +{ + hdr->base_version = IB_MGMT_BASE_VERSION; + hdr->mgmt_class = IB_MGMT_CLASS_CM; + hdr->class_version = IB_CM_CLASS_VERSION; + hdr->method = IB_MGMT_METHOD_SEND; + hdr->attr_id = attr_id; + hdr->tid = tid; +} + +static void cm_format_req(struct cm_req_msg *req_msg, + struct cm_id_private *cm_id_priv, + struct ib_cm_req_param *param) +{ + cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, + cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ)); + + req_msg->local_comm_id = cm_id_priv->id.local_id; + req_msg->service_id = param->service_id; + req_msg->local_ca_guid = cm_id_priv->av.port->cm_dev->ca_guid; + cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num)); + cm_req_set_resp_res(req_msg, param->responder_resources); + cm_req_set_init_depth(req_msg, param->initiator_depth); + cm_req_set_remote_resp_timeout(req_msg, + param->remote_cm_response_timeout); + cm_req_set_qp_type(req_msg, param->qp_type); + cm_req_set_flow_ctrl(req_msg, param->flow_control); + cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn)); + cm_req_set_local_resp_timeout(req_msg, + param->local_cm_response_timeout); + cm_req_set_retry_count(req_msg, param->retry_count); + req_msg->pkey = param->primary_path->pkey; + cm_req_set_path_mtu(req_msg, param->primary_path->mtu); + cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); + cm_req_set_max_cm_retries(req_msg, param->max_cm_retries); + 
cm_req_set_srq(req_msg, param->srq); + + req_msg->primary_local_lid = param->primary_path->slid; + req_msg->primary_remote_lid = param->primary_path->dlid; + req_msg->primary_local_gid = param->primary_path->sgid; + req_msg->primary_remote_gid = param->primary_path->dgid; + cm_req_set_primary_flow_label(req_msg, param->primary_path->flow_label); + cm_req_set_primary_packet_rate(req_msg, param->primary_path->rate); + req_msg->primary_traffic_class = param->primary_path->traffic_class; + req_msg->primary_hop_limit = param->primary_path->hop_limit; + cm_req_set_primary_sl(req_msg, param->primary_path->sl); + cm_req_set_primary_subnet_local(req_msg, 1); /* local only... */ + cm_req_set_primary_local_ack_timeout(req_msg, + min(31, param->primary_path->packet_life_time + 1)); + + if (param->alternate_path) { + req_msg->alt_local_lid = param->alternate_path->slid; + req_msg->alt_remote_lid = param->alternate_path->dlid; + req_msg->alt_local_gid = param->alternate_path->sgid; + req_msg->alt_remote_gid = param->alternate_path->dgid; + cm_req_set_alt_flow_label(req_msg, + param->alternate_path->flow_label); + cm_req_set_alt_packet_rate(req_msg, param->alternate_path->rate); + req_msg->alt_traffic_class = param->alternate_path->traffic_class; + req_msg->alt_hop_limit = param->alternate_path->hop_limit; + cm_req_set_alt_sl(req_msg, param->alternate_path->sl); + cm_req_set_alt_subnet_local(req_msg, 1); /* local only... */ + cm_req_set_alt_local_ack_timeout(req_msg, + min(31, param->alternate_path->packet_life_time + 1)); + } + + if (param->private_data && param->private_data_len) + memcpy(req_msg->private_data, param->private_data, + param->private_data_len); +} + +static inline int cm_validate_req_param(struct ib_cm_req_param *param) +{ + /* peer-to-peer not supported */ + if (param->peer_to_peer) + return -EINVAL; + + if (!param->primary_path) + return -EINVAL; + + if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC) + return -EINVAL; + + if (param->private_data && + param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE) + return -EINVAL; + + if (param->alternate_path && + (param->alternate_path->pkey != param->primary_path->pkey || + param->alternate_path->mtu != param->primary_path->mtu)) + return -EINVAL; + + return 0; +} + +int ib_send_cm_req(struct ib_cm_id *cm_id, + struct ib_cm_req_param *param) +{ + struct cm_id_private *cm_id_priv; + struct ib_send_wr *bad_send_wr; + struct cm_req_msg *req_msg; + unsigned long flags; + int ret; + + ret = cm_validate_req_param(param); + if (ret) + return ret; + + /* Verify that we're not in timewait. 
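A REQ may only be sent on an idle cm_id; any other state, including timewait left over from a previous connection, fails with -EINVAL.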
*/ + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_IDLE) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = -EINVAL; + goto out; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> + id.local_id); + if (IS_ERR(cm_id_priv->timewait_info)) + goto out; + + ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av); + if (ret) + goto error1; + if (param->alternate_path) { + ret = cm_init_av_by_path(param->alternate_path, + &cm_id_priv->alt_av); + if (ret) + goto error1; + } + cm_id->service_id = param->service_id; + cm_id->service_mask = ~0ULL; + cm_id_priv->timeout_ms = cm_convert_to_ms( + param->primary_path->packet_life_time) * 2 + + cm_convert_to_ms( + param->remote_cm_response_timeout); + cm_id_priv->max_cm_retries = param->max_cm_retries; + cm_id_priv->initiator_depth = param->initiator_depth; + cm_id_priv->responder_resources = param->responder_resources; + cm_id_priv->retry_count = param->retry_count; + cm_id_priv->path_mtu = param->primary_path->mtu; + + ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); + if (ret) + goto error1; + + req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad; + cm_format_req(req_msg, cm_id_priv, param); + cm_id_priv->tid = req_msg->hdr.tid; + cm_id_priv->msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT; + + cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg); + cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg); + cm_id_priv->local_ack_timeout = + cm_req_get_primary_local_ack_timeout(req_msg); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &cm_id_priv->msg->send_wr, &bad_send_wr); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + goto error2; + } + BUG_ON(cm_id->state != IB_CM_IDLE); + cm_id->state = IB_CM_REQ_SENT; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return 0; + +error2: cm_free_msg(cm_id_priv->msg); +error1: kfree(cm_id_priv->timewait_info); +out: return ret; +} +EXPORT_SYMBOL(ib_send_cm_req); + +static int cm_issue_rej(struct cm_port *port, + struct ib_mad_recv_wc *mad_recv_wc, + enum ib_cm_rej_reason reason, + enum cm_msg_response msg_rejected, + void *ari, u8 ari_length) +{ + struct ib_mad_send_buf *msg = NULL; + struct ib_send_wr *bad_send_wr; + struct cm_rej_msg *rej_msg, *rcv_msg; + int ret; + + ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); + if (ret) + return ret; + + /* We just need common CM header information. Cast to any message. 
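Only the transaction ID and the leading communication ID words of the received MAD are read here.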
*/ + rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad; + rej_msg = (struct cm_rej_msg *) msg->mad; + + cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid); + rej_msg->remote_comm_id = rcv_msg->local_comm_id; + rej_msg->local_comm_id = rcv_msg->remote_comm_id; + cm_rej_set_msg_rejected(rej_msg, msg_rejected); + rej_msg->reason = reason; + + if (ari && ari_length) { + cm_rej_set_reject_info_len(rej_msg, ari_length); + memcpy(rej_msg->ari, ari, ari_length); + } + + ret = ib_post_send_mad(port->mad_agent, &msg->send_wr, &bad_send_wr); + if (ret) + cm_free_msg(msg); + + return ret; +} + +static inline int cm_is_active_peer(u64 local_ca_guid, u64 remote_ca_guid, + u32 local_qpn, u32 remote_qpn) +{ + return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) || + ((local_ca_guid == remote_ca_guid) && + (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn)))); +} + +static inline void cm_format_paths_from_req(struct cm_req_msg *req_msg, + struct ib_sa_path_rec *primary_path, + struct ib_sa_path_rec *alt_path) +{ + memset(primary_path, 0, sizeof *primary_path); + primary_path->dgid = req_msg->primary_local_gid; + primary_path->sgid = req_msg->primary_remote_gid; + primary_path->dlid = req_msg->primary_local_lid; + primary_path->slid = req_msg->primary_remote_lid; + primary_path->flow_label = cm_req_get_primary_flow_label(req_msg); + primary_path->hop_limit = req_msg->primary_hop_limit; + primary_path->traffic_class = req_msg->primary_traffic_class; + primary_path->reversible = 1; + primary_path->pkey = req_msg->pkey; + primary_path->sl = cm_req_get_primary_sl(req_msg); + primary_path->mtu_selector = IB_SA_EQ; + primary_path->mtu = cm_req_get_path_mtu(req_msg); + primary_path->rate_selector = IB_SA_EQ; + primary_path->rate = cm_req_get_primary_packet_rate(req_msg); + primary_path->packet_life_time_selector = IB_SA_EQ; + primary_path->packet_life_time = + cm_req_get_primary_local_ack_timeout(req_msg); + primary_path->packet_life_time -= (primary_path->packet_life_time > 0); + + if (req_msg->alt_local_lid) { + memset(alt_path, 0, sizeof *alt_path); + alt_path->dgid = req_msg->alt_local_gid; + alt_path->sgid = req_msg->alt_remote_gid; + alt_path->dlid = req_msg->alt_local_lid; + alt_path->slid = req_msg->alt_remote_lid; + alt_path->flow_label = cm_req_get_alt_flow_label(req_msg); + alt_path->hop_limit = req_msg->alt_hop_limit; + alt_path->traffic_class = req_msg->alt_traffic_class; + alt_path->reversible = 1; + alt_path->pkey = req_msg->pkey; + alt_path->sl = cm_req_get_alt_sl(req_msg); + alt_path->mtu_selector = IB_SA_EQ; + alt_path->mtu = cm_req_get_path_mtu(req_msg); + alt_path->rate_selector = IB_SA_EQ; + alt_path->rate = cm_req_get_alt_packet_rate(req_msg); + alt_path->packet_life_time_selector = IB_SA_EQ; + alt_path->packet_life_time = + cm_req_get_alt_local_ack_timeout(req_msg); + alt_path->packet_life_time -= (alt_path->packet_life_time > 0); + } +} + +static void cm_format_req_event(struct cm_work *work, + struct cm_id_private *cm_id_priv, + struct ib_cm_id *listen_id) +{ + struct cm_req_msg *req_msg; + struct ib_cm_req_event_param *param; + + req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; + param = &work->cm_event.param.req_rcvd; + param->listen_id = listen_id; + param->device = cm_id_priv->av.port->mad_agent->device; + param->port = cm_id_priv->av.port->port_num; + param->primary_path = &work->path[0]; + if (req_msg->alt_local_lid) + param->alternate_path = &work->path[1]; + else + param->alternate_path = NULL; + param->remote_ca_guid = 
req_msg->local_ca_guid; + param->remote_qkey = be32_to_cpu(req_msg->local_qkey); + param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg)); + param->qp_type = cm_req_get_qp_type(req_msg); + param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg)); + param->responder_resources = cm_req_get_init_depth(req_msg); + param->initiator_depth = cm_req_get_resp_res(req_msg); + param->local_cm_response_timeout = + cm_req_get_remote_resp_timeout(req_msg); + param->flow_control = cm_req_get_flow_ctrl(req_msg); + param->remote_cm_response_timeout = + cm_req_get_local_resp_timeout(req_msg); + param->retry_count = cm_req_get_retry_count(req_msg); + param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); + param->srq = cm_req_get_srq(req_msg); + work->cm_event.private_data = &req_msg->private_data; +} + +static void cm_process_work(struct cm_id_private *cm_id_priv, + struct cm_work *work) +{ + unsigned long flags; + int ret; + + /* We will typically only have the current event to report. */ + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event); + cm_free_work(work); + + while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) { + spin_lock_irqsave(&cm_id_priv->lock, flags); + work = cm_dequeue_work(cm_id_priv); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + BUG_ON(!work); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, + &work->cm_event); + cm_free_work(work); + } + cm_deref_id(cm_id_priv); + if (ret) + ib_destroy_cm_id(&cm_id_priv->id); +} + +static void cm_format_mra(struct cm_mra_msg *mra_msg, + struct cm_id_private *cm_id_priv, + enum cm_msg_response msg_mraed, u8 service_timeout, + const void *private_data, u8 private_data_len) +{ + cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid); + cm_mra_set_msg_mraed(mra_msg, msg_mraed); + mra_msg->local_comm_id = cm_id_priv->id.local_id; + mra_msg->remote_comm_id = cm_id_priv->id.remote_id; + cm_mra_set_service_timeout(mra_msg, service_timeout); + + if (private_data && private_data_len) + memcpy(mra_msg->private_data, private_data, private_data_len); +} + +static void cm_format_rej(struct cm_rej_msg *rej_msg, + struct cm_id_private *cm_id_priv, + enum ib_cm_rej_reason reason, + void *ari, + u8 ari_length, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid); + rej_msg->remote_comm_id = cm_id_priv->id.remote_id; + + switch(cm_id_priv->id.state) { + case IB_CM_REQ_RCVD: + rej_msg->local_comm_id = 0; + cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); + break; + case IB_CM_MRA_REQ_SENT: + rej_msg->local_comm_id = cm_id_priv->id.local_id; + cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); + break; + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + rej_msg->local_comm_id = cm_id_priv->id.local_id; + cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP); + break; + default: + rej_msg->local_comm_id = cm_id_priv->id.local_id; + cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER); + break; + } + + rej_msg->reason = reason; + if (ari && ari_length) { + cm_rej_set_reject_info_len(rej_msg, ari_length); + memcpy(rej_msg->ari, ari, ari_length); + } + + if (private_data && private_data_len) + memcpy(rej_msg->private_data, private_data, private_data_len); +} + +static void cm_dup_req_handler(struct cm_work *work, + struct cm_id_private *cm_id_priv) +{ + struct ib_mad_send_buf *msg = NULL; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + /* Quick state check to discard duplicate REQs. 
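A duplicate REQ still in IB_CM_REQ_RCVD is simply dropped; one that was already MRAed is answered with another MRA, and one whose connection has reached timewait gets a stale-connection REJ.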
*/ + if (cm_id_priv->id.state == IB_CM_REQ_RCVD) + return; + + ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); + if (ret) + return; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_MRA_REQ_SENT: + cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, + CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout, + cm_id_priv->private_data, + cm_id_priv->private_data_len); + break; + case IB_CM_TIMEWAIT: + cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv, + IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0); + break; + default: + goto unlock; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, + &bad_send_wr); + if (ret) + goto free; + return; + +unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +free: cm_free_msg(msg); +} + +static struct cm_id_private * cm_match_req(struct cm_work *work, + struct cm_id_private *cm_id_priv) +{ + struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv; + struct cm_timewait_info *timewait_info; + struct cm_req_msg *req_msg; + unsigned long flags; + + req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; + + /* Check for duplicate REQ and stale connections. */ + spin_lock_irqsave(&cm.lock, flags); + timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info); + if (!timewait_info) + timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); + + if (timewait_info) { + cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, + timewait_info->work.remote_id); + spin_unlock_irqrestore(&cm.lock, flags); + if (cur_cm_id_priv) { + cm_dup_req_handler(work, cur_cm_id_priv); + cm_deref_id(cur_cm_id_priv); + } else + cm_issue_rej(work->port, work->mad_recv_wc, + IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, + NULL, 0); + goto error; + } + + /* Find matching listen request. 
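If no listener covers the requested service ID, the REQ is rejected with IB_CM_REJ_INVALID_SERVICE_ID.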
*/ + listen_cm_id_priv = cm_find_listen(req_msg->service_id); + if (!listen_cm_id_priv) { + spin_unlock_irqrestore(&cm.lock, flags); + cm_issue_rej(work->port, work->mad_recv_wc, + IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ, + NULL, 0); + goto error; + } + atomic_inc(&listen_cm_id_priv->refcount); + atomic_inc(&cm_id_priv->refcount); + cm_id_priv->id.state = IB_CM_REQ_RCVD; + atomic_inc(&cm_id_priv->work_count); + spin_unlock_irqrestore(&cm.lock, flags); + return listen_cm_id_priv; + +error: cm_cleanup_timewait(cm_id_priv->timewait_info); + return NULL; +} + +static int cm_req_handler(struct cm_work *work) +{ + struct ib_cm_id *cm_id; + struct cm_id_private *cm_id_priv, *listen_cm_id_priv; + struct cm_req_msg *req_msg; + int ret; + + req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; + + cm_id = ib_create_cm_id(NULL, NULL); + if (IS_ERR(cm_id)) + return PTR_ERR(cm_id); + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + cm_id_priv->id.remote_id = req_msg->local_comm_id; + cm_init_av_for_response(work->port, work->mad_recv_wc->wc, + &cm_id_priv->av); + cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> + id.local_id); + if (IS_ERR(cm_id_priv->timewait_info)) { + ret = PTR_ERR(cm_id_priv->timewait_info); + goto error1; + } + cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id; + cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid; + cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg); + + listen_cm_id_priv = cm_match_req(work, cm_id_priv); + if (!listen_cm_id_priv) { + ret = -EINVAL; + goto error2; + } + + cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; + cm_id_priv->id.context = listen_cm_id_priv->id.context; + cm_id_priv->id.service_id = req_msg->service_id; + cm_id_priv->id.service_mask = ~0ULL; + + cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); + ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); + if (ret) + goto error3; + if (req_msg->alt_local_lid) { + ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av); + if (ret) + goto error3; + } + cm_id_priv->tid = req_msg->hdr.tid; + cm_id_priv->timeout_ms = cm_convert_to_ms( + cm_req_get_local_resp_timeout(req_msg)); + cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg); + cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg); + cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg); + cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg); + cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg); + cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg); + cm_id_priv->local_ack_timeout = + cm_req_get_primary_local_ack_timeout(req_msg); + cm_id_priv->retry_count = cm_req_get_retry_count(req_msg); + cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); + + cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id); + cm_process_work(cm_id_priv, work); + cm_deref_id(listen_cm_id_priv); + return 0; + +error3: atomic_dec(&cm_id_priv->refcount); + cm_deref_id(listen_cm_id_priv); + cm_cleanup_timewait(cm_id_priv->timewait_info); +error2: kfree(cm_id_priv->timewait_info); +error1: ib_destroy_cm_id(&cm_id_priv->id); + return ret; +} + +static void cm_format_rep(struct cm_rep_msg *rep_msg, + struct cm_id_private *cm_id_priv, + struct ib_cm_rep_param *param) +{ + cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid); + rep_msg->local_comm_id = cm_id_priv->id.local_id; + rep_msg->remote_comm_id = cm_id_priv->id.remote_id; + cm_rep_set_local_qpn(rep_msg, 
cpu_to_be32(param->qp_num)); + cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); + rep_msg->resp_resources = param->responder_resources; + rep_msg->initiator_depth = param->initiator_depth; + cm_rep_set_target_ack_delay(rep_msg, param->target_ack_delay); + cm_rep_set_failover(rep_msg, param->failover_accepted); + cm_rep_set_flow_ctrl(rep_msg, param->flow_control); + cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); + cm_rep_set_srq(rep_msg, param->srq); + rep_msg->local_ca_guid = cm_id_priv->av.port->cm_dev->ca_guid; + + if (param->private_data && param->private_data_len) + memcpy(rep_msg->private_data, param->private_data, + param->private_data_len); +} + +int ib_send_cm_rep(struct ib_cm_id *cm_id, + struct ib_cm_rep_param *param) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct cm_rep_msg *rep_msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + if (param->private_data && + param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_REQ_RCVD && + cm_id->state != IB_CM_MRA_REQ_SENT) { + ret = -EINVAL; + goto out; + } + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto out; + + rep_msg = (struct cm_rep_msg *) msg->mad; + cm_format_rep(rep_msg, cm_id_priv, param); + msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT; + + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + + cm_id->state = IB_CM_REP_SENT; + cm_id_priv->msg = msg; + cm_id_priv->initiator_depth = param->initiator_depth; + cm_id_priv->responder_resources = param->responder_resources; + cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg); + cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg); + +out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_rep); + +static void cm_format_rtu(struct cm_rtu_msg *rtu_msg, + struct cm_id_private *cm_id_priv, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid); + rtu_msg->local_comm_id = cm_id_priv->id.local_id; + rtu_msg->remote_comm_id = cm_id_priv->id.remote_id; + + if (private_data && private_data_len) + memcpy(rtu_msg->private_data, private_data, private_data_len); +} + +int ib_send_cm_rtu(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + void *data; + int ret; + + if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE) + return -EINVAL; + + data = cm_copy_private_data(private_data, private_data_len); + if (IS_ERR(data)) + return PTR_ERR(data); + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_REP_RCVD && + cm_id->state != IB_CM_MRA_REP_SENT) { + ret = -EINVAL; + goto error; + } + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto error; + + cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, + private_data, private_data_len); + + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) { + 
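/* Posting the RTU failed: drop the lock, free the MAD and the copied private data, and return the error. */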
spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + kfree(data); + return ret; + } + + cm_id->state = IB_CM_ESTABLISHED; + cm_set_private_data(cm_id_priv, data, private_data_len); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return 0; + +error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + kfree(data); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_rtu); + +static void cm_format_rep_event(struct cm_work *work) +{ + struct cm_rep_msg *rep_msg; + struct ib_cm_rep_event_param *param; + + rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; + param = &work->cm_event.param.rep_rcvd; + param->remote_ca_guid = rep_msg->local_ca_guid; + param->remote_qkey = be32_to_cpu(rep_msg->local_qkey); + param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg)); + param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg)); + param->responder_resources = rep_msg->initiator_depth; + param->initiator_depth = rep_msg->resp_resources; + param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); + param->failover_accepted = cm_rep_get_failover(rep_msg); + param->flow_control = cm_rep_get_flow_ctrl(rep_msg); + param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); + param->srq = cm_rep_get_srq(rep_msg); + work->cm_event.private_data = &rep_msg->private_data; +} + +static void cm_dup_rep_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_rep_msg *rep_msg; + struct ib_mad_send_buf *msg = NULL; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, + rep_msg->local_comm_id); + if (!cm_id_priv) + return; + + ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); + if (ret) + goto deref; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state == IB_CM_ESTABLISHED) + cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, + cm_id_priv->private_data, + cm_id_priv->private_data_len); + else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT) + cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, + CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout, + cm_id_priv->private_data, + cm_id_priv->private_data_len); + else + goto unlock; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, + &bad_send_wr); + if (ret) + goto free; + goto deref; + +unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +free: cm_free_msg(msg); +deref: cm_deref_id(cm_id_priv); +} + +static int cm_rep_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_rep_msg *rep_msg; + unsigned long flags; + int ret; + + rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0); + if (!cm_id_priv) { + cm_dup_rep_handler(work); + return -EINVAL; + } + + cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id; + cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; + cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg); + + spin_lock_irqsave(&cm.lock, flags); + /* Check for duplicate REP. */ + if (cm_insert_remote_id(cm_id_priv->timewait_info)) { + spin_unlock_irqrestore(&cm.lock, flags); + ret = -EINVAL; + goto error; + } + /* Check for a stale connection. 
*/ + if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) { + spin_unlock_irqrestore(&cm.lock, flags); + cm_issue_rej(work->port, work->mad_recv_wc, + IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, + NULL, 0); + ret = -EINVAL; + goto error; + } + spin_unlock_irqrestore(&cm.lock, flags); + + cm_format_rep_event(work); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + break; + default: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = -EINVAL; + goto error; + } + cm_id_priv->id.state = IB_CM_REP_RCVD; + cm_id_priv->id.remote_id = rep_msg->local_comm_id; + cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg); + cm_id_priv->initiator_depth = rep_msg->resp_resources; + cm_id_priv->responder_resources = rep_msg->initiator_depth; + cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); + cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); + + /* todo: handle peer_to_peer */ + + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; + +error: cm_cleanup_timewait(cm_id_priv->timewait_info); + cm_deref_id(cm_id_priv); + return ret; +} + +static int cm_establish_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + unsigned long flags; + int ret; + + /* See comment in ib_cm_establish about lookup. */ + cm_id_priv = cm_acquire_id(work->local_id, work->remote_id); + if (!cm_id_priv) + return -EINVAL; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IB_CM_ESTABLISHED) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + goto out; + } + + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static int cm_rtu_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_rtu_msg *rtu_msg; + unsigned long flags; + int ret; + + rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id, + rtu_msg->local_comm_id); + if (!cm_id_priv) + return -EINVAL; + + work->cm_event.private_data = &rtu_msg->private_data; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IB_CM_REP_SENT && + cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + goto out; + } + cm_id_priv->id.state = IB_CM_ESTABLISHED; + + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static void cm_format_dreq(struct cm_dreq_msg *dreq_msg, + struct cm_id_private *cm_id_priv, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&dreq_msg->hdr, 
CM_DREQ_ATTR_ID, + cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ)); + dreq_msg->local_comm_id = cm_id_priv->id.local_id; + dreq_msg->remote_comm_id = cm_id_priv->id.remote_id; + cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn); + + if (private_data && private_data_len) + memcpy(dreq_msg->private_data, private_data, private_data_len); +} + +int ib_send_cm_dreq(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_ESTABLISHED) { + ret = -EINVAL; + goto out; + } + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) { + cm_enter_timewait(cm_id_priv); + goto out; + } + + cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, + private_data, private_data_len); + msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT; + + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) { + cm_enter_timewait(cm_id_priv); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + + cm_id->state = IB_CM_DREQ_SENT; + cm_id_priv->msg = msg; +out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_dreq); + +static void cm_format_drep(struct cm_drep_msg *drep_msg, + struct cm_id_private *cm_id_priv, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid); + drep_msg->local_comm_id = cm_id_priv->id.local_id; + drep_msg->remote_comm_id = cm_id_priv->id.remote_id; + + if (private_data && private_data_len) + memcpy(drep_msg->private_data, private_data, private_data_len); +} + +int ib_send_cm_drep(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + void *data; + int ret; + + if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE) + return -EINVAL; + + data = cm_copy_private_data(private_data, private_data_len); + if (IS_ERR(data)) + return PTR_ERR(data); + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_DREQ_RCVD) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + kfree(data); + return -EINVAL; + } + + cm_set_private_data(cm_id_priv, data, private_data_len); + cm_enter_timewait(cm_id_priv); + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto out; + + cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, + private_data, private_data_len); + + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, + &bad_send_wr); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + +out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_drep); + +static int cm_dreq_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_dreq_msg *dreq_msg; + struct ib_mad_send_buf *msg = NULL; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + dreq_msg = (struct cm_dreq_msg 
*)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id, + dreq_msg->local_comm_id); + if (!cm_id_priv) + return -EINVAL; + + work->cm_event.private_data = &dreq_msg->private_data; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg)) + goto unlock; + + switch (cm_id_priv->id.state) { + case IB_CM_REP_SENT: + case IB_CM_DREQ_SENT: + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + break; + case IB_CM_ESTABLISHED: + case IB_CM_MRA_REP_RCVD: + break; + case IB_CM_TIMEWAIT: + if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) + goto unlock; + + cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, + cm_id_priv->private_data, + cm_id_priv->private_data_len); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr)) + cm_free_msg(msg); + goto deref; + default: + goto unlock; + } + cm_id_priv->id.state = IB_CM_DREQ_RCVD; + cm_id_priv->tid = dreq_msg->hdr.tid; + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; + +unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +deref: cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static int cm_drep_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_drep_msg *drep_msg; + unsigned long flags; + int ret; + + drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id, + drep_msg->local_comm_id); + if (!cm_id_priv) + return -EINVAL; + + work->cm_event.private_data = &drep_msg->private_data; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IB_CM_DREQ_SENT && + cm_id_priv->id.state != IB_CM_DREQ_RCVD) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + goto out; + } + cm_enter_timewait(cm_id_priv); + + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +int ib_send_cm_rej(struct ib_cm_id *cm_id, + enum ib_cm_rej_reason reason, + void *ari, + u8 ari_length, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) || + (ari && ari_length > IB_CM_REJ_ARI_LENGTH)) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id->state) { + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + case IB_CM_REQ_RCVD: + case IB_CM_MRA_REQ_SENT: + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + ret = cm_alloc_msg(cm_id_priv, &msg); + if (!ret) + cm_format_rej((struct cm_rej_msg *) msg->mad, + cm_id_priv, reason, ari, ari_length, + private_data, private_data_len); + + cm_reset_to_idle(cm_id_priv); + break; + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + ret = 
cm_alloc_msg(cm_id_priv, &msg); + if (!ret) + cm_format_rej((struct cm_rej_msg *) msg->mad, + cm_id_priv, reason, ari, ari_length, + private_data, private_data_len); + + cm_enter_timewait(cm_id_priv); + break; + default: + ret = -EINVAL; + goto out; + } + + if (ret) + goto out; + + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) + cm_free_msg(msg); + +out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_rej); + +static void cm_format_rej_event(struct cm_work *work) +{ + struct cm_rej_msg *rej_msg; + struct ib_cm_rej_event_param *param; + + rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; + param = &work->cm_event.param.rej_rcvd; + param->ari = rej_msg->ari; + param->ari_length = cm_rej_get_reject_info_len(rej_msg); + param->reason = rej_msg->reason; + work->cm_event.private_data = &rej_msg->private_data; +} + +static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) +{ + struct cm_timewait_info *timewait_info; + struct cm_id_private *cm_id_priv; + unsigned long flags; + u32 remote_id; + + remote_id = rej_msg->local_comm_id; + + if (rej_msg->reason == IB_CM_REJ_TIMEOUT) { + spin_lock_irqsave(&cm.lock, flags); + timewait_info = cm_find_remote_id( *((u64 *) rej_msg->ari), + remote_id); + if (!timewait_info) { + spin_unlock_irqrestore(&cm.lock, flags); + return NULL; + } + cm_id_priv = idr_find(&cm.local_id_table, + (int) timewait_info->work.local_id); + if (cm_id_priv) { + if (cm_id_priv->id.remote_id == remote_id) + atomic_inc(&cm_id_priv->refcount); + else + cm_id_priv = NULL; + } + spin_unlock_irqrestore(&cm.lock, flags); + } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ) + cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0); + else + cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id); + + return cm_id_priv; +} + +static int cm_rej_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_rej_msg *rej_msg; + unsigned long flags; + int ret; + + rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_rejected_id(rej_msg); + if (!cm_id_priv) + return -EINVAL; + + cm_format_rej_event(work); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + /* fall through */ + case IB_CM_REQ_RCVD: + case IB_CM_MRA_REQ_SENT: + if (rej_msg->reason == IB_CM_REJ_STALE_CONN) + cm_enter_timewait(cm_id_priv); + else + cm_reset_to_idle(cm_id_priv); + break; + case IB_CM_DREQ_SENT: + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + /* fall through */ + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + case IB_CM_ESTABLISHED: + cm_enter_timewait(cm_id_priv); + break; + default: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = -EINVAL; + goto out; + } + + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +int ib_send_cm_mra(struct ib_cm_id *cm_id, + u8 service_timeout, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct 
ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + void *data; + unsigned long flags; + int ret; + + if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE) + return -EINVAL; + + data = cm_copy_private_data(private_data, private_data_len); + if (IS_ERR(data)) + return PTR_ERR(data); + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch(cm_id_priv->id.state) { + case IB_CM_REQ_RCVD: + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto error1; + + cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, + CM_MSG_RESPONSE_REQ, service_timeout, + private_data, private_data_len); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) + goto error2; + cm_id->state = IB_CM_MRA_REQ_SENT; + break; + case IB_CM_REP_RCVD: + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto error1; + + cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, + CM_MSG_RESPONSE_REP, service_timeout, + private_data, private_data_len); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) + goto error2; + cm_id->state = IB_CM_MRA_REP_SENT; + break; + case IB_CM_ESTABLISHED: + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto error1; + + cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, + CM_MSG_RESPONSE_OTHER, service_timeout, + private_data, private_data_len); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) + goto error2; + cm_id->lap_state = IB_CM_MRA_LAP_SENT; + break; + default: + ret = -EINVAL; + goto error1; + } + cm_id_priv->service_timeout = service_timeout; + cm_set_private_data(cm_id_priv, data, private_data_len); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return 0; + +error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + kfree(data); + return ret; + +error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + kfree(data); + cm_free_msg(msg); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_mra); + +static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg) +{ + switch (cm_mra_get_msg_mraed(mra_msg)) { + case CM_MSG_RESPONSE_REQ: + return cm_acquire_id(mra_msg->remote_comm_id, 0); + case CM_MSG_RESPONSE_REP: + case CM_MSG_RESPONSE_OTHER: + return cm_acquire_id(mra_msg->remote_comm_id, + mra_msg->local_comm_id); + default: + return NULL; + } +} + +static int cm_mra_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_mra_msg *mra_msg; + unsigned long flags; + int timeout, ret; + + mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_mraed_id(mra_msg); + if (!cm_id_priv) + return -EINVAL; + + work->cm_event.private_data = &mra_msg->private_data; + work->cm_event.param.mra_rcvd.service_timeout = + cm_mra_get_service_timeout(mra_msg); + timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + + cm_convert_to_ms(cm_id_priv->av.packet_life_time); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_REQ_SENT: + if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ || + ib_modify_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg, timeout)) + goto out; + cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD; + break; + case IB_CM_REP_SENT: + if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP || + ib_modify_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg, timeout)) + goto out; + 
cm_id_priv->id.state = IB_CM_MRA_REP_RCVD; + break; + case IB_CM_ESTABLISHED: + if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || + cm_id_priv->id.lap_state != IB_CM_LAP_SENT || + ib_modify_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg, timeout)) + goto out; + cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD; + break; + default: + goto out; + } + + cm_id_priv->msg->context[1] = (void *) (unsigned long) + cm_id_priv->id.state; + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static void cm_format_lap(struct cm_lap_msg *lap_msg, + struct cm_id_private *cm_id_priv, + struct ib_sa_path_rec *alternate_path, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID, + cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP)); + lap_msg->local_comm_id = cm_id_priv->id.local_id; + lap_msg->remote_comm_id = cm_id_priv->id.remote_id; + cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn); + /* todo: need remote CM response timeout */ + cm_lap_set_remote_resp_timeout(lap_msg, 0x1F); + lap_msg->alt_local_lid = alternate_path->slid; + lap_msg->alt_remote_lid = alternate_path->dlid; + lap_msg->alt_local_gid = alternate_path->sgid; + lap_msg->alt_remote_gid = alternate_path->dgid; + cm_lap_set_flow_label(lap_msg, alternate_path->flow_label); + cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class); + lap_msg->alt_hop_limit = alternate_path->hop_limit; + cm_lap_set_packet_rate(lap_msg, alternate_path->rate); + cm_lap_set_sl(lap_msg, alternate_path->sl); + cm_lap_set_subnet_local(lap_msg, 1); /* local only... 
*/ + cm_lap_set_local_ack_timeout(lap_msg, + min(31, alternate_path->packet_life_time + 1)); + + if (private_data && private_data_len) + memcpy(lap_msg->private_data, private_data, private_data_len); +} + +int ib_send_cm_lap(struct ib_cm_id *cm_id, + struct ib_sa_path_rec *alternate_path, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_ESTABLISHED || + cm_id->lap_state != IB_CM_LAP_IDLE) { + ret = -EINVAL; + goto out; + } + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto out; + + cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv, + alternate_path, private_data, private_data_len); + msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED; + + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + + cm_id->lap_state = IB_CM_LAP_SENT; + cm_id_priv->msg = msg; + +out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_lap); + +static void cm_format_path_from_lap(struct ib_sa_path_rec *path, + struct cm_lap_msg *lap_msg) +{ + memset(path, 0, sizeof *path); + path->dgid = lap_msg->alt_local_gid; + path->sgid = lap_msg->alt_remote_gid; + path->dlid = lap_msg->alt_local_lid; + path->slid = lap_msg->alt_remote_lid; + path->flow_label = cm_lap_get_flow_label(lap_msg); + path->hop_limit = lap_msg->alt_hop_limit; + path->traffic_class = cm_lap_get_traffic_class(lap_msg); + path->reversible = 1; + /* pkey is same as in REQ */ + path->sl = cm_lap_get_sl(lap_msg); + path->mtu_selector = IB_SA_EQ; + /* mtu is same as in REQ */ + path->rate_selector = IB_SA_EQ; + path->rate = cm_lap_get_packet_rate(lap_msg); + path->packet_life_time_selector = IB_SA_EQ; + path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg); + path->packet_life_time -= (path->packet_life_time > 0); +} + +static int cm_lap_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_lap_msg *lap_msg; + struct ib_cm_lap_event_param *param; + struct ib_mad_send_buf *msg = NULL; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + /* todo: verify LAP request and send reject APR if invalid. 
*/ + lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id, + lap_msg->local_comm_id); + if (!cm_id_priv) + return -EINVAL; + + param = &work->cm_event.param.lap_rcvd; + param->alternate_path = &work->path[0]; + cm_format_path_from_lap(param->alternate_path, lap_msg); + work->cm_event.private_data = &lap_msg->private_data; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IB_CM_ESTABLISHED) + goto unlock; + + switch (cm_id_priv->id.lap_state) { + case IB_CM_LAP_IDLE: + break; + case IB_CM_MRA_LAP_SENT: + if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) + goto unlock; + + cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, + CM_MSG_RESPONSE_OTHER, + cm_id_priv->service_timeout, + cm_id_priv->private_data, + cm_id_priv->private_data_len); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr)) + cm_free_msg(msg); + goto deref; + default: + goto unlock; + } + + cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; + cm_id_priv->tid = lap_msg->hdr.tid; + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; + +unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +deref: cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static void cm_format_apr(struct cm_apr_msg *apr_msg, + struct cm_id_private *cm_id_priv, + enum ib_cm_apr_status status, + void *info, + u8 info_length, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid); + apr_msg->local_comm_id = cm_id_priv->id.local_id; + apr_msg->remote_comm_id = cm_id_priv->id.remote_id; + apr_msg->ap_status = (u8) status; + + if (info && info_length) { + apr_msg->info_length = info_length; + memcpy(apr_msg->info, info, info_length); + } + + if (private_data && private_data_len) + memcpy(apr_msg->private_data, private_data, private_data_len); +} + +int ib_send_cm_apr(struct ib_cm_id *cm_id, + enum ib_cm_apr_status status, + void *info, + u8 info_length, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) || + (info && info_length > IB_CM_APR_INFO_LENGTH)) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_ESTABLISHED || + (cm_id->lap_state != IB_CM_LAP_RCVD && + cm_id->lap_state != IB_CM_MRA_LAP_SENT)) { + ret = -EINVAL; + goto out; + } + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto out; + + cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status, + info, info_length, private_data, private_data_len); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + + cm_id->lap_state = IB_CM_LAP_IDLE; +out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_apr); + +static int cm_apr_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_apr_msg *apr_msg; + unsigned 
long flags; + int ret; + + apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id, + apr_msg->local_comm_id); + if (!cm_id_priv) + return -EINVAL; /* Unmatched reply. */ + + work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status; + work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info; + work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length; + work->cm_event.private_data = &apr_msg->private_data; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IB_CM_ESTABLISHED || + (cm_id_priv->id.lap_state != IB_CM_LAP_SENT && + cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + goto out; + } + cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + cm_id_priv->msg = NULL; + + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static int cm_timewait_handler(struct cm_work *work) +{ + struct cm_timewait_info *timewait_info; + struct cm_id_private *cm_id_priv; + unsigned long flags; + int ret; + + timewait_info = (struct cm_timewait_info *)work; + cm_cleanup_timewait(timewait_info); + + cm_id_priv = cm_acquire_id(timewait_info->work.local_id, + timewait_info->work.remote_id); + if (!cm_id_priv) + return -EINVAL; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IB_CM_TIMEWAIT || + cm_id_priv->remote_qpn != timewait_info->remote_qpn) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + goto out; + } + cm_id_priv->id.state = IB_CM_IDLE; + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, + struct cm_id_private *cm_id_priv, + struct ib_cm_sidr_req_param *param) +{ + cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, + cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR)); + sidr_req_msg->request_id = cm_id_priv->id.local_id; + sidr_req_msg->pkey = param->pkey; + sidr_req_msg->service_id = param->service_id; + + if (param->private_data && param->private_data_len) + memcpy(sidr_req_msg->private_data, param->private_data, + param->private_data_len); +} + +int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, + struct ib_cm_sidr_req_param *param) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + if (!param->path || (param->private_data && + param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE)) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + ret = cm_init_av_by_path(param->path, &cm_id_priv->av); + if (ret) + goto out; + + cm_id->service_id = param->service_id; + cm_id->service_mask = ~0ULL; + cm_id_priv->timeout_ms = param->timeout_ms; + cm_id_priv->max_cm_retries = param->max_cm_retries; + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto out; + + cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv, 
+ param); + msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state == IB_CM_IDLE) + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + else + ret = -EINVAL; + + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + goto out; + } + cm_id->state = IB_CM_SIDR_REQ_SENT; + cm_id_priv->msg = msg; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); +out: + return ret; +} +EXPORT_SYMBOL(ib_send_cm_sidr_req); + +static void cm_format_sidr_req_event(struct cm_work *work, + struct ib_cm_id *listen_id) +{ + struct cm_sidr_req_msg *sidr_req_msg; + struct ib_cm_sidr_req_event_param *param; + + sidr_req_msg = (struct cm_sidr_req_msg *) + work->mad_recv_wc->recv_buf.mad; + param = &work->cm_event.param.sidr_req_rcvd; + param->pkey = sidr_req_msg->pkey; + param->listen_id = listen_id; + param->device = work->port->mad_agent->device; + param->port = work->port->port_num; + work->cm_event.private_data = &sidr_req_msg->private_data; +} + +static int cm_sidr_req_handler(struct cm_work *work) +{ + struct ib_cm_id *cm_id; + struct cm_id_private *cm_id_priv, *cur_cm_id_priv; + struct cm_sidr_req_msg *sidr_req_msg; + struct ib_wc *wc; + unsigned long flags; + + cm_id = ib_create_cm_id(NULL, NULL); + if (IS_ERR(cm_id)) + return PTR_ERR(cm_id); + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + + /* Record SGID/SLID and request ID for lookup. */ + sidr_req_msg = (struct cm_sidr_req_msg *) + work->mad_recv_wc->recv_buf.mad; + wc = work->mad_recv_wc->wc; + cm_id_priv->av.dgid.global.subnet_prefix = wc->slid; + cm_id_priv->av.dgid.global.interface_id = 0; + cm_init_av_for_response(work->port, work->mad_recv_wc->wc, + &cm_id_priv->av); + cm_id_priv->id.remote_id = sidr_req_msg->request_id; + cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; + cm_id_priv->tid = sidr_req_msg->hdr.tid; + atomic_inc(&cm_id_priv->work_count); + + spin_lock_irqsave(&cm.lock, flags); + cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); + if (cur_cm_id_priv) { + spin_unlock_irqrestore(&cm.lock, flags); + goto out; /* Duplicate message. */ + } + cur_cm_id_priv = cm_find_listen(sidr_req_msg->service_id); + if (!cur_cm_id_priv) { + rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); + spin_unlock_irqrestore(&cm.lock, flags); + /* todo: reply with no match */ + goto out; /* No match. 
*/ + } + atomic_inc(&cur_cm_id_priv->refcount); + spin_unlock_irqrestore(&cm.lock, flags); + + cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; + cm_id_priv->id.context = cur_cm_id_priv->id.context; + cm_id_priv->id.service_id = sidr_req_msg->service_id; + cm_id_priv->id.service_mask = ~0ULL; + + cm_format_sidr_req_event(work, &cur_cm_id_priv->id); + cm_process_work(cm_id_priv, work); + cm_deref_id(cur_cm_id_priv); + return 0; +out: + ib_destroy_cm_id(&cm_id_priv->id); + return -EINVAL; +} + +static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg, + struct cm_id_private *cm_id_priv, + struct ib_cm_sidr_rep_param *param) +{ + cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID, + cm_id_priv->tid); + sidr_rep_msg->request_id = cm_id_priv->id.remote_id; + sidr_rep_msg->status = param->status; + cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num)); + sidr_rep_msg->service_id = cm_id_priv->id.service_id; + sidr_rep_msg->qkey = cpu_to_be32(param->qkey); + + if (param->info && param->info_length) + memcpy(sidr_rep_msg->info, param->info, param->info_length); + + if (param->private_data && param->private_data_len) + memcpy(sidr_rep_msg->private_data, param->private_data, + param->private_data_len); +} + +int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, + struct ib_cm_sidr_rep_param *param) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct ib_send_wr *bad_send_wr; + unsigned long flags; + int ret; + + if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) || + (param->private_data && + param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE)) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_SIDR_REQ_RCVD) { + ret = -EINVAL; + goto error; + } + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto error; + + cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, + param); + ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, + &msg->send_wr, &bad_send_wr); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + cm_id->state = IB_CM_IDLE; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + spin_lock_irqsave(&cm.lock, flags); + rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); + spin_unlock_irqrestore(&cm.lock, flags); + return 0; + +error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_sidr_rep); + +static void cm_format_sidr_rep_event(struct cm_work *work) +{ + struct cm_sidr_rep_msg *sidr_rep_msg; + struct ib_cm_sidr_rep_event_param *param; + + sidr_rep_msg = (struct cm_sidr_rep_msg *) + work->mad_recv_wc->recv_buf.mad; + param = &work->cm_event.param.sidr_rep_rcvd; + param->status = sidr_rep_msg->status; + param->qkey = be32_to_cpu(sidr_rep_msg->qkey); + param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg)); + param->info = &sidr_rep_msg->info; + param->info_len = sidr_rep_msg->info_length; + work->cm_event.private_data = &sidr_rep_msg->private_data; +} + +static int cm_sidr_rep_handler(struct cm_work *work) +{ + struct cm_sidr_rep_msg *sidr_rep_msg; + struct cm_id_private *cm_id_priv; + unsigned long flags; + + sidr_rep_msg = (struct cm_sidr_rep_msg *) + work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0); + if (!cm_id_priv) + return -EINVAL; /* Unmatched reply. 
*/ + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + goto out; + } + cm_id_priv->id.state = IB_CM_IDLE; + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + (unsigned long) cm_id_priv->msg); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + cm_format_sidr_rep_event(work); + cm_process_work(cm_id_priv, work); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static void cm_process_send_error(struct ib_mad_send_buf *msg, + enum ib_wc_status wc_status) +{ + struct cm_id_private *cm_id_priv; + struct ib_cm_event cm_event; + enum ib_cm_state state; + unsigned long flags; + int ret; + + memset(&cm_event, 0, sizeof cm_event); + cm_id_priv = msg->context[0]; + + /* Discard old sends or ones without a response. */ + spin_lock_irqsave(&cm_id_priv->lock, flags); + state = (enum ib_cm_state) (unsigned long) msg->context[1]; + if (msg != cm_id_priv->msg || state != cm_id_priv->id.state) + goto discard; + + switch (state) { + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + cm_reset_to_idle(cm_id_priv); + cm_event.event = IB_CM_REQ_ERROR; + break; + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + cm_reset_to_idle(cm_id_priv); + cm_event.event = IB_CM_REP_ERROR; + break; + case IB_CM_DREQ_SENT: + cm_enter_timewait(cm_id_priv); + cm_event.event = IB_CM_DREQ_ERROR; + break; + case IB_CM_SIDR_REQ_SENT: + cm_id_priv->id.state = IB_CM_IDLE; + cm_event.event = IB_CM_SIDR_REQ_ERROR; + break; + default: + goto discard; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_event.param.send_status = wc_status; + + /* No other events can occur on the cm_id at this point. */ + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event); + cm_free_msg(msg); + if (ret) + ib_destroy_cm_id(&cm_id_priv->id); + return; +discard: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); +} + +static void cm_send_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_send_wc *mad_send_wc) +{ + struct ib_mad_send_buf *msg; + + msg = (struct ib_mad_send_buf *)(unsigned long)mad_send_wc->wr_id; + + switch (mad_send_wc->status) { + case IB_WC_SUCCESS: + case IB_WC_WR_FLUSH_ERR: + cm_free_msg(msg); + break; + default: + if (msg->context[0] && msg->context[1]) + cm_process_send_error(msg, mad_send_wc->status); + else + cm_free_msg(msg); + break; + } +} + +static void cm_work_handler(void *data) +{ + struct cm_work *work = data; + int ret; + + switch (work->cm_event.event) { + case IB_CM_REQ_RECEIVED: + ret = cm_req_handler(work); + break; + case IB_CM_MRA_RECEIVED: + ret = cm_mra_handler(work); + break; + case IB_CM_REJ_RECEIVED: + ret = cm_rej_handler(work); + break; + case IB_CM_REP_RECEIVED: + ret = cm_rep_handler(work); + break; + case IB_CM_RTU_RECEIVED: + ret = cm_rtu_handler(work); + break; + case IB_CM_USER_ESTABLISHED: + ret = cm_establish_handler(work); + break; + case IB_CM_DREQ_RECEIVED: + ret = cm_dreq_handler(work); + break; + case IB_CM_DREP_RECEIVED: + ret = cm_drep_handler(work); + break; + case IB_CM_SIDR_REQ_RECEIVED: + ret = cm_sidr_req_handler(work); + break; + case IB_CM_SIDR_REP_RECEIVED: + ret = cm_sidr_rep_handler(work); + break; + case IB_CM_LAP_RECEIVED: + ret = cm_lap_handler(work); + break; + case IB_CM_APR_RECEIVED: + ret = cm_apr_handler(work); + break; + case IB_CM_TIMEWAIT_EXIT: + ret = cm_timewait_handler(work); + break; + default: + ret = -EINVAL; + break; + } + if (ret) + cm_free_work(work); +} + +int ib_cm_establish(struct ib_cm_id *cm_id) +{ 
+ struct cm_id_private *cm_id_priv; + struct cm_work *work; + unsigned long flags; + int ret = 0; + + work = kmalloc(sizeof *work, GFP_ATOMIC); + if (!work) + return -ENOMEM; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id->state) + { + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + cm_id->state = IB_CM_ESTABLISHED; + break; + case IB_CM_ESTABLISHED: + ret = -EISCONN; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) { + kfree(work); + goto out; + } + + /* + * The CM worker thread may try to destroy the cm_id before it + * can execute this work item. To prevent potential deadlock, + * we need to find the cm_id once we're in the context of the + * worker thread, rather than holding a reference on it. + */ + INIT_WORK(&work->work, cm_work_handler, work); + work->local_id = cm_id->local_id; + work->remote_id = cm_id->remote_id; + work->mad_recv_wc = NULL; + work->cm_event.event = IB_CM_USER_ESTABLISHED; + queue_work(cm.wq, &work->work); +out: + return ret; +} +EXPORT_SYMBOL(ib_cm_establish); + +static void cm_recv_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct cm_work *work; + enum ib_cm_event_type event; + int paths = 0; + + switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) { + case CM_REQ_ATTR_ID: + paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)-> + alt_local_lid != 0); + event = IB_CM_REQ_RECEIVED; + break; + case CM_MRA_ATTR_ID: + event = IB_CM_MRA_RECEIVED; + break; + case CM_REJ_ATTR_ID: + event = IB_CM_REJ_RECEIVED; + break; + case CM_REP_ATTR_ID: + event = IB_CM_REP_RECEIVED; + break; + case CM_RTU_ATTR_ID: + event = IB_CM_RTU_RECEIVED; + break; + case CM_DREQ_ATTR_ID: + event = IB_CM_DREQ_RECEIVED; + break; + case CM_DREP_ATTR_ID: + event = IB_CM_DREP_RECEIVED; + break; + case CM_SIDR_REQ_ATTR_ID: + event = IB_CM_SIDR_REQ_RECEIVED; + break; + case CM_SIDR_REP_ATTR_ID: + event = IB_CM_SIDR_REP_RECEIVED; + break; + case CM_LAP_ATTR_ID: + paths = 1; + event = IB_CM_LAP_RECEIVED; + break; + case CM_APR_ATTR_ID: + event = IB_CM_APR_RECEIVED; + break; + default: + ib_free_recv_mad(mad_recv_wc); + return; + } + + work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths, + GFP_KERNEL); + if (!work) { + ib_free_recv_mad(mad_recv_wc); + return; + } + + INIT_WORK(&work->work, cm_work_handler, work); + work->cm_event.event = event; + work->mad_recv_wc = mad_recv_wc; + work->port = (struct cm_port *)mad_agent->context; + queue_work(cm.wq, &work->work); +} + +static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + case IB_CM_REQ_RCVD: + case IB_CM_MRA_REQ_SENT: + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + case IB_CM_ESTABLISHED: + *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | IB_QP_PORT; + qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; + if (cm_id_priv->responder_resources) + qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ; + qp_attr->pkey_index = cm_id_priv->av.pkey_index; + qp_attr->port_num = cm_id_priv->av.port->port_num; + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + 
return ret; +} + +static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_REQ_RCVD: + case IB_CM_MRA_REQ_SENT: + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + case IB_CM_ESTABLISHED: + *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | + IB_QP_DEST_QPN | IB_QP_RQ_PSN | + IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; + qp_attr->ah_attr = cm_id_priv->av.ah_attr; + qp_attr->path_mtu = cm_id_priv->path_mtu; + qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); + qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); + qp_attr->max_dest_rd_atomic = cm_id_priv->responder_resources; + qp_attr->min_rnr_timer = 0; + if (cm_id_priv->alt_av.ah_attr.dlid) { + *qp_attr_mask |= IB_QP_ALT_PATH; + qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; + } + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + case IB_CM_ESTABLISHED: + *qp_attr_mask = IB_QP_STATE | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | IB_QP_SQ_PSN | + IB_QP_MAX_QP_RD_ATOMIC; + qp_attr->timeout = cm_id_priv->local_ack_timeout; + qp_attr->retry_cnt = cm_id_priv->retry_count; + qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; + qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); + qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; + if (cm_id_priv->alt_av.ah_attr.dlid) { + *qp_attr_mask |= IB_QP_PATH_MIG_STATE; + qp_attr->path_mig_state = IB_MIG_REARM; + } + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + struct cm_id_private *cm_id_priv; + int ret; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + switch (qp_attr->qp_state) { + case IB_QPS_INIT: + ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask); + break; + case IB_QPS_RTR: + ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask); + break; + case IB_QPS_RTS: + ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask); + break; + default: + ret = -EINVAL; + break; + } + return ret; +} +EXPORT_SYMBOL(ib_cm_init_qp_attr); + +static u64 cm_get_ca_guid(struct ib_device *device) +{ + struct ib_device_attr *device_attr; + u64 guid; + int ret; + + device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL); + if (!device_attr) + return 0; + + ret = ib_query_device(device, device_attr); + guid = ret ? 
0 : device_attr->node_guid; + kfree(device_attr); + return guid; +} + +static void cm_add_one(struct ib_device *device) +{ + struct cm_device *cm_dev; + struct cm_port *port; + struct ib_mad_reg_req reg_req = { + .mgmt_class = IB_MGMT_CLASS_CM, + .mgmt_class_version = IB_CM_CLASS_VERSION + }; + struct ib_port_modify port_modify = { + .set_port_cap_mask = IB_PORT_CM_SUP + }; + unsigned long flags; + int ret; + u8 i; + + cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) * + device->phys_port_cnt, GFP_KERNEL); + if (!cm_dev) + return; + + cm_dev->device = device; + cm_dev->ca_guid = cm_get_ca_guid(device); + if (!cm_dev->ca_guid) + goto error1; + + set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); + for (i = 1; i <= device->phys_port_cnt; i++) { + port = &cm_dev->port[i-1]; + port->cm_dev = cm_dev; + port->port_num = i; + port->mad_agent = ib_register_mad_agent(device, i, + IB_QPT_GSI, + &reg_req, + 0, + cm_send_handler, + cm_recv_handler, + port); + if (IS_ERR(port->mad_agent)) + goto error2; + + ret = ib_modify_port(device, i, 0, &port_modify); + if (ret) + goto error3; + } + ib_set_client_data(device, &cm_client, cm_dev); + + write_lock_irqsave(&cm.device_lock, flags); + list_add_tail(&cm_dev->list, &cm.device_list); + write_unlock_irqrestore(&cm.device_lock, flags); + return; + +error3: + ib_unregister_mad_agent(port->mad_agent); +error2: + port_modify.set_port_cap_mask = 0; + port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; + while (--i) { + port = &cm_dev->port[i-1]; + ib_modify_port(device, port->port_num, 0, &port_modify); + ib_unregister_mad_agent(port->mad_agent); + } +error1: + kfree(cm_dev); +} + +static void cm_remove_one(struct ib_device *device) +{ + struct cm_device *cm_dev; + struct cm_port *port; + struct ib_port_modify port_modify = { + .clr_port_cap_mask = IB_PORT_CM_SUP + }; + unsigned long flags; + int i; + + cm_dev = ib_get_client_data(device, &cm_client); + if (!cm_dev) + return; + + write_lock_irqsave(&cm.device_lock, flags); + list_del(&cm_dev->list); + write_unlock_irqrestore(&cm.device_lock, flags); + + for (i = 1; i <= device->phys_port_cnt; i++) { + port = &cm_dev->port[i-1]; + ib_modify_port(device, port->port_num, 0, &port_modify); + ib_unregister_mad_agent(port->mad_agent); + } + kfree(cm_dev); +} + +static int __init ib_cm_init(void) +{ + int ret; + + memset(&cm, 0, sizeof cm); + INIT_LIST_HEAD(&cm.device_list); + rwlock_init(&cm.device_lock); + spin_lock_init(&cm.lock); + cm.listen_service_table = RB_ROOT; + cm.listen_service_id = __constant_be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID); + cm.remote_id_table = RB_ROOT; + cm.remote_qp_table = RB_ROOT; + cm.remote_sidr_table = RB_ROOT; + idr_init(&cm.local_id_table); + idr_pre_get(&cm.local_id_table, GFP_KERNEL); + + cm.wq = create_workqueue("ib_cm"); + if (!cm.wq) + return -ENOMEM; + + ret = ib_register_client(&cm_client); + if (ret) + goto error; + + return 0; +error: + destroy_workqueue(cm.wq); + return ret; +} + +static void __exit ib_cm_cleanup(void) +{ + flush_workqueue(cm.wq); + destroy_workqueue(cm.wq); + ib_unregister_client(&cm_client); +} + +module_init(ib_cm_init); +module_exit(ib_cm_cleanup); + diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h new file mode 100644 index 000000000000..15a309a77b2b --- /dev/null +++ b/drivers/infiniband/core/cm_msgs.h @@ -0,0 +1,819 @@ +/* + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING the madirectory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use source and binary forms, with or + * withmodification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retathe above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHWARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS THE + * SOFTWARE. + */ +#if !defined(CM_MSGS_H) +#define CM_MSGS_H + +#include + +/* + * Parameters to routines below should be in network-byte order, and values + * are returned in network-byte order. + */ + +#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ + +enum cm_msg_attr_id { + CM_REQ_ATTR_ID = __constant_htons(0x0010), + CM_MRA_ATTR_ID = __constant_htons(0x0011), + CM_REJ_ATTR_ID = __constant_htons(0x0012), + CM_REP_ATTR_ID = __constant_htons(0x0013), + CM_RTU_ATTR_ID = __constant_htons(0x0014), + CM_DREQ_ATTR_ID = __constant_htons(0x0015), + CM_DREP_ATTR_ID = __constant_htons(0x0016), + CM_SIDR_REQ_ATTR_ID = __constant_htons(0x0017), + CM_SIDR_REP_ATTR_ID = __constant_htons(0x0018), + CM_LAP_ATTR_ID = __constant_htons(0x0019), + CM_APR_ATTR_ID = __constant_htons(0x001A) +}; + +enum cm_msg_sequence { + CM_MSG_SEQUENCE_REQ, + CM_MSG_SEQUENCE_LAP, + CM_MSG_SEQUENCE_DREQ, + CM_MSG_SEQUENCE_SIDR +}; + +struct cm_req_msg { + struct ib_mad_hdr hdr; + + u32 local_comm_id; + u32 rsvd4; + u64 service_id; + u64 local_ca_guid; + u32 rsvd24; + u32 local_qkey; + /* local QPN:24, responder resources:8 */ + u32 offset32; + /* local EECN:24, initiator depth:8 */ + u32 offset36; + /* + * remote EECN:24, remote CM response timeout:5, + * transport service type:2, end-to-end flow control:1 + */ + u32 offset40; + /* starting PSN:24, local CM response timeout:5, retry count:3 */ + u32 offset44; + u16 pkey; + /* path MTU:4, RDC exists:1, RNR retry count:3. 
*/ + u8 offset50; + /* max CM Retries:4, SRQ:1, rsvd:3 */ + u8 offset51; + + u16 primary_local_lid; + u16 primary_remote_lid; + union ib_gid primary_local_gid; + union ib_gid primary_remote_gid; + /* flow label:20, rsvd:6, packet rate:6 */ + u32 primary_offset88; + u8 primary_traffic_class; + u8 primary_hop_limit; + /* SL:4, subnet local:1, rsvd:3 */ + u8 primary_offset94; + /* local ACK timeout:5, rsvd:3 */ + u8 primary_offset95; + + u16 alt_local_lid; + u16 alt_remote_lid; + union ib_gid alt_local_gid; + union ib_gid alt_remote_gid; + /* flow label:20, rsvd:6, packet rate:6 */ + u32 alt_offset132; + u8 alt_traffic_class; + u8 alt_hop_limit; + /* SL:4, subnet local:1, rsvd:3 */ + u8 alt_offset138; + /* local ACK timeout:5, rsvd:3 */ + u8 alt_offset139; + + u8 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE]; + +} __attribute__ ((packed)); + +static inline u32 cm_req_get_local_qpn(struct cm_req_msg *req_msg) +{ + return cpu_to_be32(be32_to_cpu(req_msg->offset32) >> 8); +} + +static inline void cm_req_set_local_qpn(struct cm_req_msg *req_msg, u32 qpn) +{ + req_msg->offset32 = cpu_to_be32((be32_to_cpu(qpn) << 8) | + (be32_to_cpu(req_msg->offset32) & + 0x000000FF)); +} + +static inline u8 cm_req_get_resp_res(struct cm_req_msg *req_msg) +{ + return (u8) be32_to_cpu(req_msg->offset32); +} + +static inline void cm_req_set_resp_res(struct cm_req_msg *req_msg, u8 resp_res) +{ + req_msg->offset32 = cpu_to_be32(resp_res | + (be32_to_cpu(req_msg->offset32) & + 0xFFFFFF00)); +} + +static inline u8 cm_req_get_init_depth(struct cm_req_msg *req_msg) +{ + return (u8) be32_to_cpu(req_msg->offset36); +} + +static inline void cm_req_set_init_depth(struct cm_req_msg *req_msg, + u8 init_depth) +{ + req_msg->offset36 = cpu_to_be32(init_depth | + (be32_to_cpu(req_msg->offset36) & + 0xFFFFFF00)); +} + +static inline u8 cm_req_get_remote_resp_timeout(struct cm_req_msg *req_msg) +{ + return (u8) ((be32_to_cpu(req_msg->offset40) & 0xF8) >> 3); +} + +static inline void cm_req_set_remote_resp_timeout(struct cm_req_msg *req_msg, + u8 resp_timeout) +{ + req_msg->offset40 = cpu_to_be32((resp_timeout << 3) | + (be32_to_cpu(req_msg->offset40) & + 0xFFFFFF07)); +} + +static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg) +{ + u8 transport_type = (u8) (be32_to_cpu(req_msg->offset40) & 0x06) >> 1; + switch(transport_type) { + case 0: return IB_QPT_RC; + case 1: return IB_QPT_UC; + default: return 0; + } +} + +static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg, + enum ib_qp_type qp_type) +{ + switch(qp_type) { + case IB_QPT_UC: + req_msg->offset40 = cpu_to_be32((be32_to_cpu( + req_msg->offset40) & + 0xFFFFFFF9) | 0x2); + default: + req_msg->offset40 = cpu_to_be32(be32_to_cpu( + req_msg->offset40) & + 0xFFFFFFF9); + } +} + +static inline u8 cm_req_get_flow_ctrl(struct cm_req_msg *req_msg) +{ + return be32_to_cpu(req_msg->offset40) & 0x1; +} + +static inline void cm_req_set_flow_ctrl(struct cm_req_msg *req_msg, + u8 flow_ctrl) +{ + req_msg->offset40 = cpu_to_be32((flow_ctrl & 0x1) | + (be32_to_cpu(req_msg->offset40) & + 0xFFFFFFFE)); +} + +static inline u32 cm_req_get_starting_psn(struct cm_req_msg *req_msg) +{ + return cpu_to_be32(be32_to_cpu(req_msg->offset44) >> 8); +} + +static inline void cm_req_set_starting_psn(struct cm_req_msg *req_msg, + u32 starting_psn) +{ + req_msg->offset44 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) | + (be32_to_cpu(req_msg->offset44) & 0x000000FF)); +} + +static inline u8 cm_req_get_local_resp_timeout(struct cm_req_msg *req_msg) +{ + return (u8) 
((be32_to_cpu(req_msg->offset44) & 0xF8) >> 3); +} + +static inline void cm_req_set_local_resp_timeout(struct cm_req_msg *req_msg, + u8 resp_timeout) +{ + req_msg->offset44 = cpu_to_be32((resp_timeout << 3) | + (be32_to_cpu(req_msg->offset44) & 0xFFFFFF07)); +} + +static inline u8 cm_req_get_retry_count(struct cm_req_msg *req_msg) +{ + return (u8) (be32_to_cpu(req_msg->offset44) & 0x7); +} + +static inline void cm_req_set_retry_count(struct cm_req_msg *req_msg, + u8 retry_count) +{ + req_msg->offset44 = cpu_to_be32((retry_count & 0x7) | + (be32_to_cpu(req_msg->offset44) & 0xFFFFFFF8)); +} + +static inline u8 cm_req_get_path_mtu(struct cm_req_msg *req_msg) +{ + return req_msg->offset50 >> 4; +} + +static inline void cm_req_set_path_mtu(struct cm_req_msg *req_msg, u8 path_mtu) +{ + req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF) | (path_mtu << 4)); +} + +static inline u8 cm_req_get_rnr_retry_count(struct cm_req_msg *req_msg) +{ + return req_msg->offset50 & 0x7; +} + +static inline void cm_req_set_rnr_retry_count(struct cm_req_msg *req_msg, + u8 rnr_retry_count) +{ + req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF8) | + (rnr_retry_count & 0x7)); +} + +static inline u8 cm_req_get_max_cm_retries(struct cm_req_msg *req_msg) +{ + return req_msg->offset51 >> 4; +} + +static inline void cm_req_set_max_cm_retries(struct cm_req_msg *req_msg, + u8 retries) +{ + req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF) | (retries << 4)); +} + +static inline u8 cm_req_get_srq(struct cm_req_msg *req_msg) +{ + return (req_msg->offset51 & 0x8) >> 3; +} + +static inline void cm_req_set_srq(struct cm_req_msg *req_msg, u8 srq) +{ + req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF7) | + ((srq & 0x1) << 3)); +} + +static inline u32 cm_req_get_primary_flow_label(struct cm_req_msg *req_msg) +{ + return cpu_to_be32((be32_to_cpu(req_msg->primary_offset88) >> 12)); +} + +static inline void cm_req_set_primary_flow_label(struct cm_req_msg *req_msg, + u32 flow_label) +{ + req_msg->primary_offset88 = cpu_to_be32( + (be32_to_cpu(req_msg->primary_offset88) & + 0x00000FFF) | + (be32_to_cpu(flow_label) << 12)); +} + +static inline u8 cm_req_get_primary_packet_rate(struct cm_req_msg *req_msg) +{ + return (u8) (be32_to_cpu(req_msg->primary_offset88) & 0x3F); +} + +static inline void cm_req_set_primary_packet_rate(struct cm_req_msg *req_msg, + u8 rate) +{ + req_msg->primary_offset88 = cpu_to_be32( + (be32_to_cpu(req_msg->primary_offset88) & + 0xFFFFFFC0) | (rate & 0x3F)); +} + +static inline u8 cm_req_get_primary_sl(struct cm_req_msg *req_msg) +{ + return (u8) (req_msg->primary_offset94 >> 4); +} + +static inline void cm_req_set_primary_sl(struct cm_req_msg *req_msg, u8 sl) +{ + req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0x0F) | + (sl << 4)); +} + +static inline u8 cm_req_get_primary_subnet_local(struct cm_req_msg *req_msg) +{ + return (u8) ((req_msg->primary_offset94 & 0x08) >> 3); +} + +static inline void cm_req_set_primary_subnet_local(struct cm_req_msg *req_msg, + u8 subnet_local) +{ + req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0xF7) | + ((subnet_local & 0x1) << 3)); +} + +static inline u8 cm_req_get_primary_local_ack_timeout(struct cm_req_msg *req_msg) +{ + return (u8) (req_msg->primary_offset95 >> 3); +} + +static inline void cm_req_set_primary_local_ack_timeout(struct cm_req_msg *req_msg, + u8 local_ack_timeout) +{ + req_msg->primary_offset95 = (u8) ((req_msg->primary_offset95 & 0x07) | + (local_ack_timeout << 3)); +} + +static inline u32 cm_req_get_alt_flow_label(struct 
cm_req_msg *req_msg) +{ + return cpu_to_be32((be32_to_cpu(req_msg->alt_offset132) >> 12)); +} + +static inline void cm_req_set_alt_flow_label(struct cm_req_msg *req_msg, + u32 flow_label) +{ + req_msg->alt_offset132 = cpu_to_be32( + (be32_to_cpu(req_msg->alt_offset132) & + 0x00000FFF) | + (be32_to_cpu(flow_label) << 12)); +} + +static inline u8 cm_req_get_alt_packet_rate(struct cm_req_msg *req_msg) +{ + return (u8) (be32_to_cpu(req_msg->alt_offset132) & 0x3F); +} + +static inline void cm_req_set_alt_packet_rate(struct cm_req_msg *req_msg, + u8 rate) +{ + req_msg->alt_offset132 = cpu_to_be32( + (be32_to_cpu(req_msg->alt_offset132) & + 0xFFFFFFC0) | (rate & 0x3F)); +} + +static inline u8 cm_req_get_alt_sl(struct cm_req_msg *req_msg) +{ + return (u8) (req_msg->alt_offset138 >> 4); +} + +static inline void cm_req_set_alt_sl(struct cm_req_msg *req_msg, u8 sl) +{ + req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0x0F) | + (sl << 4)); +} + +static inline u8 cm_req_get_alt_subnet_local(struct cm_req_msg *req_msg) +{ + return (u8) ((req_msg->alt_offset138 & 0x08) >> 3); +} + +static inline void cm_req_set_alt_subnet_local(struct cm_req_msg *req_msg, + u8 subnet_local) +{ + req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0xF7) | + ((subnet_local & 0x1) << 3)); +} + +static inline u8 cm_req_get_alt_local_ack_timeout(struct cm_req_msg *req_msg) +{ + return (u8) (req_msg->alt_offset139 >> 3); +} + +static inline void cm_req_set_alt_local_ack_timeout(struct cm_req_msg *req_msg, + u8 local_ack_timeout) +{ + req_msg->alt_offset139 = (u8) ((req_msg->alt_offset139 & 0x07) | + (local_ack_timeout << 3)); +} + +/* Message REJected or MRAed */ +enum cm_msg_response { + CM_MSG_RESPONSE_REQ = 0x0, + CM_MSG_RESPONSE_REP = 0x1, + CM_MSG_RESPONSE_OTHER = 0x2 +}; + + struct cm_mra_msg { + struct ib_mad_hdr hdr; + + u32 local_comm_id; + u32 remote_comm_id; + /* message MRAed:2, rsvd:6 */ + u8 offset8; + /* service timeout:5, rsvd:3 */ + u8 offset9; + + u8 private_data[IB_CM_MRA_PRIVATE_DATA_SIZE]; + +} __attribute__ ((packed)); + +static inline u8 cm_mra_get_msg_mraed(struct cm_mra_msg *mra_msg) +{ + return (u8) (mra_msg->offset8 >> 6); +} + +static inline void cm_mra_set_msg_mraed(struct cm_mra_msg *mra_msg, u8 msg) +{ + mra_msg->offset8 = (u8) ((mra_msg->offset8 & 0x3F) | (msg << 6)); +} + +static inline u8 cm_mra_get_service_timeout(struct cm_mra_msg *mra_msg) +{ + return (u8) (mra_msg->offset9 >> 3); +} + +static inline void cm_mra_set_service_timeout(struct cm_mra_msg *mra_msg, + u8 service_timeout) +{ + mra_msg->offset9 = (u8) ((mra_msg->offset9 & 0x07) | + (service_timeout << 3)); +} + +struct cm_rej_msg { + struct ib_mad_hdr hdr; + + u32 local_comm_id; + u32 remote_comm_id; + /* message REJected:2, rsvd:6 */ + u8 offset8; + /* reject info length:7, rsvd:1. 
*/ + u8 offset9; + u16 reason; + u8 ari[IB_CM_REJ_ARI_LENGTH]; + + u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE]; + +} __attribute__ ((packed)); + +static inline u8 cm_rej_get_msg_rejected(struct cm_rej_msg *rej_msg) +{ + return (u8) (rej_msg->offset8 >> 6); +} + +static inline void cm_rej_set_msg_rejected(struct cm_rej_msg *rej_msg, u8 msg) +{ + rej_msg->offset8 = (u8) ((rej_msg->offset8 & 0x3F) | (msg << 6)); +} + +static inline u8 cm_rej_get_reject_info_len(struct cm_rej_msg *rej_msg) +{ + return (u8) (rej_msg->offset9 >> 1); +} + +static inline void cm_rej_set_reject_info_len(struct cm_rej_msg *rej_msg, + u8 len) +{ + rej_msg->offset9 = (u8) ((rej_msg->offset9 & 0x1) | (len << 1)); +} + +struct cm_rep_msg { + struct ib_mad_hdr hdr; + + u32 local_comm_id; + u32 remote_comm_id; + u32 local_qkey; + /* local QPN:24, rsvd:8 */ + u32 offset12; + /* local EECN:24, rsvd:8 */ + u32 offset16; + /* starting PSN:24 rsvd:8 */ + u32 offset20; + u8 resp_resources; + u8 initiator_depth; + /* target ACK delay:5, failover accepted:2, end-to-end flow control:1 */ + u8 offset26; + /* RNR retry count:3, SRQ:1, rsvd:5 */ + u8 offset27; + u64 local_ca_guid; + + u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE]; + +} __attribute__ ((packed)); + +static inline u32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg) +{ + return cpu_to_be32(be32_to_cpu(rep_msg->offset12) >> 8); +} + +static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, u32 qpn) +{ + rep_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) | + (be32_to_cpu(rep_msg->offset12) & 0x000000FF)); +} + +static inline u32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg) +{ + return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8); +} + +static inline void cm_rep_set_starting_psn(struct cm_rep_msg *rep_msg, + u32 starting_psn) +{ + rep_msg->offset20 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) | + (be32_to_cpu(rep_msg->offset20) & 0x000000FF)); +} + +static inline u8 cm_rep_get_target_ack_delay(struct cm_rep_msg *rep_msg) +{ + return (u8) (rep_msg->offset26 >> 3); +} + +static inline void cm_rep_set_target_ack_delay(struct cm_rep_msg *rep_msg, + u8 target_ack_delay) +{ + rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0x07) | + (target_ack_delay << 3)); +} + +static inline u8 cm_rep_get_failover(struct cm_rep_msg *rep_msg) +{ + return (u8) ((rep_msg->offset26 & 0x06) >> 1); +} + +static inline void cm_rep_set_failover(struct cm_rep_msg *rep_msg, u8 failover) +{ + rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xF9) | + ((failover & 0x3) << 1)); +} + +static inline u8 cm_rep_get_flow_ctrl(struct cm_rep_msg *rep_msg) +{ + return (u8) (rep_msg->offset26 & 0x01); +} + +static inline void cm_rep_set_flow_ctrl(struct cm_rep_msg *rep_msg, + u8 flow_ctrl) +{ + rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xFE) | + (flow_ctrl & 0x1)); +} + +static inline u8 cm_rep_get_rnr_retry_count(struct cm_rep_msg *rep_msg) +{ + return (u8) (rep_msg->offset27 >> 5); +} + +static inline void cm_rep_set_rnr_retry_count(struct cm_rep_msg *rep_msg, + u8 rnr_retry_count) +{ + rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0x1F) | + (rnr_retry_count << 5)); +} + +static inline u8 cm_rep_get_srq(struct cm_rep_msg *rep_msg) +{ + return (u8) ((rep_msg->offset27 >> 4) & 0x1); +} + +static inline void cm_rep_set_srq(struct cm_rep_msg *rep_msg, u8 srq) +{ + rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0xEF) | + ((srq & 0x1) << 4)); +} + +struct cm_rtu_msg { + struct ib_mad_hdr hdr; + + u32 local_comm_id; + u32 remote_comm_id; + + u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE]; 
+ +} __attribute__ ((packed)); + +struct cm_dreq_msg { + struct ib_mad_hdr hdr; + + u32 local_comm_id; + u32 remote_comm_id; + /* remote QPN/EECN:24, rsvd:8 */ + u32 offset8; + + u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE]; + +} __attribute__ ((packed)); + +static inline u32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg) +{ + return cpu_to_be32(be32_to_cpu(dreq_msg->offset8) >> 8); +} + +static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, u32 qpn) +{ + dreq_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) | + (be32_to_cpu(dreq_msg->offset8) & 0x000000FF)); +} + +struct cm_drep_msg { + struct ib_mad_hdr hdr; + + u32 local_comm_id; + u32 remote_comm_id; + + u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE]; + +} __attribute__ ((packed)); + +struct cm_lap_msg { + struct ib_mad_hdr hdr; + + u32 local_comm_id; + u32 remote_comm_id; + + u32 rsvd8; + /* remote QPN/EECN:24, remote CM response timeout:5, rsvd:3 */ + u32 offset12; + u32 rsvd16; + + u16 alt_local_lid; + u16 alt_remote_lid; + union ib_gid alt_local_gid; + union ib_gid alt_remote_gid; + /* flow label:20, rsvd:4, traffic class:8 */ + u32 offset56; + u8 alt_hop_limit; + /* rsvd:2, packet rate:6 */ + uint8_t offset61; + /* SL:4, subnet local:1, rsvd:3 */ + uint8_t offset62; + /* local ACK timeout:5, rsvd:3 */ + uint8_t offset63; + + u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE]; +} __attribute__ ((packed)); + +static inline u32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg) +{ + return cpu_to_be32(be32_to_cpu(lap_msg->offset12) >> 8); +} + +static inline void cm_lap_set_remote_qpn(struct cm_lap_msg *lap_msg, u32 qpn) +{ + lap_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) | + (be32_to_cpu(lap_msg->offset12) & + 0x000000FF)); +} + +static inline u8 cm_lap_get_remote_resp_timeout(struct cm_lap_msg *lap_msg) +{ + return (u8) ((be32_to_cpu(lap_msg->offset12) & 0xF8) >> 3); +} + +static inline void cm_lap_set_remote_resp_timeout(struct cm_lap_msg *lap_msg, + u8 resp_timeout) +{ + lap_msg->offset12 = cpu_to_be32((resp_timeout << 3) | + (be32_to_cpu(lap_msg->offset12) & + 0xFFFFFF07)); +} + +static inline u32 cm_lap_get_flow_label(struct cm_lap_msg *lap_msg) +{ + return be32_to_cpu(lap_msg->offset56) >> 12; +} + +static inline void cm_lap_set_flow_label(struct cm_lap_msg *lap_msg, + u32 flow_label) +{ + lap_msg->offset56 = cpu_to_be32((flow_label << 12) | + (be32_to_cpu(lap_msg->offset56) & + 0x00000FFF)); +} + +static inline u8 cm_lap_get_traffic_class(struct cm_lap_msg *lap_msg) +{ + return (u8) be32_to_cpu(lap_msg->offset56); +} + +static inline void cm_lap_set_traffic_class(struct cm_lap_msg *lap_msg, + u8 traffic_class) +{ + lap_msg->offset56 = cpu_to_be32(traffic_class | + (be32_to_cpu(lap_msg->offset56) & + 0xFFFFFF00)); +} + +static inline u8 cm_lap_get_packet_rate(struct cm_lap_msg *lap_msg) +{ + return lap_msg->offset61 & 0x3F; +} + +static inline void cm_lap_set_packet_rate(struct cm_lap_msg *lap_msg, + u8 packet_rate) +{ + lap_msg->offset61 = (packet_rate & 0x3F) | (lap_msg->offset61 & 0xC0); +} + +static inline u8 cm_lap_get_sl(struct cm_lap_msg *lap_msg) +{ + return lap_msg->offset62 >> 4; +} + +static inline void cm_lap_set_sl(struct cm_lap_msg *lap_msg, u8 sl) +{ + lap_msg->offset62 = (sl << 4) | (lap_msg->offset62 & 0x0F); +} + +static inline u8 cm_lap_get_subnet_local(struct cm_lap_msg *lap_msg) +{ + return (lap_msg->offset62 >> 3) & 0x1; +} + +static inline void cm_lap_set_subnet_local(struct cm_lap_msg *lap_msg, + u8 subnet_local) +{ + lap_msg->offset62 = ((subnet_local & 0x1) << 3) | + 
(lap_msg->offset61 & 0xF7); +} +static inline u8 cm_lap_get_local_ack_timeout(struct cm_lap_msg *lap_msg) +{ + return lap_msg->offset63 >> 3; +} + +static inline void cm_lap_set_local_ack_timeout(struct cm_lap_msg *lap_msg, + u8 local_ack_timeout) +{ + lap_msg->offset63 = (local_ack_timeout << 3) | + (lap_msg->offset63 & 0x07); +} + +struct cm_apr_msg { + struct ib_mad_hdr hdr; + + u32 local_comm_id; + u32 remote_comm_id; + + u8 info_length; + u8 ap_status; + u8 info[IB_CM_APR_INFO_LENGTH]; + + u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE]; +} __attribute__ ((packed)); + +struct cm_sidr_req_msg { + struct ib_mad_hdr hdr; + + u32 request_id; + u16 pkey; + u16 rsvd; + u64 service_id; + + u8 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE]; +} __attribute__ ((packed)); + +struct cm_sidr_rep_msg { + struct ib_mad_hdr hdr; + + u32 request_id; + u8 status; + u8 info_length; + u16 rsvd; + /* QPN:24, rsvd:8 */ + u32 offset8; + u64 service_id; + u32 qkey; + u8 info[IB_CM_SIDR_REP_INFO_LENGTH]; + + u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE]; +} __attribute__ ((packed)); + +static inline u32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg) +{ + return cpu_to_be32(be32_to_cpu(sidr_rep_msg->offset8) >> 8); +} + +static inline void cm_sidr_rep_set_qpn(struct cm_sidr_rep_msg *sidr_rep_msg, + u32 qpn) +{ + sidr_rep_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) | + (be32_to_cpu(sidr_rep_msg->offset8) & + 0x000000FF)); +} + +#endif /* CM_MSGS_H */ -- cgit v1.2.3-55-g7522 From cb183a06b381652b7637fedfa7ef85ec0baf2a1f Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:42 -0700 Subject: [PATCH] IB: Implementation for RMPP support in user MAD Implementation for RMPP support in user MAD Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/user_mad.c | 300 ++++++++++++++++++++++++------------- 1 file changed, 194 insertions(+), 106 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 088bb1f0f514..2e38792df533 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -29,7 +31,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* - * $Id: user_mad.c 1389 2004-12-27 22:56:47Z roland $ + * $Id: user_mad.c 2814 2005-07-06 19:14:09Z halr $ */ #include @@ -94,10 +96,12 @@ struct ib_umad_file { }; struct ib_umad_packet { - struct ib_user_mad mad; struct ib_ah *ah; + struct ib_mad_send_buf *msg; struct list_head list; + int length; DECLARE_PCI_UNMAP_ADDR(mapping) + struct ib_user_mad mad; }; static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); @@ -114,10 +118,10 @@ static int queue_packet(struct ib_umad_file *file, int ret = 1; down_read(&file->agent_mutex); - for (packet->mad.id = 0; - packet->mad.id < IB_UMAD_MAX_AGENTS; - packet->mad.id++) - if (agent == file->agent[packet->mad.id]) { + for (packet->mad.hdr.id = 0; + packet->mad.hdr.id < IB_UMAD_MAX_AGENTS; + packet->mad.hdr.id++) + if (agent == file->agent[packet->mad.hdr.id]) { spin_lock_irq(&file->recv_lock); list_add_tail(&packet->list, &file->recv_list); spin_unlock_irq(&file->recv_lock); @@ -135,22 +139,30 @@ static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *send_wc) { struct ib_umad_file *file = agent->context; - struct ib_umad_packet *packet = + struct ib_umad_packet *timeout, *packet = (void *) (unsigned long) send_wc->wr_id; - dma_unmap_single(agent->device->dma_device, - pci_unmap_addr(packet, mapping), - sizeof packet->mad.data, - DMA_TO_DEVICE); - ib_destroy_ah(packet->ah); + ib_destroy_ah(packet->msg->send_wr.wr.ud.ah); + ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { - packet->mad.status = ETIMEDOUT; + timeout = kmalloc(sizeof *timeout + sizeof (struct ib_mad_hdr), + GFP_KERNEL); + if (!timeout) + goto out; - if (!queue_packet(file, agent, packet)) - return; - } + memset(timeout, 0, sizeof *timeout + sizeof (struct ib_mad_hdr)); + timeout->length = sizeof (struct ib_mad_hdr); + timeout->mad.hdr.id = packet->mad.hdr.id; + timeout->mad.hdr.status = ETIMEDOUT; + memcpy(timeout->mad.data, packet->mad.data, + sizeof (struct ib_mad_hdr)); + + if (!queue_packet(file, agent, timeout)) + return; + } +out: kfree(packet); } @@ -159,30 +171,35 @@ static void recv_handler(struct ib_mad_agent *agent, { struct ib_umad_file *file = agent->context; struct ib_umad_packet *packet; + int length; if (mad_recv_wc->wc->status != IB_WC_SUCCESS) goto out; - packet = kmalloc(sizeof *packet, GFP_KERNEL); + length = mad_recv_wc->mad_len; + packet = kmalloc(sizeof *packet + length, GFP_KERNEL); if (!packet) goto out; - memset(packet, 0, sizeof *packet); + memset(packet, 0, sizeof *packet + length); + packet->length = length; + + ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data); - memcpy(packet->mad.data, mad_recv_wc->recv_buf.mad, sizeof packet->mad.data); - packet->mad.status = 0; - packet->mad.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); - packet->mad.lid = cpu_to_be16(mad_recv_wc->wc->slid); - packet->mad.sl = mad_recv_wc->wc->sl; - packet->mad.path_bits = mad_recv_wc->wc->dlid_path_bits; - packet->mad.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); - if (packet->mad.grh_present) { + packet->mad.hdr.status = 0; + packet->mad.hdr.length = length + sizeof (struct ib_user_mad); + packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); + packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); + packet->mad.hdr.sl = mad_recv_wc->wc->sl; + packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; + packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); + if (packet->mad.hdr.grh_present) { /* XXX parse GRH */ - packet->mad.gid_index = 0; - packet->mad.hop_limit = 0; - 
packet->mad.traffic_class = 0; - memset(packet->mad.gid, 0, 16); - packet->mad.flow_label = 0; + packet->mad.hdr.gid_index = 0; + packet->mad.hdr.hop_limit = 0; + packet->mad.hdr.traffic_class = 0; + memset(packet->mad.hdr.gid, 0, 16); + packet->mad.hdr.flow_label = 0; } if (queue_packet(file, agent, packet)) @@ -199,7 +216,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, struct ib_umad_packet *packet; ssize_t ret; - if (count < sizeof (struct ib_user_mad)) + if (count < sizeof (struct ib_user_mad) + sizeof (struct ib_mad)) return -EINVAL; spin_lock_irq(&file->recv_lock); @@ -222,12 +239,25 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, spin_unlock_irq(&file->recv_lock); - if (copy_to_user(buf, &packet->mad, sizeof packet->mad)) + if (count < packet->length + sizeof (struct ib_user_mad)) { + /* Return length needed (and first RMPP segment) if too small */ + if (copy_to_user(buf, &packet->mad, + sizeof (struct ib_user_mad) + sizeof (struct ib_mad))) + ret = -EFAULT; + else + ret = -ENOSPC; + } else if (copy_to_user(buf, &packet->mad, + packet->length + sizeof (struct ib_user_mad))) ret = -EFAULT; else - ret = sizeof packet->mad; - - kfree(packet); + ret = packet->length + sizeof (struct ib_user_mad); + if (ret < 0) { + /* Requeue packet */ + spin_lock_irq(&file->recv_lock); + list_add(&packet->list, &file->recv_list); + spin_unlock_irq(&file->recv_lock); + } else + kfree(packet); return ret; } @@ -238,69 +268,57 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, struct ib_umad_packet *packet; struct ib_mad_agent *agent; struct ib_ah_attr ah_attr; - struct ib_sge gather_list; - struct ib_send_wr *bad_wr, wr = { - .opcode = IB_WR_SEND, - .sg_list = &gather_list, - .num_sge = 1, - .send_flags = IB_SEND_SIGNALED, - }; + struct ib_send_wr *bad_wr; + struct ib_rmpp_mad *rmpp_mad; u8 method; u64 *tid; - int ret; + int ret, length, hdr_len, data_len, rmpp_hdr_size; + int rmpp_active = 0; if (count < sizeof (struct ib_user_mad)) return -EINVAL; - packet = kmalloc(sizeof *packet, GFP_KERNEL); + length = count - sizeof (struct ib_user_mad); + packet = kmalloc(sizeof *packet + sizeof(struct ib_mad_hdr) + + sizeof(struct ib_rmpp_hdr), GFP_KERNEL); if (!packet) return -ENOMEM; - if (copy_from_user(&packet->mad, buf, sizeof packet->mad)) { - kfree(packet); - return -EFAULT; + if (copy_from_user(&packet->mad, buf, + sizeof (struct ib_user_mad) + + sizeof(struct ib_mad_hdr) + + sizeof(struct ib_rmpp_hdr))) { + ret = -EFAULT; + goto err; } - if (packet->mad.id < 0 || packet->mad.id >= IB_UMAD_MAX_AGENTS) { + if (packet->mad.hdr.id < 0 || + packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { ret = -EINVAL; goto err; } + packet->length = length; + down_read(&file->agent_mutex); - agent = file->agent[packet->mad.id]; + agent = file->agent[packet->mad.hdr.id]; if (!agent) { ret = -EINVAL; goto err_up; } - /* - * If userspace is generating a request that will generate a - * response, we need to make sure the high-order part of the - * transaction ID matches the agent being used to send the - * MAD. 
- */ - method = ((struct ib_mad_hdr *) packet->mad.data)->method; - - if (!(method & IB_MGMT_METHOD_RESP) && - method != IB_MGMT_METHOD_TRAP_REPRESS && - method != IB_MGMT_METHOD_SEND) { - tid = &((struct ib_mad_hdr *) packet->mad.data)->tid; - *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | - (be64_to_cpup(tid) & 0xffffffff)); - } - memset(&ah_attr, 0, sizeof ah_attr); - ah_attr.dlid = be16_to_cpu(packet->mad.lid); - ah_attr.sl = packet->mad.sl; - ah_attr.src_path_bits = packet->mad.path_bits; + ah_attr.dlid = be16_to_cpu(packet->mad.hdr.lid); + ah_attr.sl = packet->mad.hdr.sl; + ah_attr.src_path_bits = packet->mad.hdr.path_bits; ah_attr.port_num = file->port->port_num; - if (packet->mad.grh_present) { + if (packet->mad.hdr.grh_present) { ah_attr.ah_flags = IB_AH_GRH; - memcpy(ah_attr.grh.dgid.raw, packet->mad.gid, 16); - ah_attr.grh.flow_label = packet->mad.flow_label; - ah_attr.grh.hop_limit = packet->mad.hop_limit; - ah_attr.grh.traffic_class = packet->mad.traffic_class; + memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16); + ah_attr.grh.flow_label = packet->mad.hdr.flow_label; + ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; + ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; } packet->ah = ib_create_ah(agent->qp->pd, &ah_attr); @@ -309,35 +327,104 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err_up; } - gather_list.addr = dma_map_single(agent->device->dma_device, - packet->mad.data, - sizeof packet->mad.data, - DMA_TO_DEVICE); - gather_list.length = sizeof packet->mad.data; - gather_list.lkey = file->mr[packet->mad.id]->lkey; - pci_unmap_addr_set(packet, mapping, gather_list.addr); + rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; + if (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE) { + /* RMPP active */ + if (!agent->rmpp_version) { + ret = -EINVAL; + goto err_ah; + } + /* Validate that management class can support RMPP */ + if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) { + hdr_len = offsetof(struct ib_sa_mad, data); + data_len = length; + } else if ((rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && + (rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) { + hdr_len = offsetof(struct ib_vendor_mad, data); + data_len = length - hdr_len; + } else { + ret = -EINVAL; + goto err_ah; + } + rmpp_active = 1; + } else { + if (length > sizeof(struct ib_mad)) { + ret = -EINVAL; + goto err_ah; + } + hdr_len = offsetof(struct ib_mad, data); + data_len = length - hdr_len; + } + + packet->msg = ib_create_send_mad(agent, + be32_to_cpu(packet->mad.hdr.qpn), + 0, packet->ah, rmpp_active, + hdr_len, data_len, + GFP_KERNEL); + if (IS_ERR(packet->msg)) { + ret = PTR_ERR(packet->msg); + goto err_ah; + } - wr.wr.ud.mad_hdr = (struct ib_mad_hdr *) packet->mad.data; - wr.wr.ud.ah = packet->ah; - wr.wr.ud.remote_qpn = be32_to_cpu(packet->mad.qpn); - wr.wr.ud.remote_qkey = be32_to_cpu(packet->mad.qkey); - wr.wr.ud.timeout_ms = packet->mad.timeout_ms; - wr.wr.ud.retries = 0; + packet->msg->send_wr.wr.ud.timeout_ms = packet->mad.hdr.timeout_ms; + packet->msg->send_wr.wr.ud.retries = packet->mad.hdr.retries; - wr.wr_id = (unsigned long) packet; + /* Override send WR WRID initialized in ib_create_send_mad */ + packet->msg->send_wr.wr_id = (unsigned long) packet; - ret = ib_post_send_mad(agent, &wr, &bad_wr); - if (ret) { - dma_unmap_single(agent->device->dma_device, - pci_unmap_addr(packet, mapping), - sizeof packet->mad.data, - DMA_TO_DEVICE); - goto err_up; + if (!rmpp_active) { + /* Copy 
message from user into send buffer */ + if (copy_from_user(packet->msg->mad, + buf + sizeof(struct ib_user_mad), length)) { + ret = -EFAULT; + goto err_msg; + } + } else { + rmpp_hdr_size = sizeof(struct ib_mad_hdr) + + sizeof(struct ib_rmpp_hdr); + + /* Only copy MAD headers (RMPP header in place) */ + memcpy(packet->msg->mad, packet->mad.data, + sizeof(struct ib_mad_hdr)); + + /* Now, copy rest of message from user into send buffer */ + if (copy_from_user(((struct ib_rmpp_mad *) packet->msg->mad)->data, + buf + sizeof (struct ib_user_mad) + rmpp_hdr_size, + length - rmpp_hdr_size)) { + ret = -EFAULT; + goto err_msg; + } + } + + /* + * If userspace is generating a request that will generate a + * response, we need to make sure the high-order part of the + * transaction ID matches the agent being used to send the + * MAD. + */ + method = packet->msg->mad->mad_hdr.method; + + if (!(method & IB_MGMT_METHOD_RESP) && + method != IB_MGMT_METHOD_TRAP_REPRESS && + method != IB_MGMT_METHOD_SEND) { + tid = &packet->msg->mad->mad_hdr.tid; + *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | + (be64_to_cpup(tid) & 0xffffffff)); } + ret = ib_post_send_mad(agent, &packet->msg->send_wr, &bad_wr); + if (ret) + goto err_msg; + up_read(&file->agent_mutex); - return sizeof packet->mad; + return sizeof (struct ib_user_mad_hdr) + packet->length; + +err_msg: + ib_free_send_mad(packet->msg); + +err_ah: + ib_destroy_ah(packet->ah); err_up: up_read(&file->agent_mutex); @@ -400,7 +487,8 @@ found: agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI, ureq.mgmt_class ? &req : NULL, - 0, send_handler, recv_handler, file); + ureq.rmpp_version, + send_handler, recv_handler, file); if (IS_ERR(agent)) { ret = PTR_ERR(agent); goto out; @@ -461,8 +549,8 @@ out: return ret; } -static long ib_umad_ioctl(struct file *filp, - unsigned int cmd, unsigned long arg) +static long ib_umad_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) { switch (cmd) { case IB_USER_MAD_REGISTER_AGENT: @@ -518,14 +606,14 @@ static int ib_umad_close(struct inode *inode, struct file *filp) } static struct file_operations umad_fops = { - .owner = THIS_MODULE, - .read = ib_umad_read, - .write = ib_umad_write, - .poll = ib_umad_poll, + .owner = THIS_MODULE, + .read = ib_umad_read, + .write = ib_umad_write, + .poll = ib_umad_poll, .unlocked_ioctl = ib_umad_ioctl, - .compat_ioctl = ib_umad_ioctl, - .open = ib_umad_open, - .release = ib_umad_close + .compat_ioctl = ib_umad_ioctl, + .open = ib_umad_open, + .release = ib_umad_close }; static int ib_umad_sm_open(struct inode *inode, struct file *filp) -- cgit v1.2.3-55-g7522 From a5b74540770cb28b8ae779d0c27e228fe7500669 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 27 Jul 2005 11:45:44 -0700 Subject: [PATCH] IB: Add kernel portion of user CM implementation Add kernel portion of user CM implementation Signed-off-by: Libor Michalek Signed-off-by: Hal Rosenstock Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/ucm.c | 1396 +++++++++++++++++++++++++++++++++++++++++ drivers/infiniband/core/ucm.h | 89 +++ 2 files changed, 1485 insertions(+) create mode 100644 drivers/infiniband/core/ucm.c create mode 100644 drivers/infiniband/core/ucm.h (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c new file mode 100644 index 000000000000..4b4808a0be43 --- /dev/null +++ b/drivers/infiniband/core/ucm.c @@ -0,0 +1,1396 @@ +/* + * 
Copyright (c) 2005 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ucm.c 2594 2005-06-13 19:46:02Z libor $ + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "ucm.h" + +MODULE_AUTHOR("Libor Michalek"); +MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); +MODULE_LICENSE("Dual BSD/GPL"); + +enum { + IB_UCM_MAJOR = 231, + IB_UCM_MINOR = 255 +}; + +#define IB_UCM_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_MINOR) + +static struct semaphore ctx_id_mutex; +static struct idr ctx_id_table; +static int ctx_id_rover = 0; + +static struct ib_ucm_context *ib_ucm_ctx_get(int id) +{ + struct ib_ucm_context *ctx; + + down(&ctx_id_mutex); + ctx = idr_find(&ctx_id_table, id); + if (ctx) + ctx->ref++; + up(&ctx_id_mutex); + + return ctx; +} + +static void ib_ucm_ctx_put(struct ib_ucm_context *ctx) +{ + struct ib_ucm_event *uevent; + + down(&ctx_id_mutex); + + ctx->ref--; + if (!ctx->ref) + idr_remove(&ctx_id_table, ctx->id); + + up(&ctx_id_mutex); + + if (ctx->ref) + return; + + down(&ctx->file->mutex); + + list_del(&ctx->file_list); + while (!list_empty(&ctx->events)) { + + uevent = list_entry(ctx->events.next, + struct ib_ucm_event, ctx_list); + list_del(&uevent->file_list); + list_del(&uevent->ctx_list); + + /* clear incoming connections. 
*/ + if (uevent->cm_id) + ib_destroy_cm_id(uevent->cm_id); + + kfree(uevent); + } + + up(&ctx->file->mutex); + + printk(KERN_ERR "UCM: Destroyed CM ID <%d>\n", ctx->id); + + ib_destroy_cm_id(ctx->cm_id); + kfree(ctx); +} + +static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) +{ + struct ib_ucm_context *ctx; + int result; + + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + ctx->ref = 1; /* user reference */ + ctx->file = file; + + INIT_LIST_HEAD(&ctx->events); + init_MUTEX(&ctx->mutex); + + list_add_tail(&ctx->file_list, &file->ctxs); + + ctx_id_rover = (ctx_id_rover + 1) & INT_MAX; +retry: + result = idr_pre_get(&ctx_id_table, GFP_KERNEL); + if (!result) + goto error; + + down(&ctx_id_mutex); + result = idr_get_new_above(&ctx_id_table, ctx, ctx_id_rover, &ctx->id); + up(&ctx_id_mutex); + + if (result == -EAGAIN) + goto retry; + if (result) + goto error; + + printk(KERN_ERR "UCM: Allocated CM ID <%d>\n", ctx->id); + + return ctx; +error: + list_del(&ctx->file_list); + kfree(ctx); + + return NULL; +} +/* + * Event portion of the API, handle CM events + * and allow event polling. + */ +static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath, + struct ib_sa_path_rec *kpath) +{ + if (!kpath || !upath) + return; + + memcpy(upath->dgid, kpath->dgid.raw, sizeof(union ib_gid)); + memcpy(upath->sgid, kpath->sgid.raw, sizeof(union ib_gid)); + + upath->dlid = kpath->dlid; + upath->slid = kpath->slid; + upath->raw_traffic = kpath->raw_traffic; + upath->flow_label = kpath->flow_label; + upath->hop_limit = kpath->hop_limit; + upath->traffic_class = kpath->traffic_class; + upath->reversible = kpath->reversible; + upath->numb_path = kpath->numb_path; + upath->pkey = kpath->pkey; + upath->sl = kpath->sl; + upath->mtu_selector = kpath->mtu_selector; + upath->mtu = kpath->mtu; + upath->rate_selector = kpath->rate_selector; + upath->rate = kpath->rate; + upath->packet_life_time = kpath->packet_life_time; + upath->preference = kpath->preference; + + upath->packet_life_time_selector = + kpath->packet_life_time_selector; +} + +static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, + struct ib_cm_req_event_param *kreq) +{ + ureq->listen_id = (long)kreq->listen_id->context; + + ureq->remote_ca_guid = kreq->remote_ca_guid; + ureq->remote_qkey = kreq->remote_qkey; + ureq->remote_qpn = kreq->remote_qpn; + ureq->qp_type = kreq->qp_type; + ureq->starting_psn = kreq->starting_psn; + ureq->responder_resources = kreq->responder_resources; + ureq->initiator_depth = kreq->initiator_depth; + ureq->local_cm_response_timeout = kreq->local_cm_response_timeout; + ureq->flow_control = kreq->flow_control; + ureq->remote_cm_response_timeout = kreq->remote_cm_response_timeout; + ureq->retry_count = kreq->retry_count; + ureq->rnr_retry_count = kreq->rnr_retry_count; + ureq->srq = kreq->srq; + + ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path); + ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path); +} + +static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, + struct ib_cm_rep_event_param *krep) +{ + urep->remote_ca_guid = krep->remote_ca_guid; + urep->remote_qkey = krep->remote_qkey; + urep->remote_qpn = krep->remote_qpn; + urep->starting_psn = krep->starting_psn; + urep->responder_resources = krep->responder_resources; + urep->initiator_depth = krep->initiator_depth; + urep->target_ack_delay = krep->target_ack_delay; + urep->failover_accepted = krep->failover_accepted; + urep->flow_control = krep->flow_control; + 
urep->rnr_retry_count = krep->rnr_retry_count; + urep->srq = krep->srq; +} + +static void ib_ucm_event_rej_get(struct ib_ucm_rej_event_resp *urej, + struct ib_cm_rej_event_param *krej) +{ + urej->reason = krej->reason; +} + +static void ib_ucm_event_mra_get(struct ib_ucm_mra_event_resp *umra, + struct ib_cm_mra_event_param *kmra) +{ + umra->timeout = kmra->service_timeout; +} + +static void ib_ucm_event_lap_get(struct ib_ucm_lap_event_resp *ulap, + struct ib_cm_lap_event_param *klap) +{ + ib_ucm_event_path_get(&ulap->path, klap->alternate_path); +} + +static void ib_ucm_event_apr_get(struct ib_ucm_apr_event_resp *uapr, + struct ib_cm_apr_event_param *kapr) +{ + uapr->status = kapr->ap_status; +} + +static void ib_ucm_event_sidr_req_get(struct ib_ucm_sidr_req_event_resp *ureq, + struct ib_cm_sidr_req_event_param *kreq) +{ + ureq->listen_id = (long)kreq->listen_id->context; + ureq->pkey = kreq->pkey; +} + +static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep, + struct ib_cm_sidr_rep_event_param *krep) +{ + urep->status = krep->status; + urep->qkey = krep->qkey; + urep->qpn = krep->qpn; +}; + +static int ib_ucm_event_process(struct ib_cm_event *evt, + struct ib_ucm_event *uvt) +{ + void *info = NULL; + int result; + + switch (evt->event) { + case IB_CM_REQ_RECEIVED: + ib_ucm_event_req_get(&uvt->resp.u.req_resp, + &evt->param.req_rcvd); + uvt->data_len = IB_CM_REQ_PRIVATE_DATA_SIZE; + uvt->resp.present |= (evt->param.req_rcvd.primary_path ? + IB_UCM_PRES_PRIMARY : 0); + uvt->resp.present |= (evt->param.req_rcvd.alternate_path ? + IB_UCM_PRES_ALTERNATE : 0); + break; + case IB_CM_REP_RECEIVED: + ib_ucm_event_rep_get(&uvt->resp.u.rep_resp, + &evt->param.rep_rcvd); + uvt->data_len = IB_CM_REP_PRIVATE_DATA_SIZE; + + break; + case IB_CM_RTU_RECEIVED: + uvt->data_len = IB_CM_RTU_PRIVATE_DATA_SIZE; + uvt->resp.u.send_status = evt->param.send_status; + + break; + case IB_CM_DREQ_RECEIVED: + uvt->data_len = IB_CM_DREQ_PRIVATE_DATA_SIZE; + uvt->resp.u.send_status = evt->param.send_status; + + break; + case IB_CM_DREP_RECEIVED: + uvt->data_len = IB_CM_DREP_PRIVATE_DATA_SIZE; + uvt->resp.u.send_status = evt->param.send_status; + + break; + case IB_CM_MRA_RECEIVED: + ib_ucm_event_mra_get(&uvt->resp.u.mra_resp, + &evt->param.mra_rcvd); + uvt->data_len = IB_CM_MRA_PRIVATE_DATA_SIZE; + + break; + case IB_CM_REJ_RECEIVED: + ib_ucm_event_rej_get(&uvt->resp.u.rej_resp, + &evt->param.rej_rcvd); + uvt->data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; + uvt->info_len = evt->param.rej_rcvd.ari_length; + info = evt->param.rej_rcvd.ari; + + break; + case IB_CM_LAP_RECEIVED: + ib_ucm_event_lap_get(&uvt->resp.u.lap_resp, + &evt->param.lap_rcvd); + uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE; + uvt->resp.present |= (evt->param.lap_rcvd.alternate_path ? 
+ IB_UCM_PRES_ALTERNATE : 0); + break; + case IB_CM_APR_RECEIVED: + ib_ucm_event_apr_get(&uvt->resp.u.apr_resp, + &evt->param.apr_rcvd); + uvt->data_len = IB_CM_APR_PRIVATE_DATA_SIZE; + uvt->info_len = evt->param.apr_rcvd.info_len; + info = evt->param.apr_rcvd.apr_info; + + break; + case IB_CM_SIDR_REQ_RECEIVED: + ib_ucm_event_sidr_req_get(&uvt->resp.u.sidr_req_resp, + &evt->param.sidr_req_rcvd); + uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; + + break; + case IB_CM_SIDR_REP_RECEIVED: + ib_ucm_event_sidr_rep_get(&uvt->resp.u.sidr_rep_resp, + &evt->param.sidr_rep_rcvd); + uvt->data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; + uvt->info_len = evt->param.sidr_rep_rcvd.info_len; + info = evt->param.sidr_rep_rcvd.info; + + break; + default: + uvt->resp.u.send_status = evt->param.send_status; + + break; + } + + if (uvt->data_len && evt->private_data) { + + uvt->data = kmalloc(uvt->data_len, GFP_KERNEL); + if (!uvt->data) { + result = -ENOMEM; + goto error; + } + + memcpy(uvt->data, evt->private_data, uvt->data_len); + uvt->resp.present |= IB_UCM_PRES_DATA; + } + + if (uvt->info_len && info) { + + uvt->info = kmalloc(uvt->info_len, GFP_KERNEL); + if (!uvt->info) { + result = -ENOMEM; + goto error; + } + + memcpy(uvt->info, info, uvt->info_len); + uvt->resp.present |= IB_UCM_PRES_INFO; + } + + return 0; +error: + if (uvt->info) + kfree(uvt->info); + if (uvt->data) + kfree(uvt->data); + return result; +} + +static int ib_ucm_event_handler(struct ib_cm_id *cm_id, + struct ib_cm_event *event) +{ + struct ib_ucm_event *uevent; + struct ib_ucm_context *ctx; + int result = 0; + int id; + /* + * lookup correct context based on event type. + */ + switch (event->event) { + case IB_CM_REQ_RECEIVED: + id = (long)event->param.req_rcvd.listen_id->context; + break; + case IB_CM_SIDR_REQ_RECEIVED: + id = (long)event->param.sidr_req_rcvd.listen_id->context; + break; + default: + id = (long)cm_id->context; + break; + } + + printk(KERN_ERR "UCM: Event. CM ID <%d> event <%d>\n", + id, event->event); + + ctx = ib_ucm_ctx_get(id); + if (!ctx) + return -ENOENT; + + if (event->event == IB_CM_REQ_RECEIVED || + event->event == IB_CM_SIDR_REQ_RECEIVED) + id = IB_UCM_CM_ID_INVALID; + + uevent = kmalloc(sizeof(*uevent), GFP_KERNEL); + if (!uevent) { + result = -ENOMEM; + goto done; + } + + memset(uevent, 0, sizeof(*uevent)); + + uevent->resp.id = id; + uevent->resp.event = event->event; + + result = ib_ucm_event_process(event, uevent); + if (result) + goto done; + + uevent->ctx = ctx; + uevent->cm_id = ((event->event == IB_CM_REQ_RECEIVED || + event->event == IB_CM_SIDR_REQ_RECEIVED ) ? 
+ cm_id : NULL); + + down(&ctx->file->mutex); + + list_add_tail(&uevent->file_list, &ctx->file->events); + list_add_tail(&uevent->ctx_list, &ctx->events); + + wake_up_interruptible(&ctx->file->poll_wait); + + up(&ctx->file->mutex); +done: + ctx->error = result; + ib_ucm_ctx_put(ctx); /* func reference */ + return result; +} + +static ssize_t ib_ucm_event(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_context *ctx; + struct ib_ucm_event_get cmd; + struct ib_ucm_event *uevent = NULL; + int result = 0; + DEFINE_WAIT(wait); + + if (out_len < sizeof(struct ib_ucm_event_resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + /* + * wait + */ + down(&file->mutex); + + while (list_empty(&file->events)) { + + if (file->filp->f_flags & O_NONBLOCK) { + result = -EAGAIN; + break; + } + + if (signal_pending(current)) { + result = -ERESTARTSYS; + break; + } + + prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE); + + up(&file->mutex); + schedule(); + down(&file->mutex); + + finish_wait(&file->poll_wait, &wait); + } + + if (result) + goto done; + + uevent = list_entry(file->events.next, struct ib_ucm_event, file_list); + + if (!uevent->cm_id) + goto user; + + ctx = ib_ucm_ctx_alloc(file); + if (!ctx) { + result = -ENOMEM; + goto done; + } + + ctx->cm_id = uevent->cm_id; + ctx->cm_id->cm_handler = ib_ucm_event_handler; + ctx->cm_id->context = (void *)(unsigned long)ctx->id; + + uevent->resp.id = ctx->id; + +user: + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &uevent->resp, sizeof(uevent->resp))) { + result = -EFAULT; + goto done; + } + + if (uevent->data) { + + if (cmd.data_len < uevent->data_len) { + result = -ENOMEM; + goto done; + } + + if (copy_to_user((void __user *)(unsigned long)cmd.data, + uevent->data, uevent->data_len)) { + result = -EFAULT; + goto done; + } + } + + if (uevent->info) { + + if (cmd.info_len < uevent->info_len) { + result = -ENOMEM; + goto done; + } + + if (copy_to_user((void __user *)(unsigned long)cmd.info, + uevent->info, uevent->info_len)) { + result = -EFAULT; + goto done; + } + } + + list_del(&uevent->file_list); + list_del(&uevent->ctx_list); + + if (uevent->data) + kfree(uevent->data); + if (uevent->info) + kfree(uevent->info); + kfree(uevent); +done: + up(&file->mutex); + return result; +} + + +static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_create_id cmd; + struct ib_ucm_create_id_resp resp; + struct ib_ucm_context *ctx; + int result; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ib_ucm_ctx_alloc(file); + if (!ctx) + return -ENOMEM; + + ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler, + (void *)(unsigned long)ctx->id); + if (!ctx->cm_id) { + result = -ENOMEM; + goto err_cm; + } + + resp.id = ctx->id; + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) { + result = -EFAULT; + goto err_ret; + } + + return 0; +err_ret: + ib_destroy_cm_id(ctx->cm_id); +err_cm: + ib_ucm_ctx_put(ctx); /* user reference */ + + return result; +} + +static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_destroy_id cmd; + struct ib_ucm_context *ctx; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ib_ucm_ctx_get(cmd.id); + if (!ctx) + return -ENOENT; + + ib_ucm_ctx_put(ctx); 
/* user reference */ + ib_ucm_ctx_put(ctx); /* func reference */ + + return 0; +} + +static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_attr_id_resp resp; + struct ib_ucm_attr_id cmd; + struct ib_ucm_context *ctx; + int result = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ib_ucm_ctx_get(cmd.id); + if (!ctx) + return -ENOENT; + + down(&ctx->file->mutex); + if (ctx->file != file) { + result = -EINVAL; + goto done; + } + + resp.service_id = ctx->cm_id->service_id; + resp.service_mask = ctx->cm_id->service_mask; + resp.local_id = ctx->cm_id->local_id; + resp.remote_id = ctx->cm_id->remote_id; + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + result = -EFAULT; + +done: + up(&ctx->file->mutex); + ib_ucm_ctx_put(ctx); /* func reference */ + return result; +} + +static ssize_t ib_ucm_listen(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_listen cmd; + struct ib_ucm_context *ctx; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ib_ucm_ctx_get(cmd.id); + if (!ctx) + return -ENOENT; + + down(&ctx->file->mutex); + if (ctx->file != file) + result = -EINVAL; + else + result = ib_cm_listen(ctx->cm_id, cmd.service_id, + cmd.service_mask); + + up(&ctx->file->mutex); + ib_ucm_ctx_put(ctx); /* func reference */ + return result; +} + +static ssize_t ib_ucm_establish(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_establish cmd; + struct ib_ucm_context *ctx; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ib_ucm_ctx_get(cmd.id); + if (!ctx) + return -ENOENT; + + down(&ctx->file->mutex); + if (ctx->file != file) + result = -EINVAL; + else + result = ib_cm_establish(ctx->cm_id); + + up(&ctx->file->mutex); + ib_ucm_ctx_put(ctx); /* func reference */ + return result; +} + +static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len) +{ + void *data; + + *dest = NULL; + + if (!len) + return 0; + + data = kmalloc(len, GFP_KERNEL); + if (!data) + return -ENOMEM; + + if (copy_from_user(data, (void __user *)(unsigned long)src, len)) { + kfree(data); + return -EFAULT; + } + + *dest = data; + return 0; +} + +static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src) +{ + struct ib_ucm_path_rec ucm_path; + struct ib_sa_path_rec *sa_path; + + *path = NULL; + + if (!src) + return 0; + + sa_path = kmalloc(sizeof(*sa_path), GFP_KERNEL); + if (!sa_path) + return -ENOMEM; + + if (copy_from_user(&ucm_path, (void __user *)(unsigned long)src, + sizeof(ucm_path))) { + + kfree(sa_path); + return -EFAULT; + } + + memcpy(sa_path->dgid.raw, ucm_path.dgid, sizeof(union ib_gid)); + memcpy(sa_path->sgid.raw, ucm_path.sgid, sizeof(union ib_gid)); + + sa_path->dlid = ucm_path.dlid; + sa_path->slid = ucm_path.slid; + sa_path->raw_traffic = ucm_path.raw_traffic; + sa_path->flow_label = ucm_path.flow_label; + sa_path->hop_limit = ucm_path.hop_limit; + sa_path->traffic_class = ucm_path.traffic_class; + sa_path->reversible = ucm_path.reversible; + sa_path->numb_path = ucm_path.numb_path; + sa_path->pkey = ucm_path.pkey; + sa_path->sl = ucm_path.sl; + sa_path->mtu_selector = ucm_path.mtu_selector; + sa_path->mtu = ucm_path.mtu; + sa_path->rate_selector = ucm_path.rate_selector; + sa_path->rate = ucm_path.rate; + sa_path->packet_life_time = 
ucm_path.packet_life_time; + sa_path->preference = ucm_path.preference; + + sa_path->packet_life_time_selector = + ucm_path.packet_life_time_selector; + + *path = sa_path; + return 0; +} + +static ssize_t ib_ucm_send_req(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_cm_req_param param; + struct ib_ucm_context *ctx; + struct ib_ucm_req cmd; + int result; + + param.private_data = NULL; + param.primary_path = NULL; + param.alternate_path = NULL; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); + if (result) + goto done; + + result = ib_ucm_path_get(¶m.primary_path, cmd.primary_path); + if (result) + goto done; + + result = ib_ucm_path_get(¶m.alternate_path, cmd.alternate_path); + if (result) + goto done; + + param.private_data_len = cmd.len; + param.service_id = cmd.sid; + param.qp_num = cmd.qpn; + param.qp_type = cmd.qp_type; + param.starting_psn = cmd.psn; + param.peer_to_peer = cmd.peer_to_peer; + param.responder_resources = cmd.responder_resources; + param.initiator_depth = cmd.initiator_depth; + param.remote_cm_response_timeout = cmd.remote_cm_response_timeout; + param.flow_control = cmd.flow_control; + param.local_cm_response_timeout = cmd.local_cm_response_timeout; + param.retry_count = cmd.retry_count; + param.rnr_retry_count = cmd.rnr_retry_count; + param.max_cm_retries = cmd.max_cm_retries; + param.srq = cmd.srq; + + ctx = ib_ucm_ctx_get(cmd.id); + if (!ctx) { + result = -ENOENT; + goto done; + } + + down(&ctx->file->mutex); + if (ctx->file != file) + result = -EINVAL; + else + result = ib_send_cm_req(ctx->cm_id, ¶m); + + up(&ctx->file->mutex); + ib_ucm_ctx_put(ctx); /* func reference */ +done: + if (param.private_data) + kfree(param.private_data); + if (param.primary_path) + kfree(param.primary_path); + if (param.alternate_path) + kfree(param.alternate_path); + + return result; +} + +static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_cm_rep_param param; + struct ib_ucm_context *ctx; + struct ib_ucm_rep cmd; + int result; + + param.private_data = NULL; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); + if (result) + return result; + + param.qp_num = cmd.qpn; + param.starting_psn = cmd.psn; + param.private_data_len = cmd.len; + param.responder_resources = cmd.responder_resources; + param.initiator_depth = cmd.initiator_depth; + param.target_ack_delay = cmd.target_ack_delay; + param.failover_accepted = cmd.failover_accepted; + param.flow_control = cmd.flow_control; + param.rnr_retry_count = cmd.rnr_retry_count; + param.srq = cmd.srq; + + ctx = ib_ucm_ctx_get(cmd.id); + if (!ctx) { + result = -ENOENT; + goto done; + } + + down(&ctx->file->mutex); + if (ctx->file != file) + result = -EINVAL; + else + result = ib_send_cm_rep(ctx->cm_id, ¶m); + + up(&ctx->file->mutex); + ib_ucm_ctx_put(ctx); /* func reference */ +done: + if (param.private_data) + kfree(param.private_data); + + return result; +} + +static ssize_t ib_ucm_send_private_data(struct ib_ucm_file *file, + const char __user *inbuf, int in_len, + int (*func)(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len)) +{ + struct ib_ucm_private_data cmd; + struct ib_ucm_context *ctx; + const void *private_data = NULL; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = 
ib_ucm_alloc_data(&private_data, cmd.data, cmd.len); + if (result) + return result; + + ctx = ib_ucm_ctx_get(cmd.id); + if (!ctx) { + result = -ENOENT; + goto done; + } + + down(&ctx->file->mutex); + if (ctx->file != file) + result = -EINVAL; + else + result = func(ctx->cm_id, private_data, cmd.len); + + up(&ctx->file->mutex); + ib_ucm_ctx_put(ctx); /* func reference */ +done: + if (private_data) + kfree(private_data); + + return result; +} + +static ssize_t ib_ucm_send_rtu(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_rtu); +} + +static ssize_t ib_ucm_send_dreq(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_dreq); +} + +static ssize_t ib_ucm_send_drep(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_drep); +} + +static ssize_t ib_ucm_send_info(struct ib_ucm_file *file, + const char __user *inbuf, int in_len, + int (*func)(struct ib_cm_id *cm_id, + int status, + const void *info, + u8 info_len, + const void *data, + u8 data_len)) +{ + struct ib_ucm_context *ctx; + struct ib_ucm_info cmd; + const void *data = NULL; + const void *info = NULL; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(&data, cmd.data, cmd.data_len); + if (result) + goto done; + + result = ib_ucm_alloc_data(&info, cmd.info, cmd.info_len); + if (result) + goto done; + + ctx = ib_ucm_ctx_get(cmd.id); + if (!ctx) { + result = -ENOENT; + goto done; + } + + down(&ctx->file->mutex); + if (ctx->file != file) + result = -EINVAL; + else + result = func(ctx->cm_id, cmd.status, + info, cmd.info_len, + data, cmd.data_len); + + up(&ctx->file->mutex); + ib_ucm_ctx_put(ctx); /* func reference */ +done: + if (data) + kfree(data); + if (info) + kfree(info); + + return result; +} + +static ssize_t ib_ucm_send_rej(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_rej); +} + +static ssize_t ib_ucm_send_apr(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_apr); +} + +static ssize_t ib_ucm_send_mra(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_context *ctx; + struct ib_ucm_mra cmd; + const void *data = NULL; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); + if (result) + return result; + + ctx = ib_ucm_ctx_get(cmd.id); + if (!ctx) { + result = -ENOENT; + goto done; + } + + down(&ctx->file->mutex); + if (ctx->file != file) + result = -EINVAL; + else + result = ib_send_cm_mra(ctx->cm_id, cmd.timeout, + data, cmd.len); + + up(&ctx->file->mutex); + ib_ucm_ctx_put(ctx); /* func reference */ +done: + if (data) + kfree(data); + + return result; +} + +static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_context *ctx; + struct ib_sa_path_rec *path = NULL; + struct ib_ucm_lap cmd; + const void *data = NULL; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); + if (result) + goto 
done; + + result = ib_ucm_path_get(&path, cmd.path); + if (result) + goto done; + + ctx = ib_ucm_ctx_get(cmd.id); + if (!ctx) { + result = -ENOENT; + goto done; + } + + down(&ctx->file->mutex); + if (ctx->file != file) + result = -EINVAL; + else + result = ib_send_cm_lap(ctx->cm_id, path, data, cmd.len); + + up(&ctx->file->mutex); + ib_ucm_ctx_put(ctx); /* func reference */ +done: + if (data) + kfree(data); + if (path) + kfree(path); + + return result; +} + +static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_cm_sidr_req_param param; + struct ib_ucm_context *ctx; + struct ib_ucm_sidr_req cmd; + int result; + + param.private_data = NULL; + param.path = NULL; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); + if (result) + goto done; + + result = ib_ucm_path_get(¶m.path, cmd.path); + if (result) + goto done; + + param.private_data_len = cmd.len; + param.service_id = cmd.sid; + param.timeout_ms = cmd.timeout; + param.max_cm_retries = cmd.max_cm_retries; + param.pkey = cmd.pkey; + + ctx = ib_ucm_ctx_get(cmd.id); + if (!ctx) { + result = -ENOENT; + goto done; + } + + down(&ctx->file->mutex); + if (ctx->file != file) + result = -EINVAL; + else + result = ib_send_cm_sidr_req(ctx->cm_id, ¶m); + + up(&ctx->file->mutex); + ib_ucm_ctx_put(ctx); /* func reference */ +done: + if (param.private_data) + kfree(param.private_data); + if (param.path) + kfree(param.path); + + return result; +} + +static ssize_t ib_ucm_send_sidr_rep(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_cm_sidr_rep_param param; + struct ib_ucm_sidr_rep cmd; + struct ib_ucm_context *ctx; + int result; + + param.info = NULL; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(¶m.private_data, + cmd.data, cmd.data_len); + if (result) + goto done; + + result = ib_ucm_alloc_data(¶m.info, cmd.info, cmd.info_len); + if (result) + goto done; + + param.qp_num = cmd.qpn; + param.qkey = cmd.qkey; + param.status = cmd.status; + param.info_length = cmd.info_len; + param.private_data_len = cmd.data_len; + + ctx = ib_ucm_ctx_get(cmd.id); + if (!ctx) { + result = -ENOENT; + goto done; + } + + down(&ctx->file->mutex); + if (ctx->file != file) + result = -EINVAL; + else + result = ib_send_cm_sidr_rep(ctx->cm_id, ¶m); + + up(&ctx->file->mutex); + ib_ucm_ctx_put(ctx); /* func reference */ +done: + if (param.private_data) + kfree(param.private_data); + if (param.info) + kfree(param.info); + + return result; +} + +static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) = { + [IB_USER_CM_CMD_CREATE_ID] = ib_ucm_create_id, + [IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id, + [IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id, + [IB_USER_CM_CMD_LISTEN] = ib_ucm_listen, + [IB_USER_CM_CMD_ESTABLISH] = ib_ucm_establish, + [IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req, + [IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep, + [IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu, + [IB_USER_CM_CMD_SEND_DREQ] = ib_ucm_send_dreq, + [IB_USER_CM_CMD_SEND_DREP] = ib_ucm_send_drep, + [IB_USER_CM_CMD_SEND_REJ] = ib_ucm_send_rej, + [IB_USER_CM_CMD_SEND_MRA] = ib_ucm_send_mra, + [IB_USER_CM_CMD_SEND_LAP] = ib_ucm_send_lap, + [IB_USER_CM_CMD_SEND_APR] = ib_ucm_send_apr, + [IB_USER_CM_CMD_SEND_SIDR_REQ] = ib_ucm_send_sidr_req, + [IB_USER_CM_CMD_SEND_SIDR_REP] = ib_ucm_send_sidr_rep, + 
+        [IB_USER_CM_CMD_EVENT]         = ib_ucm_event,
+};
+
+static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
+                            size_t len, loff_t *pos)
+{
+        struct ib_ucm_file *file = filp->private_data;
+        struct ib_ucm_cmd_hdr hdr;
+        ssize_t result;
+
+        if (len < sizeof(hdr))
+                return -EINVAL;
+
+        if (copy_from_user(&hdr, buf, sizeof(hdr)))
+                return -EFAULT;
+
+        printk(KERN_ERR "UCM: Write. cmd <%d> in <%d> out <%d> len <%Zu>\n",
+               hdr.cmd, hdr.in, hdr.out, len);
+
+        if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
+                return -EINVAL;
+
+        if (hdr.in + sizeof(hdr) > len)
+                return -EINVAL;
+
+        result = ucm_cmd_table[hdr.cmd](file, buf + sizeof(hdr),
+                                        hdr.in, hdr.out);
+        if (!result)
+                result = len;
+
+        return result;
+}
+
+static unsigned int ib_ucm_poll(struct file *filp,
+                                struct poll_table_struct *wait)
+{
+        struct ib_ucm_file *file = filp->private_data;
+        unsigned int mask = 0;
+
+        poll_wait(filp, &file->poll_wait, wait);
+
+        if (!list_empty(&file->events))
+                mask = POLLIN | POLLRDNORM;
+
+        return mask;
+}
+
+static int ib_ucm_open(struct inode *inode, struct file *filp)
+{
+        struct ib_ucm_file *file;
+
+        file = kmalloc(sizeof(*file), GFP_KERNEL);
+        if (!file)
+                return -ENOMEM;
+
+        INIT_LIST_HEAD(&file->events);
+        INIT_LIST_HEAD(&file->ctxs);
+        init_waitqueue_head(&file->poll_wait);
+
+        init_MUTEX(&file->mutex);
+
+        filp->private_data = file;
+        file->filp = filp;
+
+        printk(KERN_ERR "UCM: Created struct\n");
+
+        return 0;
+}
+
+static int ib_ucm_close(struct inode *inode, struct file *filp)
+{
+        struct ib_ucm_file *file = filp->private_data;
+        struct ib_ucm_context *ctx;
+
+        down(&file->mutex);
+
+        while (!list_empty(&file->ctxs)) {
+
+                ctx = list_entry(file->ctxs.next,
+                                 struct ib_ucm_context, file_list);
+
+                up(&ctx->file->mutex);
+                ib_ucm_ctx_put(ctx); /* user reference */
+                down(&file->mutex);
+        }
+
+        up(&file->mutex);
+
+        kfree(file);
+
+        printk(KERN_ERR "UCM: Deleted struct\n");
+        return 0;
+}
+
+static struct file_operations ib_ucm_fops = {
+        .owner   = THIS_MODULE,
+        .open    = ib_ucm_open,
+        .release = ib_ucm_close,
+        .write   = ib_ucm_write,
+        .poll    = ib_ucm_poll,
+};
+
+
+static struct class_simple *ib_ucm_class;
+static struct cdev          ib_ucm_cdev;
+
+static int __init ib_ucm_init(void)
+{
+        int result;
+
+        result = register_chrdev_region(IB_UCM_DEV, 1, "infiniband_cm");
+        if (result) {
+                printk(KERN_ERR "UCM: Error <%d> registering dev\n", result);
+                goto err_chr;
+        }
+
+        cdev_init(&ib_ucm_cdev, &ib_ucm_fops);
+
+        result = cdev_add(&ib_ucm_cdev, IB_UCM_DEV, 1);
+        if (result) {
+                printk(KERN_ERR "UCM: Error <%d> adding cdev\n", result);
+                goto err_cdev;
+        }
+
+        ib_ucm_class = class_simple_create(THIS_MODULE, "infiniband_cm");
+        if (IS_ERR(ib_ucm_class)) {
+                result = PTR_ERR(ib_ucm_class);
+                printk(KERN_ERR "UCM: Error <%d> creating class\n", result);
+                goto err_class;
+        }
+
+        class_simple_device_add(ib_ucm_class,
+                                IB_UCM_DEV,
+                                NULL,
+                                "ucm");
+
+        idr_init(&ctx_id_table);
+        init_MUTEX(&ctx_id_mutex);
+
+        return 0;
+err_class:
+        cdev_del(&ib_ucm_cdev);
+err_cdev:
+        unregister_chrdev_region(IB_UCM_DEV, 1);
+err_chr:
+        return result;
+}
+
+static void __exit ib_ucm_cleanup(void)
+{
+        class_simple_device_remove(IB_UCM_DEV);
+        class_simple_destroy(ib_ucm_class);
+        cdev_del(&ib_ucm_cdev);
+        unregister_chrdev_region(IB_UCM_DEV, 1);
+}
+
+module_init(ib_ucm_init);
+module_exit(ib_ucm_cleanup);
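The ucm_cmd_table[] dispatch and the write()/poll() pair above are the whole userspace ABI of this driver. The sketch below shows, from the userspace side, how a client would drive it. It is illustrative only: the device node path, the field widths in the local header mirror, and the zeroed payload are assumptions (the authoritative definitions live in the ib_user_cm.h ABI header, which is not part of this hunk); only the cmd/in/out framing, the one-command-per-write() rule, and the poll()-for-events flow are taken from the code above.

/* Illustrative userspace client for the ib_ucm write()/poll() interface.
 * ASSUMPTIONS: the device node name and the field widths below are guesses;
 * the real layout and command constants come from the kernel's ib_user_cm.h.
 */
#include <fcntl.h>
#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

struct ucm_cmd_hdr {            /* hypothetical mirror of ib_ucm_cmd_hdr */
        uint32_t cmd;           /* index into ucm_cmd_table[] */
        uint16_t in;            /* bytes of command payload that follow */
        uint16_t out;           /* bytes the kernel may write back */
};

int main(void)
{
        /* The "ucm" class device; the node path depends on udev configuration. */
        int fd = open("/dev/ucm", O_RDWR);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        unsigned char req[sizeof(struct ucm_cmd_hdr) + 64];
        struct ucm_cmd_hdr hdr = {
                .cmd = 0,       /* e.g. IB_USER_CM_CMD_CREATE_ID from ib_user_cm.h */
                .in  = 64,      /* must satisfy hdr.in + sizeof(hdr) <= write length */
                .out = 0,
        };
        memset(req, 0, sizeof(req));    /* payload zeroed purely for illustration */
        memcpy(req, &hdr, sizeof(hdr));

        /* One command per write(); the kernel dispatches on hdr.cmd. */
        if (write(fd, req, sizeof(req)) < 0)
                perror("write");

        /* Block until a CM event is queued on file->events. */
        struct pollfd pfd = { .fd = fd, .events = POLLIN };
        poll(&pfd, 1, -1);
        if (pfd.revents & POLLIN)
                printf("event ready; fetch it with IB_USER_CM_CMD_EVENT\n");

        close(fd);
        return 0;
}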
diff --git a/drivers/infiniband/core/ucm.h b/drivers/infiniband/core/ucm.h
new file mode 100644
index 000000000000..6d36606151b2
--- /dev/null
+++ b/drivers/infiniband/core/ucm.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: ucm.h 2208 2005-04-22 23:24:31Z libor $
+ */
+
+#ifndef UCM_H
+#define UCM_H
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#define IB_UCM_CM_ID_INVALID 0xffffffff
+
+struct ib_ucm_file {
+        struct semaphore mutex;
+        struct file *filp;
+        /*
+         * list of pending events
+         */
+        struct list_head  ctxs;   /* list of active connections */
+        struct list_head  events; /* list of pending events */
+        wait_queue_head_t poll_wait;
+};
+
+struct ib_ucm_context {
+        int id;
+        int ref;
+        int error;
+
+        struct ib_ucm_file *file;
+        struct ib_cm_id    *cm_id;
+        struct semaphore    mutex;
+
+        struct list_head events;    /* list of pending events. */
+        struct list_head file_list; /* member in file ctx list */
+};
+
+struct ib_ucm_event {
+        struct ib_ucm_context *ctx;
+        struct list_head file_list; /* member in file event list */
+        struct list_head ctx_list;  /* member in ctx event list */
+
+        struct ib_ucm_event_resp resp;
+        void *data;
+        void *info;
+        int  data_len;
+        int  info_len;
+        /*
+         * new connection identifiers needs to be saved until
+         * userspace can get a handle on them.
+         */
+        struct ib_cm_id *cm_id;
+};
+
+#endif /* UCM_H */
-- 
cgit v1.2.3-55-g7522
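A note on the two list_head members in struct ib_ucm_event above: each event is linked into both its connection context's queue (ctx_list) and the open file's queue (file_list), so it can be looked up per connection or per file descriptor. The toy userspace program below restates that double-linking with plain pointers; it is not kernel code, does not use <linux/list.h>, and only illustrates the data-structure choice inferred from the structure comments.

/* Toy illustration (userspace) of an object living on two lists at once,
 * mirroring the ctx_list/file_list pair in struct ib_ucm_event.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_event {
        int id;
        struct toy_event *next_in_ctx;  /* analogue of ctx_list  */
        struct toy_event *next_in_file; /* analogue of file_list */
};

struct toy_ctx  { struct toy_event *events; };  /* per-connection queue */
struct toy_file { struct toy_event *events; };  /* per-open-file queue  */

static void queue_event(struct toy_ctx *ctx, struct toy_file *file, int id)
{
        struct toy_event *ev = calloc(1, sizeof(*ev));
        ev->id = id;
        ev->next_in_ctx = ctx->events;    /* push onto the context's list */
        ctx->events = ev;
        ev->next_in_file = file->events;  /* and onto the file's list,    */
        file->events = ev;                /* which is what poll() watches */
}

int main(void)
{
        struct toy_ctx ctx = { 0 };
        struct toy_file file = { 0 };

        queue_event(&ctx, &file, 1);
        queue_event(&ctx, &file, 2);

        for (struct toy_event *ev = file.events; ev; ev = ev->next_in_file)
                printf("file sees event %d\n", ev->id);
        for (struct toy_event *ev = ctx.events; ev; ev = ev->next_in_ctx)
                printf("ctx sees event %d\n", ev->id);

        /* nodes are shared by both lists, so free them through one list only */
        for (struct toy_event *ev = file.events; ev; ) {
                struct toy_event *next = ev->next_in_file;
                free(ev);
                ev = next;
        }
        return 0;
}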
From 2d0d099f1950bda2f712364a3bf74f20ddb61190 Mon Sep 17 00:00:00 2001
From: Tom Duffy
Date: Wed, 27 Jul 2005 11:45:45 -0700
Subject: [PATCH] Add kernel portion of user CM implementation (fix)

Include the patch openib-general changing class_simple to class.

Signed-off-by: Tom Duffy
Cc: Hal Rosenstock
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 drivers/infiniband/core/ucm.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'drivers/infiniband/core')

diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 4b4808a0be43..546ec61c407f 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1339,7 +1339,7 @@ static struct file_operations ib_ucm_fops = {
 };
 
 
-static struct class_simple *ib_ucm_class;
+static struct class *ib_ucm_class;
 static struct cdev          ib_ucm_cdev;
 
 static int __init ib_ucm_init(void)
@@ -1360,17 +1360,14 @@ static int __init ib_ucm_init(void)
                 goto err_cdev;
         }
 
-        ib_ucm_class = class_simple_create(THIS_MODULE, "infiniband_cm");
+        ib_ucm_class = class_create(THIS_MODULE, "infiniband_cm");
         if (IS_ERR(ib_ucm_class)) {
                 result = PTR_ERR(ib_ucm_class);
                 printk(KERN_ERR "UCM: Error <%d> creating class\n", result);
                 goto err_class;
         }
 
-        class_simple_device_add(ib_ucm_class,
-                                IB_UCM_DEV,
-                                NULL,
-                                "ucm");
+        class_device_create(ib_ucm_class, IB_UCM_DEV, NULL, "ucm");
 
         idr_init(&ctx_id_table);
         init_MUTEX(&ctx_id_mutex);
@@ -1386,8 +1383,8 @@ err_chr:
 
 static void __exit ib_ucm_cleanup(void)
 {
-        class_simple_device_remove(IB_UCM_DEV);
-        class_simple_destroy(ib_ucm_class);
+        class_device_destroy(ib_ucm_class, IB_UCM_DEV);
+        class_destroy(ib_ucm_class);
         cdev_del(&ib_ucm_cdev);
         unregister_chrdev_region(IB_UCM_DEV, 1);
 }
-- 
cgit v1.2.3-55-g7522

From 8fd65b096a7ba1fff69c7991f481ebac5498673e Mon Sep 17 00:00:00 2001
From: Hal Rosenstock
Date: Wed, 27 Jul 2005 11:45:45 -0700
Subject: [PATCH] IB: Hook up userspace CM to the make system

Hook up userspace CM to the make system

Signed-off-by: Libor Michalek
Signed-off-by: Hal Rosenstock
Cc: Roland Dreier
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 drivers/infiniband/core/Makefile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'drivers/infiniband/core')

diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 216cb281abdd..10be36731ed7 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,7 +1,7 @@
 EXTRA_CFLAGS += -Idrivers/infiniband/include
 
 obj-$(CONFIG_INFINIBAND) +=             ib_core.o ib_mad.o ib_sa.o \
-                                        ib_cm.o ib_umad.o
+                                        ib_cm.o ib_umad.o ib_ucm.o
 obj-$(CONFIG_INFINIBAND_USER_VERBS) +=  ib_uverbs.o
 
 ib_core-y :=                    packer.o ud_header.o verbs.o sysfs.o \
@@ -15,4 +15,6 @@ ib_cm-y := cm.o
 
 ib_umad-y :=                    user_mad.o
 
+ib_ucm-y :=                     ucm.o
+
 ib_uverbs-y :=                  uverbs_main.o uverbs_cmd.o uverbs_mem.o
-- 
cgit v1.2.3-55-g7522

From f13f9f501a6eee14e495aba56ec6f70cf2328180 Mon Sep 17 00:00:00 2001
From: Hal Rosenstock
Date: Wed, 27 Jul 2005 11:45:46 -0700
Subject: [PATCH] IB: Eliminate sparse warnings in SA client

Eliminate sparse warnings in SA client

Signed-off-by: Hal Rosenstock
Cc: Roland Dreier
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 drivers/infiniband/core/sa_query.c |  6 +++---
 drivers/infiniband/include/ib_sa.h | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'drivers/infiniband/core')

diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 18caf61d8847..795184931c83
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -600,7 +600,7 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
 int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
                        struct ib_sa_path_rec *rec,
                        ib_sa_comp_mask comp_mask,
-                       int timeout_ms, int gfp_mask,
+                       int timeout_ms, unsigned int __nocast gfp_mask,
                        void (*callback)(int status,
                                         struct ib_sa_path_rec *resp,
                                         void *context),
@@ -702,7 +702,7 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
 int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
                             struct ib_sa_service_rec *rec,
                             ib_sa_comp_mask comp_mask,
-                            int timeout_ms, int gfp_mask,
+                            int timeout_ms, unsigned int __nocast gfp_mask,
                             void (*callback)(int status,
                                              struct ib_sa_service_rec *resp,
                                              void *context),
@@ -785,7 +785,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
                              u8 method,
                              struct ib_sa_mcmember_rec *rec,
                              ib_sa_comp_mask comp_mask,
-                             int timeout_ms, int gfp_mask,
+                             int timeout_ms, unsigned int __nocast gfp_mask,
                              void (*callback)(int status,
                                               struct ib_sa_mcmember_rec *resp,
                                               void *context),
diff --git a/drivers/infiniband/include/ib_sa.h b/drivers/infiniband/include/ib_sa.h
index 62047b753dc0..6d999f7b5d93 100644
--- a/drivers/infiniband/include/ib_sa.h
+++ b/drivers/infiniband/include/ib_sa.h
@@ -256,7 +256,7 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query);
 int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
                        struct ib_sa_path_rec *rec,
                        ib_sa_comp_mask comp_mask,
-                       int timeout_ms, int gfp_mask,
+                       int timeout_ms, unsigned int __nocast gfp_mask,
                        void (*callback)(int status,
                                         struct ib_sa_path_rec *resp,
                                         void *context),
@@ -267,7 +267,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
                              u8 method,
                              struct ib_sa_mcmember_rec *rec,
                              ib_sa_comp_mask comp_mask,
-                             int timeout_ms, int gfp_mask,
+                             int timeout_ms, unsigned int __nocast gfp_mask,
                              void (*callback)(int status,
                                               struct ib_sa_mcmember_rec *resp,
                                               void *context),
@@ -278,7 +278,7 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num,
                             u8 method,
                             struct ib_sa_service_rec *rec,
                             ib_sa_comp_mask comp_mask,
-                            int timeout_ms, int gfp_mask,
+                            int timeout_ms, unsigned int __nocast gfp_mask,
                             void (*callback)(int status,
                                              struct ib_sa_service_rec *resp,
                                              void *context),
@@ -313,7 +313,7 @@ static inline int
 ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num,
                        struct ib_sa_mcmember_rec *rec,
                        ib_sa_comp_mask comp_mask,
-                       int timeout_ms, int gfp_mask,
+                       int timeout_ms, unsigned int __nocast gfp_mask,
                        void (*callback)(int status,
                                         struct ib_sa_mcmember_rec *resp,
                                         void *context),
@@ -355,7 +355,7 @@ static inline int
 ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num,
                           struct ib_sa_mcmember_rec *rec,
                           ib_sa_comp_mask comp_mask,
-                          int timeout_ms, int gfp_mask,
+                          int timeout_ms, unsigned int __nocast gfp_mask,
                           void (*callback)(int status,
                                            struct ib_sa_mcmember_rec *resp,
                                            void *context),
-- 
cgit v1.2.3-55-g7522
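The sparse cleanup above hinges on the __nocast annotation. The stand-alone sketch below shows how such an annotation is typically wired up so that plain gcc ignores it while sparse (run via make C=1) can type-check the mask argument; the demo_alloc() function and its userspace malloc() stand-in are invented for illustration and are not part of the patch.

/* Illustrative only: the usual pattern for a sparse-only annotation, mirroring
 * the way linux/compiler.h defines __nocast.  Builds with plain gcc as well.
 */
#include <stdio.h>
#include <stdlib.h>

#ifdef __CHECKER__                         /* defined when sparse is running */
# define __nocast __attribute__((nocast))
#else
# define __nocast                          /* plain gcc sees nothing special */
#endif

/* With the mask declared in this annotated, unsigned form, sparse can flag
 * callers that pass a value of a mismatched type or signedness instead of a
 * real allocation-flag mask, which is roughly what the patch above achieves
 * for the SA query functions.
 */
static void *demo_alloc(size_t size, unsigned int __nocast gfp_mask)
{
        (void)gfp_mask;                    /* userspace stand-in: just malloc */
        return malloc(size);
}

int main(void)
{
        void *p = demo_alloc(32, 0u);      /* 0u standing in for GFP_KERNEL */
        printf("allocated %p\n", p);
        free(p);
        return 0;
}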