summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Brown2009-08-09 02:25:38 +0200
committerMichael Brown2009-08-09 02:31:07 +0200
commit46073f12391e5a467fbf5d5853ea91333fcce843 (patch)
treef364ef93930613b99d602fff44e644427c13d956
parent[ath5k] Add support for non-802.11n Atheros wireless NICs (diff)
downloadipxe-46073f12391e5a467fbf5d5853ea91333fcce843.tar.gz
ipxe-46073f12391e5a467fbf5d5853ea91333fcce843.tar.xz
ipxe-46073f12391e5a467fbf5d5853ea91333fcce843.zip
[infiniband] Handle duplicate Communication Management REPs
We will terminate our transaction as soon as we receive the first CM REP, since that provides all the state that we need. However, the peer may resend the REP if it didn't see our RTU, and if we don't respond with another RTU we risk being disconnected. (This protocol appears not to handle retries gracefully.) Fix by adding a management agent that will listen for these duplicate REPs and send back an RTU.
-rw-r--r--src/include/gpxe/ib_cm.h3
-rw-r--r--src/net/infiniband/ib_cm.c55
2 files changed, 58 insertions, 0 deletions
diff --git a/src/include/gpxe/ib_cm.h b/src/include/gpxe/ib_cm.h
index 07f5b716..670fffda 100644
--- a/src/include/gpxe/ib_cm.h
+++ b/src/include/gpxe/ib_cm.h
@@ -46,6 +46,9 @@ struct ib_connection {
/** Connection operations */
struct ib_connection_operations *op;
+ /** List of connections */
+ struct list_head list;
+
/** Path to target */
struct ib_path *path;
/** Connection request management transaction */
diff --git a/src/net/infiniband/ib_cm.c b/src/net/infiniband/ib_cm.c
index 01ce21c2..13539b40 100644
--- a/src/net/infiniband/ib_cm.c
+++ b/src/net/infiniband/ib_cm.c
@@ -36,6 +36,9 @@ FILE_LICENCE ( GPL2_OR_LATER );
*
*/
+/** List of connections */
+static LIST_HEAD ( ib_cm_conns );
+
/**
* Send "ready to use" response
*
@@ -72,6 +75,54 @@ static int ib_cm_send_rtu ( struct ib_device *ibdev,
}
/**
+ * Handle duplicate connection replies
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @v mad Received MAD
+ * @v av Source address vector
+ * @ret rc Return status code
+ *
+ * If a "ready to use" MAD is lost, the peer may resend the connection
+ * reply. We have to respond to these with duplicate "ready to use"
+ * MADs, otherwise the peer may time out and drop the connection.
+ */
+static void ib_cm_connect_rep ( struct ib_device *ibdev,
+ struct ib_mad_interface *mi,
+ union ib_mad *mad,
+ struct ib_address_vector *av ) {
+ struct ib_cm_connect_reply *connect_rep =
+ &mad->cm.cm_data.connect_reply;
+ struct ib_connection *conn;
+ int rc;
+
+ /* Identify connection */
+ list_for_each_entry ( conn, &ib_cm_conns, list ) {
+ if ( ntohl ( connect_rep->remote_id ) != conn->local_id )
+ continue;
+ /* Try to send "ready to use" reply */
+ if ( ( rc = ib_cm_send_rtu ( ibdev, mi, conn, av ) ) != 0 ) {
+ /* Ignore errors */
+ return;
+ }
+ return;
+ }
+
+ DBG ( "CM unidentified connection %08x\n",
+ ntohl ( connect_rep->remote_id ) );
+}
+
+/** Communication management agents */
+struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = {
+ {
+ .mgmt_class = IB_MGMT_CLASS_CM,
+ .class_version = IB_CM_CLASS_VERSION,
+ .attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
+ .handle = ib_cm_connect_rep,
+ },
+};
+
+/**
* Handle connection request transaction completion
*
* @v ibdev Infiniband device
@@ -296,6 +347,9 @@ ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp,
goto err_create_path;
ib_path_set_ownerdata ( conn->path, conn );
+ /* Add to list of connections */
+ list_add ( &conn->list, &ib_cm_conns );
+
DBGC ( conn, "CM %p created for IBDEV %p QPN %lx\n",
conn, ibdev, qp->qpn );
DBGC ( conn, "CM %p connecting to %08x:%08x:%08x:%08x %08x:%08x\n",
@@ -324,6 +378,7 @@ void ib_destroy_conn ( struct ib_device *ibdev,
struct ib_queue_pair *qp __unused,
struct ib_connection *conn ) {
+ list_del ( &conn->list );
if ( conn->madx )
ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx );
if ( conn->path )