summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Michael Brown, 2009-07-07 17:07:31 +0200
committer: Michael Brown, 2009-07-18 00:06:34 +0200
commit: 0fbf2f6bda3f380fd54d1aa2c0275c0a0227eea7 (patch)
tree: eca6dfb6d3c6fb9e17cb91d54fc4ab4e9d3f4bb4
parent: [infiniband] Allow for sending MADs via GMA without retransmission (diff)
download: ipxe-0fbf2f6bda3f380fd54d1aa2c0275c0a0227eea7.tar.gz
ipxe-0fbf2f6bda3f380fd54d1aa2c0275c0a0227eea7.tar.xz
ipxe-0fbf2f6bda3f380fd54d1aa2c0275c0a0227eea7.zip
[infiniband] Provide a general mechanism for multicast group joins
Generalise out the multicast group membership record code from IPoIB.
-rw-r--r-- src/drivers/net/ipoib.c 142
-rw-r--r-- src/include/gpxe/errfile.h 1
-rw-r--r-- src/include/gpxe/ib_mad.h 2
-rw-r--r-- src/include/gpxe/ib_mcast.h 19
-rw-r--r-- src/net/infiniband.c 4
-rw-r--r-- src/net/infiniband/ib_mcast.c 235
6 files changed, 274 insertions, 129 deletions
diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c
index 4b9f1e0b..36df342c 100644
--- a/src/drivers/net/ipoib.c
+++ b/src/drivers/net/ipoib.c
@@ -30,6 +30,7 @@ FILE_LICENCE ( GPL2_OR_LATER );
#include <gpxe/infiniband.h>
#include <gpxe/ib_qset.h>
#include <gpxe/ib_pathrec.h>
+#include <gpxe/ib_mcast.h>
#include <gpxe/ipoib.h>
/** @file
@@ -67,20 +68,14 @@ struct ipoib_device {
struct ib_queue_set meta;
/** Broadcast MAC */
struct ipoib_mac broadcast;
- /** Attached to multicast group
+ /** Joined to multicast group
*
- * This flag indicates whether or not we have attached our
- * data queue pair to the broadcast multicast GID.
+ * This flag indicates whether or not we have initiated the
+ * join to the IPv4 multicast group.
*/
- int broadcast_attached;
+ int broadcast_joined;
};
-/** TID half used to identify multicast member record replies */
-#define IPOIB_TID_MC_MEMBER_REC 0x22222222UL
-
-/** IPoIB metadata TID */
-static uint32_t ipoib_meta_tid = 0;
-
/** Broadcast IPoIB address */
static struct ipoib_mac ipoib_broadcast = {
.qpn = htonl ( IB_QPN_BROADCAST ),
@@ -333,67 +328,6 @@ struct net_device * alloc_ipoibdev ( size_t priv_size ) {
*/
/**
- * Transmit multicast group membership request
- *
- * @v ipoib IPoIB device
- * @v gid Multicast GID
- * @v join Join (rather than leave) group
- * @ret rc Return status code
- */
-static int ipoib_mc_member_record ( struct ipoib_device *ipoib,
- struct ib_gid *gid, int join ) {
- struct ib_device *ibdev = ipoib->ibdev;
- struct io_buffer *iobuf;
- struct ib_mad_sa *sa;
- struct ib_address_vector av;
- int rc;
-
- /* Allocate I/O buffer */
- iobuf = alloc_iob ( sizeof ( *sa ) );
- if ( ! iobuf )
- return -ENOMEM;
- iob_put ( iobuf, sizeof ( *sa ) );
- sa = iobuf->data;
- memset ( sa, 0, sizeof ( *sa ) );
-
- /* Construct path record request */
- sa->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
- sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
- sa->mad_hdr.class_version = 2;
- sa->mad_hdr.method =
- ( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE );
- sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC );
- sa->mad_hdr.tid[0] = IPOIB_TID_MC_MEMBER_REC;
- sa->mad_hdr.tid[1] = ipoib_meta_tid++;
- sa->sa_hdr.comp_mask[1] =
- htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
- IB_SA_MCMEMBER_REC_JOIN_STATE );
- sa->sa_data.mc_member_record.scope__join_state = 1;
- memcpy ( &sa->sa_data.mc_member_record.mgid, gid,
- sizeof ( sa->sa_data.mc_member_record.mgid ) );
- memcpy ( &sa->sa_data.mc_member_record.port_gid, &ibdev->gid,
- sizeof ( sa->sa_data.mc_member_record.port_gid ) );
-
- /* Construct address vector */
- memset ( &av, 0, sizeof ( av ) );
- av.lid = ibdev->sm_lid;
- av.sl = ibdev->sm_sl;
- av.qpn = IB_QPN_GMA;
- av.qkey = IB_QKEY_GMA;
-
- /* Post send request */
- if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av,
- iobuf ) ) != 0 ) {
- DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n",
- ipoib, strerror ( rc ) );
- free_iob ( iobuf );
- return rc;
- }
-
- return 0;
-}
-
-/**
* Transmit packet via IPoIB network device
*
* @v netdev Network device
@@ -530,33 +464,6 @@ static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused,
}
/**
- * Handle received IPoIB multicast membership record
- *
- * @v ipoib IPoIB device
- * @v mc_member_record Multicast membership record
- */
-static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib,
- struct ib_mc_member_record *mc_member_record ) {
- unsigned long data_qkey;
- int joined;
- int rc;
-
- /* Record parameters */
- joined = ( mc_member_record->scope__join_state & 0x0f );
- data_qkey = ntohl ( mc_member_record->qkey );
- DBGC ( ipoib, "IPoIB %p %s broadcast group: qkey %lx\n",
- ipoib, ( joined ? "joined" : "left" ), data_qkey );
-
- /* Update data queue pair qkey */
- if ( ( rc = ib_modify_qp ( ipoib->ibdev, ipoib->data.qp,
- IB_MODIFY_QKEY, data_qkey ) ) != 0 ){
- DBGC ( ipoib, "IPoIB %p could not update data qkey: %s\n",
- ipoib, strerror ( rc ) );
- return;
- }
-}
-
-/**
* Handle IPoIB metadata receive completion
*
* @v ibdev Infiniband device
@@ -594,10 +501,6 @@ ipoib_meta_complete_recv ( struct ib_device *ibdev __unused,
}
switch ( sa->mad_hdr.tid[0] ) {
- case IPOIB_TID_MC_MEMBER_REC:
- ipoib_recv_mc_member_record ( ipoib,
- &sa->sa_data.mc_member_record );
- break;
default:
DBGC ( ipoib, "IPoIB %p unwanted response:\n",
ipoib );
@@ -647,31 +550,13 @@ static void ipoib_irq ( struct net_device *netdev __unused,
static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
int rc;
- /* Sanity check */
- if ( ! ipoib->data.qp )
- return 0;
-
- /* Attach data queue to broadcast multicast GID */
- assert ( ipoib->broadcast_attached == 0 );
- if ( ( rc = ib_mcast_attach ( ipoib->ibdev, ipoib->data.qp,
- &ipoib->broadcast.gid ) ) != 0 ){
- DBGC ( ipoib, "IPoIB %p could not attach to broadcast GID: "
- "%s\n", ipoib, strerror ( rc ) );
- return rc;
- }
- ipoib->broadcast_attached = 1;
-
- /* Initiate broadcast group join */
- if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast.gid,
- 1 ) ) != 0 ) {
- DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n",
+ if ( ( rc = ib_mcast_join ( ipoib->ibdev, ipoib->data.qp,
+ &ipoib->broadcast.gid ) ) != 0 ) {
+ DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
ipoib, strerror ( rc ) );
return rc;
}
-
- /* We will set link up on the network device when we receive
- * the broadcast join response.
- */
+ ipoib->broadcast_joined = 1;
return 0;
}
@@ -684,11 +569,10 @@ static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {
/* Detach data queue from broadcast multicast GID */
- if ( ipoib->broadcast_attached ) {
- assert ( ipoib->data.qp != NULL );
- ib_mcast_detach ( ipoib->ibdev, ipoib->data.qp,
- &ipoib->broadcast.gid );
- ipoib->broadcast_attached = 0;
+ if ( ipoib->broadcast_joined ) {
+ ib_mcast_leave ( ipoib->ibdev, ipoib->data.qp,
+ &ipoib->broadcast.gid );
+ ipoib->broadcast_joined = 0;
}
}
diff --git a/src/include/gpxe/errfile.h b/src/include/gpxe/errfile.h
index 0eac0a8e..5e7fa09b 100644
--- a/src/include/gpxe/errfile.h
+++ b/src/include/gpxe/errfile.h
@@ -146,6 +146,7 @@ FILE_LICENCE ( GPL2_OR_LATER );
#define ERRFILE_ib_qset ( ERRFILE_NET | 0x001a0000 )
#define ERRFILE_ib_gma ( ERRFILE_NET | 0x001b0000 )
#define ERRFILE_ib_pathrec ( ERRFILE_NET | 0x001c0000 )
+#define ERRFILE_ib_mcast ( ERRFILE_NET | 0x001d0000 )
#define ERRFILE_image ( ERRFILE_IMAGE | 0x00000000 )
#define ERRFILE_elf ( ERRFILE_IMAGE | 0x00010000 )
diff --git a/src/include/gpxe/ib_mad.h b/src/include/gpxe/ib_mad.h
index eaea12b8..d4582c34 100644
--- a/src/include/gpxe/ib_mad.h
+++ b/src/include/gpxe/ib_mad.h
@@ -203,6 +203,8 @@ struct ib_smp_class_specific {
#define IB_SA_CLASS_VERSION 2
+#define IB_SA_METHOD_DELETE_RESP 0x95
+
struct ib_rmpp_hdr {
uint32_t raw[3];
} __attribute__ (( packed ));
diff --git a/src/include/gpxe/ib_mcast.h b/src/include/gpxe/ib_mcast.h
new file mode 100644
index 00000000..2ca3382b
--- /dev/null
+++ b/src/include/gpxe/ib_mcast.h
@@ -0,0 +1,19 @@
+#ifndef _GPXE_IB_MCAST_H
+#define _GPXE_IB_MCAST_H
+
+/** @file
+ *
+ * Infiniband multicast groups
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <gpxe/infiniband.h>
+
+/* Attach a queue pair to a multicast GID and issue the membership
+ * join request; returns a status code (zero on success).
+ */
+extern int ib_mcast_join ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+ struct ib_gid *gid );
+/* Detach a queue pair from a multicast GID and issue the membership
+ * leave request; no status is reported to the caller.
+ */
+extern void ib_mcast_leave ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+ struct ib_gid *gid );
+
+#endif /* _GPXE_IB_MCAST_H */
diff --git a/src/net/infiniband.c b/src/net/infiniband.c
index 369d490a..e71b3bcc 100644
--- a/src/net/infiniband.c
+++ b/src/net/infiniband.c
@@ -554,6 +554,10 @@ void ib_close ( struct ib_device *ibdev ) {
* @v qp Queue pair
* @v gid Multicast GID
* @ret rc Return status code
+ *
+ * Note that this function handles only the local device's attachment
+ * to the multicast GID; it does not issue the relevant MADs to join
+ * the multicast group on the subnet.
*/
int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
struct ib_gid *gid ) {
diff --git a/src/net/infiniband/ib_mcast.c b/src/net/infiniband/ib_mcast.c
new file mode 100644
index 00000000..358ee0d1
--- /dev/null
+++ b/src/net/infiniband/ib_mcast.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <string.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <gpxe/list.h>
+#include <gpxe/infiniband.h>
+#include <gpxe/ib_gma.h>
+#include <gpxe/ib_mcast.h>
+
+/** @file
+ *
+ * Infiniband multicast groups
+ *
+ */
+
+/**
+ * Transmit multicast group membership request
+ *
+ * Builds a subnet administration MAD containing a multicast member
+ * record for the given multicast GID and this device's port GID,
+ * using the Set method to join or the Delete method to leave, and
+ * passes it to the device's GMA via ib_gma_request().
+ *
+ * @v ibdev Infiniband device
+ * @v gid Multicast GID
+ * @v join Join (rather than leave) group
+ * @ret rc Return status code
+ */
+static int ib_mc_member_request ( struct ib_device *ibdev, struct ib_gid *gid,
+ int join ) {
+ union ib_mad mad;
+ struct ib_mad_sa *sa = &mad.sa;
+ int rc;
+
+ /* Construct multicast membership record request */
+ memset ( sa, 0, sizeof ( *sa ) );
+ sa->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+ sa->mad_hdr.class_version = IB_SA_CLASS_VERSION;
+ sa->mad_hdr.method =
+ ( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE );
+ sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC );
+ /* Component mask marks which record fields below are valid */
+ sa->sa_hdr.comp_mask[1] =
+ htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
+ IB_SA_MCMEMBER_REC_JOIN_STATE );
+ sa->sa_data.mc_member_record.scope__join_state = 1;
+ memcpy ( &sa->sa_data.mc_member_record.mgid, gid,
+ sizeof ( sa->sa_data.mc_member_record.mgid ) );
+ memcpy ( &sa->sa_data.mc_member_record.port_gid, &ibdev->gid,
+ sizeof ( sa->sa_data.mc_member_record.port_gid ) );
+
+ /* Issue multicast membership record request.  The join flag
+ * is also passed as the final ib_gma_request() argument
+ * (NOTE(review): presumably the retransmission flag, so that
+ * leave requests are sent only once - confirm in ib_gma.h).
+ */
+ if ( ( rc = ib_gma_request ( &ibdev->gma, &mad, NULL,
+ join ) ) != 0 ) {
+ /* Report the operation actually attempted, not always "join" */
+ DBGC ( ibdev, "IBDEV %p could not %s group: %s\n", ibdev,
+ ( join ? "join" : "leave" ), strerror ( rc ) );
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * Join multicast group
+ *
+ * Attaches the queue pair to the multicast GID on the local device
+ * and then issues the membership join request.  A zero return means
+ * only that the request was issued; the join response is processed
+ * separately by ib_handle_mc_member_join().  If the request cannot
+ * be issued, the queue pair is detached again before returning.
+ *
+ * @v ibdev Infiniband device
+ * @v qp Queue pair
+ * @v gid Multicast GID
+ * @ret rc Return status code
+ */
+int ib_mcast_join ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+ struct ib_gid *gid ) {
+ int rc;
+
+ DBGC ( ibdev, "IBDEV %p QPN %lx joining %08x:%08x:%08x:%08x\n",
+ ibdev, qp->qpn, ntohl ( gid->u.dwords[0] ),
+ ntohl ( gid->u.dwords[1] ), ntohl ( gid->u.dwords[2] ),
+ ntohl ( gid->u.dwords[3] ) );
+
+ /* Attach queue pair to multicast GID */
+ if ( ( rc = ib_mcast_attach ( ibdev, qp, gid ) ) != 0 ) {
+ DBGC ( ibdev, "IBDEV %p could not attach: %s\n",
+ ibdev, strerror ( rc ) );
+ goto err_mcast_attach;
+ }
+
+ /* Initiate multicast membership join */
+ if ( ( rc = ib_mc_member_request ( ibdev, gid, 1 ) ) != 0 )
+ goto err_mc_member_record;
+
+ return 0;
+
+ err_mc_member_record:
+ ib_mcast_detach ( ibdev, qp, gid );
+ err_mcast_attach:
+ return rc;
+}
+
+/**
+ * Leave multicast group
+ *
+ * Detaches the queue pair from the multicast GID on the local device
+ * and then issues the membership leave request.  The status returned
+ * by the leave request is discarded, so a failure to send it is not
+ * reported to the caller.
+ *
+ * @v ibdev Infiniband device
+ * @v qp Queue pair
+ * @v gid Multicast GID
+ */
+void ib_mcast_leave ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+ struct ib_gid *gid ) {
+
+ DBGC ( ibdev, "IBDEV %p QPN %lx leaving %08x:%08x:%08x:%08x\n",
+ ibdev, qp->qpn, ntohl ( gid->u.dwords[0] ),
+ ntohl ( gid->u.dwords[1] ), ntohl ( gid->u.dwords[2] ),
+ ntohl ( gid->u.dwords[3] ) );
+
+ /* Detach queue pair from multicast GID */
+ ib_mcast_detach ( ibdev, qp, gid );
+
+ /* Initiate multicast membership leave */
+ ib_mc_member_request ( ibdev, gid, 0 );
+}
+
+/**
+ * Handle multicast membership record join response
+ *
+ * Reads the multicast GID and queue key from the member record in
+ * the response, looks up the queue pair for that GID, and applies
+ * the queue key to it.
+ *
+ * Returns -EINVAL if the MAD status is not OK, -ENOENT if no queue
+ * pair is found for the GID, otherwise the result of ib_modify_qp().
+ *
+ * @v ibdev Infiniband device
+ * @v mad MAD
+ * @ret rc Return status code
+ */
+static int ib_handle_mc_member_join ( struct ib_device *ibdev,
+ union ib_mad *mad ) {
+ struct ib_mc_member_record *mc_member_record =
+ &mad->sa.sa_data.mc_member_record;
+ struct ib_queue_pair *qp;
+ struct ib_gid *gid;
+ unsigned long qkey;
+ int rc;
+
+ /* Ignore if not a success */
+ if ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ) {
+ DBGC ( ibdev, "IBDEV %p join failed with status %04x\n",
+ ibdev, ntohs ( mad->hdr.status ) );
+ return -EINVAL;
+ }
+
+ /* Extract MAD parameters (qkey is converted with ntohl()) */
+ gid = &mc_member_record->mgid;
+ qkey = ntohl ( mc_member_record->qkey );
+
+ /* Locate matching queue pair (NOTE(review): presumably the
+ * queue pair attached to this GID by ib_mcast_attach() -
+ * confirm against ib_find_qp_mgid()).
+ */
+ qp = ib_find_qp_mgid ( ibdev, gid );
+ if ( ! qp ) {
+ DBGC ( ibdev, "IBDEV %p has no QP to join "
+ "%08x:%08x:%08x:%08x\n", ibdev,
+ ntohl ( gid->u.dwords[0] ),
+ ntohl ( gid->u.dwords[1] ),
+ ntohl ( gid->u.dwords[2] ),
+ ntohl ( gid->u.dwords[3] ) );
+ return -ENOENT;
+ }
+ DBGC ( ibdev, "IBDEV %p QPN %lx joined %08x:%08x:%08x:%08x qkey "
+ "%lx\n", ibdev, qp->qpn,
+ ntohl ( gid->u.dwords[0] ), ntohl ( gid->u.dwords[1] ),
+ ntohl ( gid->u.dwords[2] ), ntohl ( gid->u.dwords[3] ),
+ qkey );
+
+ /* Set queue key */
+ if ( ( rc = ib_modify_qp ( ibdev, qp, IB_MODIFY_QKEY, qkey ) ) != 0 ) {
+ DBGC ( ibdev, "IBDEV %p QPN %lx could not modify qkey: %s\n",
+ ibdev, qp->qpn, strerror ( rc ) );
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * Handle multicast membership record leave response
+ *
+ * Checks the MAD status and, on success, logs the multicast GID
+ * carried in the member record.  No device or queue pair state is
+ * changed here.
+ *
+ * Returns -EINVAL if the MAD status is not OK, otherwise zero.
+ *
+ * @v ibdev Infiniband device
+ * @v mad MAD
+ * @ret rc Return status code
+ */
+static int ib_handle_mc_member_leave ( struct ib_device *ibdev,
+ union ib_mad *mad ) {
+ struct ib_mc_member_record *mc_member_record =
+ &mad->sa.sa_data.mc_member_record;
+ struct ib_gid *gid;
+
+ /* Ignore if not a success */
+ if ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ) {
+ DBGC ( ibdev, "IBDEV %p leave failed with status %04x\n",
+ ibdev, ntohs ( mad->hdr.status ) );
+ return -EINVAL;
+ }
+
+ /* Extract MAD parameters */
+ gid = &mc_member_record->mgid;
+ DBGC ( ibdev, "IBDEV %p left %08x:%08x:%08x:%08x\n", ibdev,
+ ntohl ( gid->u.dwords[0] ), ntohl ( gid->u.dwords[1] ),
+ ntohl ( gid->u.dwords[2] ), ntohl ( gid->u.dwords[3] ) );
+
+ return 0;
+}
+
+/** Multicast membership record response handlers
+ *
+ * Two entries, both for subnet administration MADs carrying the
+ * multicast member record attribute: responses with the GetResp
+ * method go to the join handler, and responses with the SA
+ * DeleteResp method go to the leave handler.
+ *
+ * NOTE(review): __ib_mad_handler presumably places these entries in
+ * the table consulted when MADs are received - confirm where the
+ * attribute is defined.
+ */
+struct ib_mad_handler ib_mc_member_record_handlers[] __ib_mad_handler = {
+ {
+ .mgmt_class = IB_MGMT_CLASS_SUBN_ADM,
+ .class_version = IB_SA_CLASS_VERSION,
+ .method = IB_MGMT_METHOD_GET_RESP,
+ .attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ),
+ .handle = ib_handle_mc_member_join,
+ },
+ {
+ .mgmt_class = IB_MGMT_CLASS_SUBN_ADM,
+ .class_version = IB_SA_CLASS_VERSION,
+ .method = IB_SA_METHOD_DELETE_RESP,
+ .attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ),
+ .handle = ib_handle_mc_member_leave,
+ },
+};