From 0fbf2f6bda3f380fd54d1aa2c0275c0a0227eea7 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 7 Jul 2009 16:07:31 +0100 Subject: [infiniband] Provide a general mechanism for multicast group joins Generalise out the multicast group membership record code from IPoIB. --- src/drivers/net/ipoib.c | 142 +++---------------------- src/include/gpxe/errfile.h | 1 + src/include/gpxe/ib_mad.h | 2 + src/include/gpxe/ib_mcast.h | 19 ++++ src/net/infiniband.c | 4 + src/net/infiniband/ib_mcast.c | 235 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 274 insertions(+), 129 deletions(-) create mode 100644 src/include/gpxe/ib_mcast.h create mode 100644 src/net/infiniband/ib_mcast.c diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index 4b9f1e0b..36df342c 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -30,6 +30,7 @@ FILE_LICENCE ( GPL2_OR_LATER ); #include #include #include +#include #include /** @file @@ -67,20 +68,14 @@ struct ipoib_device { struct ib_queue_set meta; /** Broadcast MAC */ struct ipoib_mac broadcast; - /** Attached to multicast group + /** Joined to multicast group * - * This flag indicates whether or not we have attached our - * data queue pair to the broadcast multicast GID. + * This flag indicates whether or not we have initiated the + * join to the IPv4 multicast group. */ - int broadcast_attached; + int broadcast_joined; }; -/** TID half used to identify multicast member record replies */ -#define IPOIB_TID_MC_MEMBER_REC 0x22222222UL - -/** IPoIB metadata TID */ -static uint32_t ipoib_meta_tid = 0; - /** Broadcast IPoIB address */ static struct ipoib_mac ipoib_broadcast = { .qpn = htonl ( IB_QPN_BROADCAST ), @@ -332,67 +327,6 @@ struct net_device * alloc_ipoibdev ( size_t priv_size ) { **************************************************************************** */ -/** - * Transmit multicast group membership request - * - * @v ipoib IPoIB device - * @v gid Multicast GID - * @v join Join (rather than leave) group - * @ret rc Return status code - */ -static int ipoib_mc_member_record ( struct ipoib_device *ipoib, - struct ib_gid *gid, int join ) { - struct ib_device *ibdev = ipoib->ibdev; - struct io_buffer *iobuf; - struct ib_mad_sa *sa; - struct ib_address_vector av; - int rc; - - /* Allocate I/O buffer */ - iobuf = alloc_iob ( sizeof ( *sa ) ); - if ( ! iobuf ) - return -ENOMEM; - iob_put ( iobuf, sizeof ( *sa ) ); - sa = iobuf->data; - memset ( sa, 0, sizeof ( *sa ) ); - - /* Construct path record request */ - sa->mad_hdr.base_version = IB_MGMT_BASE_VERSION; - sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; - sa->mad_hdr.class_version = 2; - sa->mad_hdr.method = - ( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE ); - sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ); - sa->mad_hdr.tid[0] = IPOIB_TID_MC_MEMBER_REC; - sa->mad_hdr.tid[1] = ipoib_meta_tid++; - sa->sa_hdr.comp_mask[1] = - htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | - IB_SA_MCMEMBER_REC_JOIN_STATE ); - sa->sa_data.mc_member_record.scope__join_state = 1; - memcpy ( &sa->sa_data.mc_member_record.mgid, gid, - sizeof ( sa->sa_data.mc_member_record.mgid ) ); - memcpy ( &sa->sa_data.mc_member_record.port_gid, &ibdev->gid, - sizeof ( sa->sa_data.mc_member_record.port_gid ) ); - - /* Construct address vector */ - memset ( &av, 0, sizeof ( av ) ); - av.lid = ibdev->sm_lid; - av.sl = ibdev->sm_sl; - av.qpn = IB_QPN_GMA; - av.qkey = IB_QKEY_GMA; - - /* Post send request */ - if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av, - iobuf ) ) != 0 ) { - DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n", - ipoib, strerror ( rc ) ); - free_iob ( iobuf ); - return rc; - } - - return 0; -} - /** * Transmit packet via IPoIB network device * @@ -529,33 +463,6 @@ static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused, free_iob ( iobuf ); } -/** - * Handle received IPoIB multicast membership record - * - * @v ipoib IPoIB device - * @v mc_member_record Multicast membership record - */ -static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib, - struct ib_mc_member_record *mc_member_record ) { - unsigned long data_qkey; - int joined; - int rc; - - /* Record parameters */ - joined = ( mc_member_record->scope__join_state & 0x0f ); - data_qkey = ntohl ( mc_member_record->qkey ); - DBGC ( ipoib, "IPoIB %p %s broadcast group: qkey %lx\n", - ipoib, ( joined ? "joined" : "left" ), data_qkey ); - - /* Update data queue pair qkey */ - if ( ( rc = ib_modify_qp ( ipoib->ibdev, ipoib->data.qp, - IB_MODIFY_QKEY, data_qkey ) ) != 0 ){ - DBGC ( ipoib, "IPoIB %p could not update data qkey: %s\n", - ipoib, strerror ( rc ) ); - return; - } -} - /** * Handle IPoIB metadata receive completion * @@ -594,10 +501,6 @@ ipoib_meta_complete_recv ( struct ib_device *ibdev __unused, } switch ( sa->mad_hdr.tid[0] ) { - case IPOIB_TID_MC_MEMBER_REC: - ipoib_recv_mc_member_record ( ipoib, - &sa->sa_data.mc_member_record ); - break; default: DBGC ( ipoib, "IPoIB %p unwanted response:\n", ipoib ); @@ -647,31 +550,13 @@ static void ipoib_irq ( struct net_device *netdev __unused, static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) { int rc; - /* Sanity check */ - if ( ! ipoib->data.qp ) - return 0; - - /* Attach data queue to broadcast multicast GID */ - assert ( ipoib->broadcast_attached == 0 ); - if ( ( rc = ib_mcast_attach ( ipoib->ibdev, ipoib->data.qp, - &ipoib->broadcast.gid ) ) != 0 ){ - DBGC ( ipoib, "IPoIB %p could not attach to broadcast GID: " - "%s\n", ipoib, strerror ( rc ) ); - return rc; - } - ipoib->broadcast_attached = 1; - - /* Initiate broadcast group join */ - if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast.gid, - 1 ) ) != 0 ) { - DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n", + if ( ( rc = ib_mcast_join ( ipoib->ibdev, ipoib->data.qp, + &ipoib->broadcast.gid ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n", ipoib, strerror ( rc ) ); return rc; } - - /* We will set link up on the network device when we receive - * the broadcast join response. - */ + ipoib->broadcast_joined = 1; return 0; } @@ -684,11 +569,10 @@ static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) { static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) { /* Detach data queue from broadcast multicast GID */ - if ( ipoib->broadcast_attached ) { - assert ( ipoib->data.qp != NULL ); - ib_mcast_detach ( ipoib->ibdev, ipoib->data.qp, - &ipoib->broadcast.gid ); - ipoib->broadcast_attached = 0; + if ( ipoib->broadcast_joined ) { + ib_mcast_leave ( ipoib->ibdev, ipoib->data.qp, + &ipoib->broadcast.gid ); + ipoib->broadcast_joined = 0; } } diff --git a/src/include/gpxe/errfile.h b/src/include/gpxe/errfile.h index 0eac0a8e..5e7fa09b 100644 --- a/src/include/gpxe/errfile.h +++ b/src/include/gpxe/errfile.h @@ -146,6 +146,7 @@ FILE_LICENCE ( GPL2_OR_LATER ); #define ERRFILE_ib_qset ( ERRFILE_NET | 0x001a0000 ) #define ERRFILE_ib_gma ( ERRFILE_NET | 0x001b0000 ) #define ERRFILE_ib_pathrec ( ERRFILE_NET | 0x001c0000 ) +#define ERRFILE_ib_mcast ( ERRFILE_NET | 0x001d0000 ) #define ERRFILE_image ( ERRFILE_IMAGE | 0x00000000 ) #define ERRFILE_elf ( ERRFILE_IMAGE | 0x00010000 ) diff --git a/src/include/gpxe/ib_mad.h b/src/include/gpxe/ib_mad.h index eaea12b8..d4582c34 100644 --- a/src/include/gpxe/ib_mad.h +++ b/src/include/gpxe/ib_mad.h @@ -203,6 +203,8 @@ struct ib_smp_class_specific { #define IB_SA_CLASS_VERSION 2 +#define IB_SA_METHOD_DELETE_RESP 0x95 + struct ib_rmpp_hdr { uint32_t raw[3]; } __attribute__ (( packed )); diff --git a/src/include/gpxe/ib_mcast.h b/src/include/gpxe/ib_mcast.h new file mode 100644 index 00000000..2ca3382b --- /dev/null +++ b/src/include/gpxe/ib_mcast.h @@ -0,0 +1,19 @@ +#ifndef _GPXE_IB_MCAST_H +#define _GPXE_IB_MCAST_H + +/** @file + * + * Infiniband multicast groups + * + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include + +extern int ib_mcast_join ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_gid *gid ); +extern void ib_mcast_leave ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_gid *gid ); + +#endif /* _GPXE_IB_MCAST_H */ diff --git a/src/net/infiniband.c b/src/net/infiniband.c index 369d490a..e71b3bcc 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -554,6 +554,10 @@ void ib_close ( struct ib_device *ibdev ) { * @v qp Queue pair * @v gid Multicast GID * @ret rc Return status code + * + * Note that this function handles only the local device's attachment + * to the multicast GID; it does not issue the relevant MADs to join + * the multicast group on the subnet. */ int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_gid *gid ) { diff --git a/src/net/infiniband/ib_mcast.c b/src/net/infiniband/ib_mcast.c new file mode 100644 index 00000000..358ee0d1 --- /dev/null +++ b/src/net/infiniband/ib_mcast.c @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2007 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include +#include +#include +#include +#include +#include +#include +#include + +/** @file + * + * Infiniband multicast groups + * + */ + +/** + * Transmit multicast group membership request + * + * @v ibdev Infiniband device + * @v gid Multicast GID + * @v join Join (rather than leave) group + * @ret rc Return status code + */ +static int ib_mc_member_request ( struct ib_device *ibdev, struct ib_gid *gid, + int join ) { + union ib_mad mad; + struct ib_mad_sa *sa = &mad.sa; + int rc; + + /* Construct multicast membership record request */ + memset ( sa, 0, sizeof ( *sa ) ); + sa->mad_hdr.base_version = IB_MGMT_BASE_VERSION; + sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + sa->mad_hdr.class_version = IB_SA_CLASS_VERSION; + sa->mad_hdr.method = + ( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE ); + sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ); + sa->sa_hdr.comp_mask[1] = + htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | + IB_SA_MCMEMBER_REC_JOIN_STATE ); + sa->sa_data.mc_member_record.scope__join_state = 1; + memcpy ( &sa->sa_data.mc_member_record.mgid, gid, + sizeof ( sa->sa_data.mc_member_record.mgid ) ); + memcpy ( &sa->sa_data.mc_member_record.port_gid, &ibdev->gid, + sizeof ( sa->sa_data.mc_member_record.port_gid ) ); + + /* Issue multicast membership record request */ + if ( ( rc = ib_gma_request ( &ibdev->gma, &mad, NULL, + join ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not join group: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Join multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + * @ret rc Return status code + */ +int ib_mcast_join ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_gid *gid ) { + int rc; + + DBGC ( ibdev, "IBDEV %p QPN %lx joining %08x:%08x:%08x:%08x\n", + ibdev, qp->qpn, ntohl ( gid->u.dwords[0] ), + ntohl ( gid->u.dwords[1] ), ntohl ( gid->u.dwords[2] ), + ntohl ( gid->u.dwords[3] ) ); + + /* Attach queue pair to multicast GID */ + if ( ( rc = ib_mcast_attach ( ibdev, qp, gid ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not attach: %s\n", + ibdev, strerror ( rc ) ); + goto err_mcast_attach; + } + + /* Initiate multicast membership join */ + if ( ( rc = ib_mc_member_request ( ibdev, gid, 1 ) ) != 0 ) + goto err_mc_member_record; + + return 0; + + err_mc_member_record: + ib_mcast_detach ( ibdev, qp, gid ); + err_mcast_attach: + return rc; +} + +/** + * Leave multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + */ +void ib_mcast_leave ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_gid *gid ) { + + DBGC ( ibdev, "IBDEV %p QPN %lx leaving %08x:%08x:%08x:%08x\n", + ibdev, qp->qpn, ntohl ( gid->u.dwords[0] ), + ntohl ( gid->u.dwords[1] ), ntohl ( gid->u.dwords[2] ), + ntohl ( gid->u.dwords[3] ) ); + + /* Detach queue pair from multicast GID */ + ib_mcast_detach ( ibdev, qp, gid ); + + /* Initiate multicast membership leave */ + ib_mc_member_request ( ibdev, gid, 0 ); +} + +/** + * Handle multicast membership record join response + * + * @v ibdev Infiniband device + * @v mad MAD + * @ret rc Return status code + */ +static int ib_handle_mc_member_join ( struct ib_device *ibdev, + union ib_mad *mad ) { + struct ib_mc_member_record *mc_member_record = + &mad->sa.sa_data.mc_member_record; + struct ib_queue_pair *qp; + struct ib_gid *gid; + unsigned long qkey; + int rc; + + /* Ignore if not a success */ + if ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ) { + DBGC ( ibdev, "IBDEV %p join failed with status %04x\n", + ibdev, ntohs ( mad->hdr.status ) ); + return -EINVAL; + } + + /* Extract MAD parameters */ + gid = &mc_member_record->mgid; + qkey = ntohl ( mc_member_record->qkey ); + + /* Locate matching queue pair */ + qp = ib_find_qp_mgid ( ibdev, gid ); + if ( ! qp ) { + DBGC ( ibdev, "IBDEV %p has no QP to join " + "%08x:%08x:%08x:%08x\n", ibdev, + ntohl ( gid->u.dwords[0] ), + ntohl ( gid->u.dwords[1] ), + ntohl ( gid->u.dwords[2] ), + ntohl ( gid->u.dwords[3] ) ); + return -ENOENT; + } + DBGC ( ibdev, "IBDEV %p QPN %lx joined %08x:%08x:%08x:%08x qkey " + "%lx\n", ibdev, qp->qpn, + ntohl ( gid->u.dwords[0] ), ntohl ( gid->u.dwords[1] ), + ntohl ( gid->u.dwords[2] ), ntohl ( gid->u.dwords[3] ), + qkey ); + + /* Set queue key */ + if ( ( rc = ib_modify_qp ( ibdev, qp, IB_MODIFY_QKEY, qkey ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p QPN %lx could not modify qkey: %s\n", + ibdev, qp->qpn, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Handle multicast membership record leave response + * + * @v ibdev Infiniband device + * @v mad MAD + * @ret rc Return status code + */ +static int ib_handle_mc_member_leave ( struct ib_device *ibdev, + union ib_mad *mad ) { + struct ib_mc_member_record *mc_member_record = + &mad->sa.sa_data.mc_member_record; + struct ib_gid *gid; + + /* Ignore if not a success */ + if ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ) { + DBGC ( ibdev, "IBDEV %p leave failed with status %04x\n", + ibdev, ntohs ( mad->hdr.status ) ); + return -EINVAL; + } + + /* Extract MAD parameters */ + gid = &mc_member_record->mgid; + DBGC ( ibdev, "IBDEV %p left %08x:%08x:%08x:%08x\n", ibdev, + ntohl ( gid->u.dwords[0] ), ntohl ( gid->u.dwords[1] ), + ntohl ( gid->u.dwords[2] ), ntohl ( gid->u.dwords[3] ) ); + + return 0; +} + +/** Multicast membership record response handler */ +struct ib_mad_handler ib_mc_member_record_handlers[] __ib_mad_handler = { + { + .mgmt_class = IB_MGMT_CLASS_SUBN_ADM, + .class_version = IB_SA_CLASS_VERSION, + .method = IB_MGMT_METHOD_GET_RESP, + .attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ), + .handle = ib_handle_mc_member_join, + }, + { + .mgmt_class = IB_MGMT_CLASS_SUBN_ADM, + .class_version = IB_SA_CLASS_VERSION, + .method = IB_SA_METHOD_DELETE_RESP, + .attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ), + .handle = ib_handle_mc_member_leave, + }, +}; -- cgit v1.2.3-55-g7522