From 9e5fd8ec59cd44dc9ba349d4feee37830fca6a14 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Fri, 7 Nov 2008 08:39:40 +0000 Subject: [infiniband] Add raw packet parser and constructor This can be used with cards that require the driver to construct and parse packet headers manually. Headers are optionally handled out-of-line from the packet payload, since some such cards will split received headers into a separate ring buffer. --- src/drivers/infiniband/ib_packet.c | 234 +++++++++++++++++++++++++++++++++++++ src/include/gpxe/errfile.h | 1 + src/include/gpxe/ib_packet.h | 35 +++++- src/include/gpxe/infiniband.h | 54 ++++----- src/net/infiniband.c | 135 +++++++++++++++++++-- 5 files changed, 423 insertions(+), 36 deletions(-) create mode 100644 src/drivers/infiniband/ib_packet.c diff --git a/src/drivers/infiniband/ib_packet.c b/src/drivers/infiniband/ib_packet.c new file mode 100644 index 00000000..74721337 --- /dev/null +++ b/src/drivers/infiniband/ib_packet.c @@ -0,0 +1,234 @@ +/* + * Copyright (C) 2008 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * @file + * + * Infiniband Packet Formats + * + */ + +/** + * Add IB headers + * + * @v ibdev Infiniband device + * @v iobuf I/O buffer to contain headers + * @v qp Queue pair + * @v payload_len Payload length + * @v av Address vector + */ +int ib_push ( struct ib_device *ibdev, struct io_buffer *iobuf, + struct ib_queue_pair *qp, size_t payload_len, + const struct ib_address_vector *av ) { + struct ib_local_route_header *lrh; + struct ib_global_route_header *grh; + struct ib_base_transport_header *bth; + struct ib_datagram_extended_transport_header *deth; + size_t orig_iob_len = iob_len ( iobuf ); + size_t pad_len; + size_t lrh_len; + size_t grh_len; + unsigned int vl; + unsigned int lnh; + + DBGC2 ( ibdev, "IBDEV %p TX %04x:%08lx => %04x:%08lx (key %08lx)\n", + ibdev, ibdev->lid, qp->qpn, av->lid, av->qpn, av->qkey ); + + /* Calculate packet length */ + pad_len = ( (-payload_len) & 0x3 ); + payload_len += pad_len; + payload_len += 4; /* ICRC */ + + /* Reserve space for headers */ + orig_iob_len = iob_len ( iobuf ); + deth = iob_push ( iobuf, sizeof ( *deth ) ); + bth = iob_push ( iobuf, sizeof ( *bth ) ); + grh_len = ( payload_len + iob_len ( iobuf ) - orig_iob_len ); + grh = ( av->gid_present ? + iob_push ( iobuf, sizeof ( *grh ) ) : NULL ); + lrh = iob_push ( iobuf, sizeof ( *lrh ) ); + lrh_len = ( payload_len + iob_len ( iobuf ) - orig_iob_len ); + + /* Construct LRH */ + vl = ( ( av->qpn == IB_QPN_SMP ) ? IB_VL_SMP : IB_VL_DEFAULT ); + lrh->vl__lver = ( vl << 4 ); + lnh = ( grh ? IB_LNH_GRH : IB_LNH_BTH ); + lrh->sl__lnh = ( ( av->sl << 4 ) | lnh ); + lrh->dlid = htons ( av->lid ); + lrh->length = htons ( lrh_len >> 2 ); + lrh->slid = htons ( ibdev->lid ); + + /* Construct GRH, if required */ + if ( grh ) { + grh->ipver__tclass__flowlabel = + htonl ( IB_GRH_IPVER_IPv6 << 28 ); + grh->paylen = htons ( grh_len ); + grh->nxthdr = IB_GRH_NXTHDR_IBA; + grh->hoplmt = 0; + memcpy ( &grh->sgid, &ibdev->gid, sizeof ( grh->sgid ) ); + memcpy ( &grh->dgid, &av->gid, sizeof ( grh->dgid ) ); + } + + /* Construct BTH */ + bth->opcode = BTH_OPCODE_UD_SEND; + bth->se__m__padcnt__tver = ( pad_len << 4 ); + bth->pkey = htons ( ibdev->pkey ); + bth->dest_qp = htonl ( av->qpn ); + bth->ack__psn = htonl ( ( ibdev->psn++ ) & 0xffffffUL ); + + /* Construct DETH */ + deth->qkey = htonl ( av->qkey ); + deth->src_qp = htonl ( qp->qpn ); + + DBGCP_HDA ( ibdev, 0, iobuf->data, + ( iob_len ( iobuf ) - orig_iob_len ) ); + + return 0; +} + +/** + * Remove IB headers + * + * @v ibdev Infiniband device + * @v iobuf I/O buffer containing headers + * @v qp Queue pair to fill in, or NULL + * @v payload_len Payload length to fill in, or NULL + * @v av Address vector to fill in + */ +int ib_pull ( struct ib_device *ibdev, struct io_buffer *iobuf, + struct ib_queue_pair **qp, size_t *payload_len, + struct ib_address_vector *av ) { + struct ib_local_route_header *lrh; + struct ib_global_route_header *grh; + struct ib_base_transport_header *bth; + struct ib_datagram_extended_transport_header *deth; + size_t orig_iob_len = iob_len ( iobuf ); + unsigned int lnh; + size_t pad_len; + unsigned long qpn; + unsigned int lid; + + /* Clear return values */ + if ( qp ) + *qp = NULL; + if ( payload_len ) + *payload_len = 0; + memset ( av, 0, sizeof ( *av ) ); + + /* Extract LRH */ + if ( iob_len ( iobuf ) < sizeof ( *lrh ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for LRH\n", + ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + lrh = iobuf->data; + iob_pull ( iobuf, sizeof ( *lrh ) ); + av->lid = ntohs ( lrh->slid ); + av->sl = ( lrh->sl__lnh >> 4 ); + lnh = ( lrh->sl__lnh & 0x3 ); + lid = ntohs ( lrh->dlid ); + + /* Reject unsupported packets */ + if ( ! ( ( lnh == IB_LNH_BTH ) || ( lnh == IB_LNH_GRH ) ) ) { + DBGC ( ibdev, "IBDEV %p RX unsupported LNH %x\n", + ibdev, lnh ); + return -ENOTSUP; + } + + /* Extract GRH, if present */ + if ( lnh == IB_LNH_GRH ) { + if ( iob_len ( iobuf ) < sizeof ( *grh ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) " + "for GRH\n", ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + grh = iobuf->data; + iob_pull ( iobuf, sizeof ( *grh ) ); + av->gid_present = 1; + memcpy ( &av->gid, &grh->sgid, sizeof ( av->gid ) ); + } else { + grh = NULL; + } + + /* Extract BTH */ + if ( iob_len ( iobuf ) < sizeof ( *bth ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for BTH\n", + ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + bth = iobuf->data; + iob_pull ( iobuf, sizeof ( *bth ) ); + if ( bth->opcode != BTH_OPCODE_UD_SEND ) { + DBGC ( ibdev, "IBDEV %p unsupported BTH opcode %x\n", + ibdev, bth->opcode ); + return -ENOTSUP; + } + qpn = ntohl ( bth->dest_qp ); + + /* Extract DETH */ + if ( iob_len ( iobuf ) < sizeof ( *deth ) ) { + DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for DETH\n", + ibdev, iob_len ( iobuf ) ); + return -EINVAL; + } + deth = iobuf->data; + iob_pull ( iobuf, sizeof ( *deth ) ); + av->qpn = ntohl ( deth->src_qp ); + av->qkey = ntohl ( deth->qkey ); + + /* Calculate payload length, if applicable */ + if ( payload_len ) { + pad_len = ( ( bth->se__m__padcnt__tver >> 4 ) & 0x3 ); + *payload_len = ( ( ntohs ( lrh->length ) << 2 ) + - ( orig_iob_len - iob_len ( iobuf ) ) + - pad_len - 4 /* ICRC */ ); + } + + /* Determine destination QP, if applicable */ + if ( qp ) { + if ( IB_LID_MULTICAST ( lid ) && grh ) { + *qp = ib_find_qp_mgid ( ibdev, &grh->dgid ); + } else { + *qp = ib_find_qp_qpn ( ibdev, qpn ); + } + if ( ! *qp ) { + DBGC ( ibdev, "IBDEV %p RX for nonexistent QP\n", + ibdev ); + return -ENODEV; + } + } + + DBGC2 ( ibdev, "IBDEV %p RX %04x:%08lx <= %04x:%08lx (key %08lx)\n", + ibdev, lid, + ( IB_LID_MULTICAST( lid ) ? ( qp ? (*qp)->qpn : -1UL ) : qpn ), + av->lid, av->qpn, ntohl ( deth->qkey ) ); + DBGCP_HDA ( ibdev, 0, + ( iobuf->data - ( orig_iob_len - iob_len ( iobuf ) ) ), + ( orig_iob_len - iob_len ( iobuf ) ) ); + + return 0; +} diff --git a/src/include/gpxe/errfile.h b/src/include/gpxe/errfile.h index c314ceed..109e6cd2 100644 --- a/src/include/gpxe/errfile.h +++ b/src/include/gpxe/errfile.h @@ -136,6 +136,7 @@ #define ERRFILE_dhcppkt ( ERRFILE_NET | 0x00150000 ) #define ERRFILE_slam ( ERRFILE_NET | 0x00160000 ) #define ERRFILE_ib_sma ( ERRFILE_NET | 0x00170000 ) +#define ERRFILE_ib_packet ( ERRFILE_NET | 0x00180000 ) #define ERRFILE_image ( ERRFILE_IMAGE | 0x00000000 ) #define ERRFILE_elf ( ERRFILE_IMAGE | 0x00010000 ) diff --git a/src/include/gpxe/ib_packet.h b/src/include/gpxe/ib_packet.h index f9ef2f6e..5374802c 100644 --- a/src/include/gpxe/ib_packet.h +++ b/src/include/gpxe/ib_packet.h @@ -7,6 +7,11 @@ * */ +struct ib_device; +struct ib_queue_pair; +struct ib_address_vector; +struct io_buffer; + /** Half of an Infiniband Global Identifier */ struct ib_gid_half { uint8_t bytes[8]; @@ -53,6 +58,9 @@ enum ib_lnh { /** Default Infiniband LID */ #define IB_LID_NONE 0xffff +/** Test for multicast LID */ +#define IB_LID_MULTICAST( lid ) ( ( (lid) >= 0xc000 ) && ( (lid) <= 0xfffe ) ) + /** An Infiniband Global Route Header */ struct ib_global_route_header { /** IP version, traffic class, and flow label @@ -76,7 +84,6 @@ struct ib_global_route_header { #define IB_GRH_IPVER_IPv6 0x06 #define IB_GRH_NXTHDR_IBA 0x1b -#define IB_GRH_HOPLMT_MAX 0xff /** An Infiniband Base Transport Header */ struct ib_base_transport_header { @@ -111,4 +118,30 @@ struct ib_datagram_extended_transport_header { uint32_t src_qp; } __attribute__ (( packed )); +/** All known IB header formats */ +union ib_headers { + struct ib_local_route_header lrh; + struct { + struct ib_local_route_header lrh; + struct ib_global_route_header grh; + struct ib_base_transport_header bth; + struct ib_datagram_extended_transport_header deth; + } __attribute__ (( packed )) lrh__grh__bth__deth; + struct { + struct ib_local_route_header lrh; + struct ib_base_transport_header bth; + struct ib_datagram_extended_transport_header deth; + } __attribute__ (( packed )) lrh__bth__deth; +} __attribute__ (( packed )); + +/** Maximum size required for IB headers */ +#define IB_MAX_HEADER_SIZE sizeof ( union ib_headers ) + +extern int ib_push ( struct ib_device *ibdev, struct io_buffer *iobuf, + struct ib_queue_pair *qp, size_t payload_len, + const struct ib_address_vector *av ); +extern int ib_pull ( struct ib_device *ibdev, struct io_buffer *iobuf, + struct ib_queue_pair **qp, size_t *payload_len, + struct ib_address_vector *av ); + #endif /* _GPXE_IB_PACKET_H */ diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 2691ffc9..1bb82401 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -55,8 +55,20 @@ struct ib_work_queue { void *drv_priv; }; +/** An Infiniband multicast GID */ +struct ib_multicast_gid { + /** List of multicast GIDs on this QP */ + struct list_head list; + /** Multicast GID */ + struct ib_gid gid; +}; + /** An Infiniband Queue Pair */ struct ib_queue_pair { + /** Containing Infiniband device */ + struct ib_device *ibdev; + /** List of queue pairs on this Infiniband device */ + struct list_head list; /** Queue Pair Number */ unsigned long qpn; /** Queue key */ @@ -65,6 +77,8 @@ struct ib_queue_pair { struct ib_work_queue send; /** Receive queue */ struct ib_work_queue recv; + /** List of multicast GIDs */ + struct list_head mgids; /** Driver private data */ void *drv_priv; /** Queue owner private data */ @@ -286,6 +300,8 @@ struct ib_device { struct list_head list; /** Underlying device */ struct device *dev; + /** List of queue pairs */ + struct list_head qps; /** Infiniband operations */ struct ib_device_operations *op; /** Port number */ @@ -308,6 +324,9 @@ struct ib_device { /** Partition key */ uint16_t pkey; + /** Outbound packet sequence number */ + uint32_t psn; + /** Driver private data */ void *drv_priv; /** Owner private data */ @@ -327,6 +346,10 @@ extern int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp, unsigned long mod_list, unsigned long qkey ); extern void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ); +extern struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev, + unsigned long qpn ); +extern struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev, + struct ib_gid *gid ); extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, unsigned long qpn, int is_send ); extern int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, @@ -341,6 +364,10 @@ extern void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_address_vector *av, struct io_buffer *iobuf, int rc ); +extern int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_gid *gid ); +extern void ib_mcast_detach ( struct ib_device *ibdev, + struct ib_queue_pair *qp, struct ib_gid *gid ); extern struct ib_device * alloc_ibdev ( size_t priv_size ); extern int register_ibdev ( struct ib_device *ibdev ); extern void unregister_ibdev ( struct ib_device *ibdev ); @@ -394,33 +421,6 @@ ib_link_ok ( struct ib_device *ibdev ) { return ( ibdev->port_state == IB_PORT_STATE_ACTIVE ); } -/** - * Attach to multicast group - * - * @v ibdev Infiniband device - * @v qp Queue pair - * @v gid Multicast GID - * @ret rc Return status code - */ -static inline __always_inline int -ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp, - struct ib_gid *gid ) { - return ibdev->op->mcast_attach ( ibdev, qp, gid ); -} - -/** - * Detach from multicast group - * - * @v ibdev Infiniband device - * @v qp Queue pair - * @v gid Multicast GID - */ -static inline __always_inline void -ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp, - struct ib_gid *gid ) { - ibdev->op->mcast_detach ( ibdev, qp, gid ); -} - /** * Get reference to Infiniband device * diff --git a/src/net/infiniband.c b/src/net/infiniband.c index 7ded51e6..fb6fca7d 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -60,7 +60,7 @@ ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes, /* Allocate and initialise data structure */ cq = zalloc ( sizeof ( *cq ) ); if ( ! cq ) - return NULL; + goto err_alloc_cq; cq->num_cqes = num_cqes; INIT_LIST_HEAD ( &cq->work_queues ); cq->op = op; @@ -69,14 +69,19 @@ ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes, if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) { DBGC ( ibdev, "IBDEV %p could not initialise completion " "queue: %s\n", ibdev, strerror ( rc ) ); - free ( cq ); - return NULL; + goto err_dev_create_cq; } DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) " "with CQN %#lx\n", ibdev, num_cqes, cq, ib_cq_get_drvdata ( cq ), cq->cqn ); return cq; + + ibdev->op->destroy_cq ( ibdev, cq ); + err_dev_create_cq: + free ( cq ); + err_alloc_cq: + return NULL; } /** @@ -123,7 +128,9 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) ); qp = zalloc ( total_size ); if ( ! qp ) - return NULL; + goto err_alloc_qp; + qp->ibdev = ibdev; + list_add ( &qp->list, &ibdev->qps ); qp->qkey = qkey; qp->send.qp = qp; qp->send.is_send = 1; @@ -137,15 +144,13 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, qp->recv.num_wqes = num_recv_wqes; qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) )); + INIT_LIST_HEAD ( &qp->mgids ); /* Perform device-specific initialisation and get QPN */ if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) { DBGC ( ibdev, "IBDEV %p could not initialise queue pair: " "%s\n", ibdev, strerror ( rc ) ); - list_del ( &qp->send.list ); - list_del ( &qp->recv.list ); - free ( qp ); - return NULL; + goto err_dev_create_qp; } DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n", @@ -157,6 +162,15 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs, ( ( ( void * ) qp ) + total_size ) ); return qp; + + ibdev->op->destroy_qp ( ibdev, qp ); + err_dev_create_qp: + list_del ( &qp->send.list ); + list_del ( &qp->recv.list ); + list_del ( &qp->list ); + free ( qp ); + err_alloc_qp: + return NULL; } /** @@ -199,6 +213,8 @@ void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) { DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n", ibdev, qp->qpn ); + assert ( list_empty ( &qp->mgids ) ); + /* Perform device-specific destruction */ ibdev->op->destroy_qp ( ibdev, qp ); @@ -219,9 +235,51 @@ void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) { list_del ( &qp->recv.list ); /* Free QP */ + list_del ( &qp->list ); free ( qp ); } +/** + * Find queue pair by QPN + * + * @v ibdev Infiniband device + * @v qpn Queue pair number + * @ret qp Queue pair, or NULL + */ +struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev, + unsigned long qpn ) { + struct ib_queue_pair *qp; + + list_for_each_entry ( qp, &ibdev->qps, list ) { + if ( qp->qpn == qpn ) + return qp; + } + return NULL; +} + +/** + * Find queue pair by multicast GID + * + * @v ibdev Infiniband device + * @v gid Multicast GID + * @ret qp Queue pair, or NULL + */ +struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev, + struct ib_gid *gid ) { + struct ib_queue_pair *qp; + struct ib_multicast_gid *mgid; + + list_for_each_entry ( qp, &ibdev->qps, list ) { + list_for_each_entry ( mgid, &qp->mgids, list ) { + if ( memcmp ( &mgid->gid, gid, + sizeof ( mgid->gid ) ) == 0 ) { + return qp; + } + } + } + return NULL; +} + /** * Find work queue belonging to completion queue * @@ -333,6 +391,66 @@ void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, qp->recv.fill--; } +/** + * Attach to multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + * @ret rc Return status code + */ +int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_gid *gid ) { + struct ib_multicast_gid *mgid; + int rc; + + /* Add to software multicast GID list */ + mgid = zalloc ( sizeof ( *mgid ) ); + if ( ! mgid ) { + rc = -ENOMEM; + goto err_alloc_mgid; + } + memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) ); + list_add ( &mgid->list, &qp->mgids ); + + /* Add to hardware multicast GID list */ + if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 ) + goto err_dev_mcast_attach; + + return 0; + + err_dev_mcast_attach: + list_del ( &mgid->list ); + free ( mgid ); + err_alloc_mgid: + return rc; +} + +/** + * Detach from multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + */ +void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_gid *gid ) { + struct ib_multicast_gid *mgid; + + /* Remove from hardware multicast GID list */ + ibdev->op->mcast_detach ( ibdev, qp, gid ); + + /* Remove from software multicast GID list */ + list_for_each_entry ( mgid, &qp->mgids, list ) { + if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) { + list_del ( &mgid->list ); + free ( mgid ); + break; + } + } +} + + /*************************************************************************** * * Event queues @@ -392,6 +510,7 @@ struct ib_device * alloc_ibdev ( size_t priv_size ) { if ( ibdev ) { drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) ); ib_set_drvdata ( ibdev, drv_priv ); + INIT_LIST_HEAD ( &ibdev->qps ); ibdev->lid = IB_LID_NONE; ibdev->pkey = IB_PKEY_NONE; } -- cgit v1.2.3-55-g7522