summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/drivers/infiniband/hermon.c282
-rw-r--r--src/drivers/infiniband/hermon.h65
-rw-r--r--src/drivers/net/ipoib.c257
-rw-r--r--src/include/gpxe/infiniband.h20
-rw-r--r--src/include/gpxe/ipoib.h1
-rw-r--r--src/net/infiniband.c93
6 files changed, 577 insertions, 141 deletions
diff --git a/src/drivers/infiniband/hermon.c b/src/drivers/infiniband/hermon.c
index c10559f9..41494a5a 100644
--- a/src/drivers/infiniband/hermon.c
+++ b/src/drivers/infiniband/hermon.c
@@ -30,6 +30,7 @@
#include <gpxe/umalloc.h>
#include <gpxe/iobuf.h>
#include <gpxe/netdevice.h>
+#include <gpxe/process.h>
#include <gpxe/infiniband.h>
#include "hermon.h"
@@ -317,19 +318,30 @@ hermon_cmd_write_mtt ( struct hermon *hermon,
}
static inline int
+hermon_cmd_map_eq ( struct hermon *hermon, unsigned long index_map,
+ const struct hermonprm_event_mask *mask ) {
+ return hermon_cmd ( hermon,
+ HERMON_HCR_IN_CMD ( HERMON_HCR_MAP_EQ,
+ 0, sizeof ( *mask ) ),
+ 0, mask, index_map, NULL );
+}
+
+static inline int
hermon_cmd_sw2hw_eq ( struct hermon *hermon, unsigned int index,
- const struct hermonprm_eqc *eqc ) {
+ const struct hermonprm_eqc *eqctx ) {
return hermon_cmd ( hermon,
HERMON_HCR_IN_CMD ( HERMON_HCR_SW2HW_EQ,
- 1, sizeof ( *eqc ) ),
- 0, eqc, index, NULL );
+ 1, sizeof ( *eqctx ) ),
+ 0, eqctx, index, NULL );
}
static inline int
-hermon_cmd_hw2sw_eq ( struct hermon *hermon, unsigned int index ) {
+hermon_cmd_hw2sw_eq ( struct hermon *hermon, unsigned int index,
+ struct hermonprm_eqc *eqctx ) {
return hermon_cmd ( hermon,
- HERMON_HCR_VOID_CMD ( HERMON_HCR_HW2SW_EQ ),
- 1, NULL, index, NULL );
+ HERMON_HCR_OUT_CMD ( HERMON_HCR_HW2SW_EQ,
+ 1, sizeof ( *eqctx ) ),
+ 1, NULL, index, eqctx );
}
static inline int
@@ -378,6 +390,15 @@ hermon_cmd_rtr2rts_qp ( struct hermon *hermon, unsigned long qpn,
}
static inline int
+hermon_cmd_rts2rts_qp ( struct hermon *hermon, unsigned long qpn,
+ const struct hermonprm_qp_ee_state_transitions *ctx ) {
+ return hermon_cmd ( hermon,
+ HERMON_HCR_IN_CMD ( HERMON_HCR_RTS2RTS_QP,
+ 1, sizeof ( *ctx ) ),
+ 0, ctx, qpn, NULL );
+}
+
+static inline int
hermon_cmd_2rst_qp ( struct hermon *hermon, unsigned long qpn ) {
return hermon_cmd ( hermon,
HERMON_HCR_VOID_CMD ( HERMON_HCR_2RST_QP ),
@@ -860,6 +881,39 @@ static int hermon_create_qp ( struct ib_device *ibdev,
}
/**
+ * Modify queue pair
+ *
+ * @v ibdev Infiniband device
+ * @v qp Queue pair
+ * @v mod_list Modification list
+ * @ret rc Return status code
+ */
+static int hermon_modify_qp ( struct ib_device *ibdev,
+			      struct ib_queue_pair *qp,
+			      unsigned long mod_list ) {
+	struct hermon *hermon = ib_get_drvdata ( ibdev );
+	struct hermonprm_qp_ee_state_transitions transition;
+	unsigned long opt_params;
+	int rc;
+
+	/* Translate the generic modification list into the
+	 * device's optional-parameter mask.
+	 */
+	opt_params = ( ( mod_list & IB_MODIFY_QKEY ) ?
+		       HERMON_QP_OPT_PARAM_QKEY : 0 );
+
+	/* Build the transition context; the new queue key is taken
+	 * from the queue pair itself.
+	 */
+	memset ( &transition, 0, sizeof ( transition ) );
+	MLX_FILL_1 ( &transition, 0, opt_param_mask, opt_params );
+	MLX_FILL_1 ( &transition, 44, qpc_eec_data.q_key, qp->qkey );
+
+	/* Issue RTS2RTS_QP */
+	rc = hermon_cmd_rts2rts_qp ( hermon, qp->qpn, &transition );
+	if ( rc != 0 ) {
+		DBGC ( hermon, "Hermon %p RTS2RTS_QP failed: %s\n",
+		       hermon, strerror ( rc ) );
+	}
+
+	return rc;
+}
+
+/**
* Destroy queue pair
*
* @v ibdev Infiniband device
@@ -1356,6 +1410,7 @@ static struct ib_device_operations hermon_ib_operations = {
.create_cq = hermon_create_cq,
.destroy_cq = hermon_destroy_cq,
.create_qp = hermon_create_qp,
+ .modify_qp = hermon_modify_qp,
.destroy_qp = hermon_destroy_qp,
.post_send = hermon_post_send,
.post_recv = hermon_post_recv,
@@ -1369,6 +1424,211 @@ static struct ib_device_operations hermon_ib_operations = {
/***************************************************************************
*
+ * Event queues
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Create event queue
+ *
+ * @v hermon Hermon device
+ * @ret rc Return status code
+ */
+static int hermon_create_eq ( struct hermon *hermon ) {
+ struct hermon_event_queue *hermon_eq = &hermon->eq;
+ struct hermonprm_eqc eqctx;
+ struct hermonprm_event_mask mask;
+ unsigned int i;
+ int rc;
+
+ /* Allocate event queue itself */
+ /* The buffer holds HERMON_NUM_EQES entries and is aligned to
+ * the size of a single entry.
+ */
+ hermon_eq->eqe_size =
+ ( HERMON_NUM_EQES * sizeof ( hermon_eq->eqe[0] ) );
+ hermon_eq->eqe = malloc_dma ( hermon_eq->eqe_size,
+ sizeof ( hermon_eq->eqe[0] ) );
+ if ( ! hermon_eq->eqe ) {
+ rc = -ENOMEM;
+ goto err_eqe;
+ }
+ memset ( hermon_eq->eqe, 0, hermon_eq->eqe_size );
+ /* Set the owner bit in every entry. With next_idx starting
+ * at zero, hermon_poll_eq() treats an entry whose owner bit
+ * is set as still owned by hardware, so the freshly created
+ * queue reads as empty.
+ */
+ for ( i = 0 ; i < HERMON_NUM_EQES ; i++ ) {
+ MLX_FILL_1 ( &hermon_eq->eqe[i].generic, 7, owner, 1 );
+ }
+ /* Order the initialisation above before the hand-over below */
+ barrier();
+
+ /* Allocate MTT entries */
+ if ( ( rc = hermon_alloc_mtt ( hermon, hermon_eq->eqe,
+ hermon_eq->eqe_size,
+ &hermon_eq->mtt ) ) != 0 )
+ goto err_alloc_mtt;
+
+ /* Hand queue over to hardware */
+ /* NOTE(review): log_eq_size is presumably log2 of the entry
+ * count, which relies on HERMON_NUM_EQES being a power of
+ * two - confirm against fls() semantics.
+ */
+ memset ( &eqctx, 0, sizeof ( eqctx ) );
+ MLX_FILL_1 ( &eqctx, 0, st, 0xa /* "Fired" */ );
+ MLX_FILL_1 ( &eqctx, 2,
+ page_offset, ( hermon_eq->mtt.page_offset >> 5 ) );
+ MLX_FILL_1 ( &eqctx, 3, log_eq_size, fls ( HERMON_NUM_EQES - 1 ) );
+ MLX_FILL_1 ( &eqctx, 7, mtt_base_addr_l,
+ ( hermon_eq->mtt.mtt_base_addr >> 3 ) );
+ if ( ( rc = hermon_cmd_sw2hw_eq ( hermon, 0, &eqctx ) ) != 0 ) {
+ DBGC ( hermon, "Hermon %p SW2HW_EQ failed: %s\n",
+ hermon, strerror ( rc ) );
+ goto err_sw2hw_eq;
+ }
+
+ /* Map events to this event queue */
+ /* Only port state change events are requested; the queue
+ * index (0) is combined with the map/unmap flag.
+ */
+ memset ( &mask, 0, sizeof ( mask ) );
+ MLX_FILL_1 ( &mask, 1, port_state_change, 1 );
+ if ( ( rc = hermon_cmd_map_eq ( hermon, ( HERMON_MAP_EQ_MAP | 0 ),
+ &mask ) ) != 0 ) {
+ DBGC ( hermon, "Hermon %p MAP_EQ failed: %s\n",
+ hermon, strerror ( rc ) );
+ goto err_map_eq;
+ }
+
+ return 0;
+
+ /* Error unwinding: undo the completed steps in reverse order
+ * and leave the event queue structure zeroed.
+ */
+ err_map_eq:
+ hermon_cmd_hw2sw_eq ( hermon, 0, &eqctx );
+ err_sw2hw_eq:
+ hermon_free_mtt ( hermon, &hermon_eq->mtt );
+ err_alloc_mtt:
+ free_dma ( hermon_eq->eqe, hermon_eq->eqe_size );
+ err_eqe:
+ memset ( hermon_eq, 0, sizeof ( *hermon_eq ) );
+ return rc;
+}
+
+/**
+ * Destroy event queue
+ *
+ * @v hermon Hermon device
+ */
+static void hermon_destroy_eq ( struct hermon *hermon ) {
+	struct hermon_event_queue *eq = &hermon->eq;
+	struct hermonprm_event_mask unmap_mask;
+	struct hermonprm_eqc returned_ctx;
+	int rc;
+
+	/* Stop routing port state change events to this queue */
+	memset ( &unmap_mask, 0, sizeof ( unmap_mask ) );
+	MLX_FILL_1 ( &unmap_mask, 1, port_state_change, 1 );
+	rc = hermon_cmd_map_eq ( hermon, ( HERMON_MAP_EQ_UNMAP | 0 ),
+				 &unmap_mask );
+	if ( rc != 0 ) {
+		/* Not fatal for the system: carry on with teardown */
+		DBGC ( hermon, "Hermon %p FATAL MAP_EQ failed to unmap: %s\n",
+		       hermon, strerror ( rc ) );
+	}
+
+	/* Reclaim the queue from hardware.  If this fails the
+	 * hardware may still write to the buffer, so the memory is
+	 * deliberately leaked rather than freed.
+	 */
+	rc = hermon_cmd_hw2sw_eq ( hermon, 0, &returned_ctx );
+	if ( rc != 0 ) {
+		DBGC ( hermon, "Hermon %p FATAL HW2SW_EQ failed: %s\n",
+		       hermon, strerror ( rc ) );
+		return;
+	}
+
+	/* Release translation entries, then the queue memory */
+	hermon_free_mtt ( hermon, &eq->mtt );
+	free_dma ( eq->eqe, eq->eqe_size );
+	memset ( eq, 0, sizeof ( *eq ) );
+}
+
+/**
+ * Handle port state event
+ *
+ * @v hermon Hermon device
+ * @v eqe Port state change event queue entry
+ */
+static void hermon_event_port_state_change ( struct hermon *hermon,
+					     union hermonprm_event_entry *eqe){
+	unsigned int port;
+	int link_up;
+
+	/* Decode the event: the entry carries a one-based port
+	 * number, and bit 2 of the event subtype is used as the
+	 * link-up indication.
+	 */
+	link_up = ( MLX_GET ( &eqe->generic, event_sub_type ) & 0x04 );
+	port = ( MLX_GET ( &eqe->port_state_change, data.p ) - 1 );
+	DBGC ( hermon, "Hermon %p port %d link %s\n", hermon, ( port + 1 ),
+	       ( link_up ? "up" : "down" ) );
+
+	/* Pass valid ports on to the Infiniband core; reject
+	 * anything outside the ports this driver manages.
+	 */
+	if ( port < HERMON_NUM_PORTS ) {
+		ib_link_state_changed ( hermon->ibdev[port] );
+		return;
+	}
+
+	DBGC ( hermon, "Hermon %p port %d does not exist!\n",
+	       hermon, ( port + 1 ) );
+}
+
+/**
+ * Poll event queue
+ *
+ * @v hermon Hermon device
+ */
+static void hermon_poll_eq ( struct hermon *hermon ) {
+	struct hermon_event_queue *hermon_eq = &hermon->eq;
+	/* Ring index mask; HERMON_NUM_EQES is used as a power of two */
+	const unsigned int eqe_idx_mask = ( HERMON_NUM_EQES - 1 );
+	union hermonprm_event_entry *eqe;
+	union hermonprm_doorbell_register db_reg;
+	unsigned int event_type;
+
+	while ( 1 ) {
+		/* Look at the next entry.  An entry belongs to
+		 * software only when its owner bit matches the
+		 * current pass parity of next_idx.
+		 */
+		eqe = &hermon_eq->eqe[hermon_eq->next_idx & eqe_idx_mask];
+		if ( MLX_GET ( &eqe->generic, owner ) ^
+		     ( ( hermon_eq->next_idx & HERMON_NUM_EQES ) ? 1 : 0 ) ) {
+			/* Entry still owned by hardware; end of poll */
+			break;
+		}
+		DBGCP ( hermon, "Hermon %p event:\n", hermon );
+		DBGCP_HD ( hermon, eqe, sizeof ( *eqe ) );
+
+		/* Handle event */
+		event_type = MLX_GET ( &eqe->generic, event_type );
+		switch ( event_type ) {
+		case HERMON_EV_PORT_STATE_CHANGE:
+			hermon_event_port_state_change ( hermon, eqe );
+			break;
+		default:
+			DBGC ( hermon, "Hermon %p unrecognised event type "
+			       "%#x:\n", hermon, event_type );
+			DBGC_HD ( hermon, eqe, sizeof ( *eqe ) );
+			break;
+		}
+
+		/* Update event queue's index */
+		hermon_eq->next_idx++;
+
+		/* Ring doorbell.  The consumer index field of the
+		 * doorbell register is only 24 bits wide, whereas
+		 * next_idx counts without bound; mask it so that a
+		 * wrapped index can never spill into the neighbouring
+		 * reserved and "a" bits of the register.
+		 */
+		memset ( &db_reg, 0, sizeof ( db_reg ) );
+		MLX_FILL_1 ( &db_reg.event, 0, ci,
+			     ( hermon_eq->next_idx & 0x00ffffffUL ) );
+		DBGCP ( hermon, "Ringing doorbell %08lx with %08lx\n",
+			virt_to_phys ( hermon->uar + HERMON_DB_EQ0_OFFSET ),
+			db_reg.dword[0] );
+		writel ( db_reg.dword[0],
+			 ( hermon->uar + HERMON_DB_EQ0_OFFSET ) );
+	}
+}
+
+/**
+ * Event queue poll processor
+ *
+ * @v process Hermon event queue process
+ */
+static void hermon_step ( struct process *process ) {
+	/* The process is embedded in the Hermon device structure, so
+	 * recover the device from it and poll its event queue.
+	 */
+	hermon_poll_eq ( container_of ( process, struct hermon,
+					event_process ) );
+}
+
+/***************************************************************************
+ *
* Firmware control
*
***************************************************************************
@@ -1879,6 +2139,7 @@ static int hermon_probe ( struct pci_device *pci,
goto err_alloc_hermon;
}
pci_set_drvdata ( pci, hermon );
+ process_init ( &hermon->event_process, hermon_step, NULL );
/* Allocate Infiniband devices */
for ( i = 0 ; i < HERMON_NUM_PORTS ; i++ ) {
@@ -1945,6 +2206,10 @@ static int hermon_probe ( struct pci_device *pci,
if ( ( rc = hermon_setup_mpt ( hermon ) ) != 0 )
goto err_setup_mpt;
+ /* Set up event queue */
+ if ( ( rc = hermon_create_eq ( hermon ) ) != 0 )
+ goto err_create_eq;
+
/* Register Infiniband devices */
for ( i = 0 ; i < HERMON_NUM_PORTS ; i++ ) {
if ( ( rc = register_ibdev ( hermon->ibdev[i] ) ) != 0 ) {
@@ -1960,6 +2225,8 @@ static int hermon_probe ( struct pci_device *pci,
err_register_ibdev:
for ( ; i >= 0 ; i-- )
unregister_ibdev ( hermon->ibdev[i] );
+ hermon_destroy_eq ( hermon );
+ err_create_eq:
err_setup_mpt:
hermon_cmd_close_hca ( hermon );
err_init_hca:
@@ -1976,6 +2243,7 @@ static int hermon_probe ( struct pci_device *pci,
err_alloc_ibdev:
for ( ; i >= 0 ; i-- )
free_ibdev ( hermon->ibdev[i] );
+ process_del ( &hermon->event_process );
free ( hermon );
err_alloc_hermon:
return rc;
@@ -1992,6 +2260,7 @@ static void hermon_remove ( struct pci_device *pci ) {
for ( i = ( HERMON_NUM_PORTS - 1 ) ; i >= 0 ; i-- )
unregister_ibdev ( hermon->ibdev[i] );
+ hermon_destroy_eq ( hermon );
hermon_cmd_close_hca ( hermon );
hermon_free_icm ( hermon );
hermon_stop_firmware ( hermon );
@@ -2000,6 +2269,7 @@ static void hermon_remove ( struct pci_device *pci ) {
free_dma ( hermon->mailbox_in, HERMON_MBOX_SIZE );
for ( i = ( HERMON_NUM_PORTS - 1 ) ; i >= 0 ; i-- )
free_ibdev ( hermon->ibdev[i] );
+ process_del ( &hermon->event_process );
free ( hermon );
}
diff --git a/src/drivers/infiniband/hermon.h b/src/drivers/infiniband/hermon.h
index 959e6a9d..d9e3dd11 100644
--- a/src/drivers/infiniband/hermon.h
+++ b/src/drivers/infiniband/hermon.h
@@ -9,6 +9,7 @@
#include <stdint.h>
#include <gpxe/uaccess.h>
+#include <gpxe/process.h>
#include "mlx_bitops.h"
#include "MT25408_PRM.h"
@@ -18,7 +19,7 @@
*/
/* Ports in existence */
-#define HERMON_NUM_PORTS 1
+#define HERMON_NUM_PORTS 2
#define HERMON_PORT_BASE 1
/* PCI BARs */
@@ -48,6 +49,7 @@
#define HERMON_HCR_RST2INIT_QP 0x0019
#define HERMON_HCR_INIT2RTR_QP 0x001a
#define HERMON_HCR_RTR2RTS_QP 0x001b
+#define HERMON_HCR_RTS2RTS_QP 0x001c
#define HERMON_HCR_2RST_QP 0x0021
#define HERMON_HCR_MAD_IFC 0x0024
#define HERMON_HCR_READ_MCG 0x0025
@@ -75,6 +77,14 @@
#define HERMON_PAGE_SIZE 4096
#define HERMON_DB_POST_SND_OFFSET 0x14
+#define HERMON_DB_EQ0_OFFSET 0x800
+
+#define HERMON_QP_OPT_PARAM_QKEY 0x00000020UL
+
+#define HERMON_MAP_EQ_MAP ( 0UL << 31 )
+#define HERMON_MAP_EQ_UNMAP ( 1UL << 31 )
+
+#define HERMON_EV_PORT_STATE_CHANGE 0x09
/*
* Datatypes that seem to be missing from the autogenerated documentation
@@ -108,12 +118,32 @@ struct hermonprm_send_db_register_st {
pseudo_bit_t qn[0x00018];
} __attribute__ (( packed ));
+/* Event queue doorbell register layout (24 + 7 + 1 = 32 bits) */
+struct hermonprm_event_db_register_st {
+ pseudo_bit_t ci[0x00018]; /* 24 bits; hermon_poll_eq() writes the queue's next_idx here */
+ pseudo_bit_t reserver[0x00007]; /* 7 bits, not written by the driver */
+ pseudo_bit_t a[0x00001]; /* 1 bit; NOTE(review): meaning not shown here (possibly "arm") - confirm */
+} __attribute__ (( packed ));
+
struct hermonprm_scalar_parameter_st {
pseudo_bit_t value_hi[0x00020];
/* -------------- */
pseudo_bit_t value[0x00020];
} __attribute__ (( packed ));
+/* Event mask passed as the input mailbox of MAP_EQ (two 32-bit
+ * words); a set bit selects that event class for mapping or
+ * unmapping.
+ */
+struct hermonprm_event_mask_st {
+ pseudo_bit_t reserved0[0x00020];
+/* -------------- */
+ pseudo_bit_t completion[0x00001]; /* not set by the event queue setup/teardown code */
+ pseudo_bit_t reserved1[0x0008];
+ pseudo_bit_t port_state_change[0x00001]; /* set when creating and destroying the event queue */
+ pseudo_bit_t reserved2[0x00016];
+} __attribute__ (( packed ));
+
+struct hermonprm_port_state_change_event_st {
+ pseudo_bit_t reserved[0x00020];
+ struct hermonprm_port_state_change_st data;
+} __attribute__ (( packed ));
+
/*
* Wrapper structures for hardware datatypes
*
@@ -124,6 +154,9 @@ struct MLX_DECLARE_STRUCT ( hermonprm_completion_queue_entry );
struct MLX_DECLARE_STRUCT ( hermonprm_completion_with_error );
struct MLX_DECLARE_STRUCT ( hermonprm_cq_db_record );
struct MLX_DECLARE_STRUCT ( hermonprm_eqc );
+struct MLX_DECLARE_STRUCT ( hermonprm_event_db_register );
+struct MLX_DECLARE_STRUCT ( hermonprm_event_mask );
+struct MLX_DECLARE_STRUCT ( hermonprm_event_queue_entry );
struct MLX_DECLARE_STRUCT ( hermonprm_hca_command_register );
struct MLX_DECLARE_STRUCT ( hermonprm_init_hca );
struct MLX_DECLARE_STRUCT ( hermonprm_init_port );
@@ -132,6 +165,7 @@ struct MLX_DECLARE_STRUCT ( hermonprm_mcg_entry );
struct MLX_DECLARE_STRUCT ( hermonprm_mgm_hash );
struct MLX_DECLARE_STRUCT ( hermonprm_mpt );
struct MLX_DECLARE_STRUCT ( hermonprm_mtt );
+struct MLX_DECLARE_STRUCT ( hermonprm_port_state_change_event );
struct MLX_DECLARE_STRUCT ( hermonprm_qp_db_record );
struct MLX_DECLARE_STRUCT ( hermonprm_qp_ee_state_transitions );
struct MLX_DECLARE_STRUCT ( hermonprm_query_dev_cap );
@@ -175,8 +209,14 @@ union hermonprm_completion_entry {
struct hermonprm_completion_with_error error;
} __attribute__ (( packed ));
+union hermonprm_event_entry {
+ struct hermonprm_event_queue_entry generic;
+ struct hermonprm_port_state_change_event port_state_change;
+} __attribute__ (( packed ));
+
union hermonprm_doorbell_register {
struct hermonprm_send_db_register send;
+ struct hermonprm_event_db_register event;
uint32_t dword[1];
} __attribute__ (( packed ));
@@ -362,6 +402,24 @@ struct hermon_completion_queue {
*/
#define HERMON_MAX_EQS 4
+/** A Hermon event queue */
+struct hermon_event_queue {
+ /** Event queue entries */
+ union hermonprm_event_entry *eqe;
+ /** Size of event queue */
+ size_t eqe_size;
+ /** MTT descriptor */
+ struct hermon_mtt mtt;
+ /** Next event queue entry index */
+ unsigned long next_idx;
+};
+
+/** Number of event queue entries
+ *
+ * This is a policy decision.
+ */
+#define HERMON_NUM_EQES 4
+
/** A Hermon resource bitmask */
typedef uint32_t hermon_bitmask_t;
@@ -397,6 +455,11 @@ struct hermon {
*/
unsigned long reserved_lkey;
+ /** Event queue */
+ struct hermon_event_queue eq;
+ /** Event queue process */
+ struct process event_process;
+
/** Completion queue in-use bitmask */
hermon_bitmask_t cq_inuse[ HERMON_BITMASK_SIZE ( HERMON_MAX_CQS ) ];
/** Queue pair in-use bitmask */
diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c
index d457b258..3b915bf0 100644
--- a/src/drivers/net/ipoib.c
+++ b/src/drivers/net/ipoib.c
@@ -80,10 +80,14 @@ struct ipoib_device {
struct ib_gid broadcast_gid;
/** Broadcast LID */
unsigned int broadcast_lid;
- /** Joined to broadcast group */
- int broadcast_joined;
/** Data queue key */
unsigned long data_qkey;
+ /** Attached to multicast group
+ *
+ * This flag indicates whether or not we have attached our
+ * data queue pair to the broadcast multicast GID.
+ */
+ int broadcast_attached;
};
/**
@@ -272,6 +276,10 @@ static int ipoib_create_qset ( struct ipoib_device *ipoib,
struct ib_device *ibdev = ipoib->ibdev;
int rc;
+ /* Sanity check */
+ assert ( qset->cq == NULL );
+ assert ( qset->qp == NULL );
+
/* Store queue parameters */
qset->recv_max_fill = num_recv_wqes;
@@ -617,14 +625,24 @@ static void ipoib_recv_path_record ( struct ipoib_device *ipoib __unused,
*/
static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib,
struct ib_mad_mc_member_record *mc_member_record ) {
+ int joined;
+ int rc;
+
/* Record parameters */
- ipoib->broadcast_joined =
- ( mc_member_record->scope__join_state & 0x0f );
+ joined = ( mc_member_record->scope__join_state & 0x0f );
ipoib->data_qkey = ntohl ( mc_member_record->qkey );
ipoib->broadcast_lid = ntohs ( mc_member_record->mlid );
DBGC ( ipoib, "IPoIB %p %s broadcast group: qkey %lx mlid %x\n",
- ipoib, ( ipoib->broadcast_joined ? "joined" : "left" ),
- ipoib->data_qkey, ipoib->broadcast_lid );
+ ipoib, ( joined ? "joined" : "left" ), ipoib->data_qkey,
+ ipoib->broadcast_lid );
+
+ /* Update data queue pair qkey */
+ if ( ( rc = ib_modify_qp ( ipoib->ibdev, ipoib->data.qp,
+ IB_MODIFY_QKEY, ipoib->data_qkey ) ) != 0 ){
+ DBGC ( ipoib, "IPoIB %p could not update data qkey: %s\n",
+ ipoib, strerror ( rc ) );
+ return;
+ }
}
/**
@@ -742,6 +760,56 @@ static void ipoib_irq ( struct net_device *netdev __unused,
}
/**
+ * Join IPv4 broadcast multicast group
+ *
+ * @v ipoib IPoIB device
+ * @ret rc Return status code
+ */
+static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
+	int rc;
+
+	/* Sanity check: nothing to do until the data queue pair
+	 * exists (i.e. while the device is closed).
+	 */
+	if ( ! ipoib->data.qp )
+		return 0;
+
+	/* Attach data queue to broadcast multicast GID */
+	assert ( ipoib->broadcast_attached == 0 );
+	if ( ( rc = ib_mcast_attach ( ipoib->ibdev, ipoib->data.qp,
+				      &ipoib->broadcast_gid ) ) != 0 ){
+		DBGC ( ipoib, "IPoIB %p could not attach to broadcast GID: "
+		       "%s\n", ipoib, strerror ( rc ) );
+		goto err_attach;
+	}
+	ipoib->broadcast_attached = 1;
+
+	/* Initiate broadcast group join */
+	if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid,
+					     1 ) ) != 0 ) {
+		DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n",
+		       ipoib, strerror ( rc ) );
+		goto err_join;
+	}
+
+	return 0;
+
+	/* On failure, leave no attachment behind: callers such as
+	 * ipoib_open() destroy the data queue pair on error without
+	 * detaching it, and a stale broadcast_attached flag would
+	 * trip the assertion above on the next attempt.
+	 */
+ err_join:
+	ib_mcast_detach ( ipoib->ibdev, ipoib->data.qp,
+			  &ipoib->broadcast_gid );
+	ipoib->broadcast_attached = 0;
+ err_attach:
+	return rc;
+}
+
+/**
+ * Leave IPv4 broadcast multicast group
+ *
+ * @v ipoib IPoIB device
+ */
+static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {
+
+	/* Nothing to undo unless the data queue is currently
+	 * attached to the broadcast multicast GID.
+	 */
+	if ( ! ipoib->broadcast_attached )
+		return;
+
+	/* Detach data queue from broadcast multicast GID */
+	assert ( ipoib->data.qp != NULL );
+	ib_mcast_detach ( ipoib->ibdev, ipoib->data.qp,
+			  &ipoib->broadcast_gid );
+	ipoib->broadcast_attached = 0;
+}
+
+/**
* Open IPoIB network device
*
* @v netdev Network device
@@ -749,22 +817,53 @@ static void ipoib_irq ( struct net_device *netdev __unused,
*/
static int ipoib_open ( struct net_device *netdev ) {
struct ipoib_device *ipoib = netdev->priv;
- struct ib_device *ibdev = ipoib->ibdev;
+ struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
int rc;
- /* Attach to broadcast multicast GID */
- if ( ( rc = ib_mcast_attach ( ibdev, ipoib->data.qp,
- &ipoib->broadcast_gid ) ) != 0 ) {
- DBG ( "Could not attach to broadcast GID: %s\n",
- strerror ( rc ) );
- return rc;
+ /* Allocate metadata queue set */
+ if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta,
+ IPOIB_META_NUM_CQES,
+ IPOIB_META_NUM_SEND_WQES,
+ IPOIB_META_NUM_RECV_WQES,
+ IB_GLOBAL_QKEY ) ) != 0 ) {
+ DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n",
+ ipoib, strerror ( rc ) );
+ goto err_create_meta_qset;
}
+ /* Allocate data queue set */
+ if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data,
+ IPOIB_DATA_NUM_CQES,
+ IPOIB_DATA_NUM_SEND_WQES,
+ IPOIB_DATA_NUM_RECV_WQES,
+ IB_GLOBAL_QKEY ) ) != 0 ) {
+ DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n",
+ ipoib, strerror ( rc ) );
+ goto err_create_data_qset;
+ }
+
+ /* Update MAC address with data QPN */
+ mac->qpn = htonl ( ipoib->data.qp->qpn );
+
/* Fill receive rings */
ipoib_refill_recv ( ipoib, &ipoib->meta );
ipoib_refill_recv ( ipoib, &ipoib->data );
+ /* Join broadcast group */
+ if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
+ DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
+ ipoib, strerror ( rc ) );
+ goto err_join_broadcast;
+ }
+
return 0;
+
+ err_join_broadcast:
+ ipoib_destroy_qset ( ipoib, &ipoib->data );
+ err_create_data_qset:
+ ipoib_destroy_qset ( ipoib, &ipoib->meta );
+ err_create_meta_qset:
+ return rc;
}
/**
@@ -774,12 +873,17 @@ static int ipoib_open ( struct net_device *netdev ) {
*/
static void ipoib_close ( struct net_device *netdev ) {
struct ipoib_device *ipoib = netdev->priv;
- struct ib_device *ibdev = ipoib->ibdev;
+ struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
- /* Detach from broadcast multicast GID */
- ib_mcast_detach ( ibdev, ipoib->data.qp, &ipoib->broadcast_gid );
+ /* Leave broadcast group */
+ ipoib_leave_broadcast_group ( ipoib );
- /* FIXME: should probably flush the receive ring */
+ /* Remove data QPN from MAC address */
+ mac->qpn = 0;
+
+ /* Tear down the queues */
+ ipoib_destroy_qset ( ipoib, &ipoib->data );
+ ipoib_destroy_qset ( ipoib, &ipoib->meta );
}
/** IPoIB network device operations */
@@ -792,44 +896,53 @@ static struct net_device_operations ipoib_operations = {
};
/**
- * Join IPoIB broadcast group
+ * Update IPoIB dynamic Infiniband parameters
*
* @v ipoib IPoIB device
- * @ret rc Return status code
+ *
+ * The Infiniband port GID and partition key will change at runtime,
+ * when the link is established (or lost). The MAC address is based
+ * on the port GID, and the broadcast GID is based on the partition
+ * key. This function recalculates these IPoIB device parameters.
*/
-static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
+static void ipoib_set_ib_params ( struct ipoib_device *ipoib ) {
struct ib_device *ibdev = ipoib->ibdev;
- unsigned int delay_ms;
- int rc;
+ struct ipoib_mac *mac;
- /* Make sure we have some receive descriptors */
- ipoib_refill_recv ( ipoib, &ipoib->meta );
+ /* Calculate GID portion of MAC address based on port GID */
+ mac = ( ( struct ipoib_mac * ) ipoib->netdev->ll_addr );
+ memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) );
- /* Send join request */
- if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid,
- 1 ) ) != 0 ) {
- DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n",
- ipoib, strerror ( rc ) );
- return rc;
- }
+ /* Calculate broadcast GID based on partition key */
+ memcpy ( &ipoib->broadcast_gid, &ipv4_broadcast_gid,
+ sizeof ( ipoib->broadcast_gid ) );
+ ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey );
+}
+
+/**
+ * Handle link status change
+ *
+ * @v ibdev Infiniband device
+ */
+void ipoib_link_state_changed ( struct ib_device *ibdev ) {
+ struct net_device *netdev = ib_get_ownerdata ( ibdev );
+ struct ipoib_device *ipoib = netdev->priv;
+ int rc;
- /* Wait for join to complete. Ideally we wouldn't delay for
- * this long, but we need the queue key before we can set up
- * the data queue pair, which we need before we can know the
- * MAC address.
+ /* Leave existing broadcast group */
+ ipoib_leave_broadcast_group ( ipoib );
+
+ /* Update MAC address and broadcast GID based on new port GID
+ * and partition key.
*/
- for ( delay_ms = IPOIB_JOIN_MAX_DELAY_MS ; delay_ms ; delay_ms-- ) {
- mdelay ( 1 );
- ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send,
- ipoib_meta_complete_recv );
- ipoib_refill_recv ( ipoib, &ipoib->meta );
- if ( ipoib->broadcast_joined )
- return 0;
- }
- DBGC ( ipoib, "IPoIB %p timed out waiting for broadcast join\n",
- ipoib );
+ ipoib_set_ib_params ( ipoib );
- return -ETIMEDOUT;
+ /* Join new broadcast group */
+ if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
+ DBGC ( ipoib, "IPoIB %p could not rejoin broadcast group: "
+ "%s\n", ipoib, strerror ( rc ) );
+ return;
+ }
}
/**
@@ -841,7 +954,6 @@ static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
int ipoib_probe ( struct ib_device *ibdev ) {
struct net_device *netdev;
struct ipoib_device *ipoib;
- struct ipoib_mac *mac;
int rc;
/* Allocate network device */
@@ -856,44 +968,11 @@ int ipoib_probe ( struct ib_device *ibdev ) {
ipoib->netdev = netdev;
ipoib->ibdev = ibdev;
- /* Calculate broadcast GID */
- memcpy ( &ipoib->broadcast_gid, &ipv4_broadcast_gid,
- sizeof ( ipoib->broadcast_gid ) );
- ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey );
-
- /* Allocate metadata queue set */
- if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta,
- IPOIB_META_NUM_CQES,
- IPOIB_META_NUM_SEND_WQES,
- IPOIB_META_NUM_RECV_WQES,
- IB_GLOBAL_QKEY ) ) != 0 ) {
- DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n",
- ipoib, strerror ( rc ) );
- goto err_create_meta_qset;
- }
-
- /* Join broadcast group */
- if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
- DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
- ipoib, strerror ( rc ) );
- goto err_join_broadcast_group;
- }
-
- /* Allocate data queue set */
- if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data,
- IPOIB_DATA_NUM_CQES,
- IPOIB_DATA_NUM_SEND_WQES,
- IPOIB_DATA_NUM_RECV_WQES,
- ipoib->data_qkey ) ) != 0 ) {
- DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n",
- ipoib, strerror ( rc ) );
- goto err_create_data_qset;
- }
-
- /* Construct MAC address */
- mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
- mac->qpn = htonl ( ipoib->data.qp->qpn );
- memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) );
+ /* Calculate as much of the broadcast GID and the MAC address
+ * as we can. We won't know either of these in full until we
+ * have link-up.
+ */
+ ipoib_set_ib_params ( ipoib );
/* Register network device */
if ( ( rc = register_netdev ( netdev ) ) != 0 )
@@ -902,11 +981,6 @@ int ipoib_probe ( struct ib_device *ibdev ) {
return 0;
err_register_netdev:
- ipoib_destroy_qset ( ipoib, &ipoib->data );
- err_join_broadcast_group:
- err_create_data_qset:
- ipoib_destroy_qset ( ipoib, &ipoib->meta );
- err_create_meta_qset:
netdev_nullify ( netdev );
netdev_put ( netdev );
return rc;
@@ -919,11 +993,8 @@ int ipoib_probe ( struct ib_device *ibdev ) {
*/
void ipoib_remove ( struct ib_device *ibdev ) {
struct net_device *netdev = ib_get_ownerdata ( ibdev );
- struct ipoib_device *ipoib = netdev->priv;
unregister_netdev ( netdev );
- ipoib_destroy_qset ( ipoib, &ipoib->data );
- ipoib_destroy_qset ( ipoib, &ipoib->meta );
netdev_nullify ( netdev );
netdev_put ( netdev );
}
diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h
index 354dc579..8fc928c7 100644
--- a/src/include/gpxe/infiniband.h
+++ b/src/include/gpxe/infiniband.h
@@ -95,6 +95,11 @@ struct ib_queue_pair {
void *owner_priv;
};
+/** Infiniband queue pair modification flags */
+enum ib_queue_pair_mods {
+ IB_MODIFY_QKEY = 0x0001,
+};
+
/** An Infiniband Completion Queue */
struct ib_completion_queue {
/** Completion queue number */
@@ -187,6 +192,16 @@ struct ib_device_operations {
*/
int ( * create_qp ) ( struct ib_device *ibdev,
struct ib_queue_pair *qp );
+ /** Modify queue pair
+ *
+ * @v ibdev Infiniband device
+ * @v qp Queue pair
+ * @v mod_list Modification list
+ * @ret rc Return status code
+ */
+ int ( * modify_qp ) ( struct ib_device *ibdev,
+ struct ib_queue_pair *qp,
+ unsigned long mod_list );
/** Destroy queue pair
*
* @v ibdev Infiniband device
@@ -291,6 +306,8 @@ struct ib_device {
struct ib_device_operations *op;
/** Port number */
unsigned int port;
+ /** Link state */
+ int link_up;
/** Port GID */
struct ib_gid port_gid;
/** Subnet manager LID */
@@ -311,6 +328,8 @@ extern struct ib_queue_pair *
ib_create_qp ( struct ib_device *ibdev, unsigned int num_send_wqes,
struct ib_completion_queue *send_cq, unsigned int num_recv_wqes,
struct ib_completion_queue *recv_cq, unsigned long qkey );
+extern int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+ unsigned long mod_list, unsigned long qkey );
extern void ib_destroy_qp ( struct ib_device *ibdev,
struct ib_queue_pair *qp );
extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
@@ -319,6 +338,7 @@ extern struct ib_device * alloc_ibdev ( size_t priv_size );
extern int register_ibdev ( struct ib_device *ibdev );
extern void unregister_ibdev ( struct ib_device *ibdev );
extern void free_ibdev ( struct ib_device *ibdev );
+extern void ib_link_state_changed ( struct ib_device *ibdev );
/**
* Post send work queue entry
diff --git a/src/include/gpxe/ipoib.h b/src/include/gpxe/ipoib.h
index 0551687d..bcbdc4c6 100644
--- a/src/include/gpxe/ipoib.h
+++ b/src/include/gpxe/ipoib.h
@@ -72,6 +72,7 @@ static inline struct net_device * alloc_ipoibdev ( size_t priv_size ) {
return netdev;
}
+extern void ipoib_link_state_changed ( struct ib_device *ibdev );
extern int ipoib_probe ( struct ib_device *ibdev );
extern void ipoib_remove ( struct ib_device *ibdev );
diff --git a/src/net/infiniband.c b/src/net/infiniband.c
index 39d11285..e5c79e96 100644
--- a/src/net/infiniband.c
+++ b/src/net/infiniband.c
@@ -153,14 +153,40 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
}
/**
+ * Modify queue pair
+ *
+ * @v ibdev Infiniband device
+ * @v qp Queue pair
+ * @v mod_list Modification list
+ * @v qkey New queue key, if applicable
+ * @ret rc Return status code
+ */
+int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+		   unsigned long mod_list, unsigned long qkey ) {
+	unsigned long old_qkey = qp->qkey;
+	int rc;
+
+	DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
+
+	/* The driver's modify_qp() method takes no queue key
+	 * argument, so the new value must be stored in the queue
+	 * pair before the method is invoked.
+	 */
+	if ( mod_list & IB_MODIFY_QKEY )
+		qp->qkey = qkey;
+
+	if ( ( rc = ibdev->op->modify_qp ( ibdev, qp, mod_list ) ) != 0 ) {
+		DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
+		       ibdev, qp->qpn, strerror ( rc ) );
+		/* The modification was rejected: restore the previous
+		 * queue key so that the software view of the queue
+		 * pair does not diverge from the hardware state.
+		 */
+		qp->qkey = old_qkey;
+		return rc;
+	}
+
+	return 0;
+}
+
+/**
* Destroy queue pair
*
* @v ibdev Infiniband device
* @v qp Queue pair
*/
-void ib_destroy_qp ( struct ib_device *ibdev,
- struct ib_queue_pair *qp ) {
- DBGC ( ibdev, "IBDEV %p destroying queue pair %#lx\n",
+void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
+ DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
ibdev, qp->qpn );
ibdev->op->destroy_qp ( ibdev, qp );
list_del ( &qp->send.list );
@@ -280,38 +306,6 @@ static int ib_get_pkey_table ( struct ib_device *ibdev,
}
/**
- * Wait for link up
- *
- * @v ibdev Infiniband device
- * @ret rc Return status code
- *
- * This function shouldn't really exist. Unfortunately, IB links take
- * a long time to come up, and we can't get various key parameters
- * e.g. our own IPoIB MAC address without information from the subnet
- * manager). We should eventually make link-up an asynchronous event.
- */
-static int ib_wait_for_link ( struct ib_device *ibdev ) {
- struct ib_mad_port_info port_info;
- unsigned int retries;
- int rc;
-
- printf ( "Waiting for Infiniband link-up..." );
- for ( retries = 20 ; retries ; retries-- ) {
- if ( ( rc = ib_get_port_info ( ibdev, &port_info ) ) != 0 )
- continue;
- if ( ( ( port_info.port_state__link_speed_supported ) & 0xf )
- == 4 ) {
- printf ( "ok\n" );
- return 0;
- }
- printf ( "." );
- sleep ( 1 );
- }
- printf ( "failed\n" );
- return -ENODEV;
-};
-
-/**
* Get MAD parameters
*
* @v ibdev Infiniband device
@@ -326,9 +320,13 @@ static int ib_get_mad_params ( struct ib_device *ibdev ) {
} u;
int rc;
- /* Port info gives us the first half of the port GID and the SM LID */
+ /* Port info gives us the link state, the first half of the
+ * port GID and the SM LID.
+ */
if ( ( rc = ib_get_port_info ( ibdev, &u.port_info ) ) != 0 )
return rc;
+ ibdev->link_up = ( ( u.port_info.port_state__link_speed_supported
+ & 0xf ) == 4 );
memcpy ( &ibdev->port_gid.u.bytes[0], u.port_info.gid_prefix, 8 );
ibdev->sm_lid = ntohs ( u.port_info.mastersm_lid );
@@ -391,10 +389,6 @@ int register_ibdev ( struct ib_device *ibdev ) {
if ( ( rc = ib_open ( ibdev ) ) != 0 )
goto err_open;
- /* Wait for link */
- if ( ( rc = ib_wait_for_link ( ibdev ) ) != 0 )
- goto err_wait_for_link;
-
/* Get MAD parameters */
if ( ( rc = ib_get_mad_params ( ibdev ) ) != 0 )
goto err_get_mad_params;
@@ -410,7 +404,6 @@ int register_ibdev ( struct ib_device *ibdev ) {
err_ipoib_probe:
err_get_mad_params:
- err_wait_for_link:
ib_close ( ibdev );
err_open:
return rc;
@@ -435,3 +428,21 @@ void free_ibdev ( struct ib_device *ibdev ) {
free ( ibdev );
}
+/**
+ * Handle Infiniband link state change
+ *
+ * @v ibdev Infiniband device
+ */
+void ib_link_state_changed ( struct ib_device *ibdev ) {
+	int rc;
+
+	/* Refresh the MAD-derived parameters first; IPoIB is only
+	 * notified once they have been updated successfully.
+	 */
+	rc = ib_get_mad_params ( ibdev );
+	if ( rc == 0 ) {
+		ipoib_link_state_changed ( ibdev );
+		return;
+	}
+
+	DBGC ( ibdev, "IBDEV %p could not update MAD parameters: %s\n",
+	       ibdev, strerror ( rc ) );
+}