summary refs log tree commit diff stats
path: root/src/drivers
diff options
context:
space:
mode:
author Michael Brown 2008-04-18 03:50:48 +0200
committer Michael Brown 2008-04-18 03:50:48 +0200
commit a176a24ac0a5769d6a844149595f409a1bc2e41d (patch)
tree 4dc18928e5274a853836fdf99ab93af0f24b4077 /src/drivers
parent [HCI] Display "Not an executable image" when appropriate (diff)
download ipxe-a176a24ac0a5769d6a844149595f409a1bc2e41d.tar.gz
ipxe-a176a24ac0a5769d6a844149595f409a1bc2e41d.tar.xz
ipxe-a176a24ac0a5769d6a844149595f409a1bc2e41d.zip
[Infiniband] Add preliminary multiple port support for Hermon cards
Infiniband devices no longer block waiting for link-up in register_ibdev(). Hermon driver needs to create an event queue and poll for link-up events. Infiniband core needs to reread MAD parameters when link state changes. IPoIB needs to cope with Infiniband link parameters being only partially available at probe and open time.
Diffstat (limited to 'src/drivers')
-rw-r--r-- src/drivers/infiniband/hermon.c 282
-rw-r--r-- src/drivers/infiniband/hermon.h 65
-rw-r--r-- src/drivers/net/ipoib.c 257
3 files changed, 504 insertions, 100 deletions
diff --git a/src/drivers/infiniband/hermon.c b/src/drivers/infiniband/hermon.c
index c10559f9..41494a5a 100644
--- a/src/drivers/infiniband/hermon.c
+++ b/src/drivers/infiniband/hermon.c
@@ -30,6 +30,7 @@
#include <gpxe/umalloc.h>
#include <gpxe/iobuf.h>
#include <gpxe/netdevice.h>
+#include <gpxe/process.h>
#include <gpxe/infiniband.h>
#include "hermon.h"
@@ -317,19 +318,30 @@ hermon_cmd_write_mtt ( struct hermon *hermon,
}
static inline int
+hermon_cmd_map_eq ( struct hermon *hermon, unsigned long index_map,
+		    const struct hermonprm_event_mask *mask ) {
+	int rc;
+
+	/* Issue MAP_EQ with the event mask as the command input.
+	 * Callers build index_map from a HERMON_MAP_EQ_MAP/UNMAP flag
+	 * OR'ed with the event queue number.
+	 */
+	rc = hermon_cmd ( hermon,
+			  HERMON_HCR_IN_CMD ( HERMON_HCR_MAP_EQ,
+					      0, sizeof ( *mask ) ),
+			  0, mask, index_map, NULL );
+	return rc;
+}
+
+static inline int
hermon_cmd_sw2hw_eq ( struct hermon *hermon, unsigned int index,
- const struct hermonprm_eqc *eqc ) {
+ const struct hermonprm_eqc *eqctx ) {
return hermon_cmd ( hermon,
HERMON_HCR_IN_CMD ( HERMON_HCR_SW2HW_EQ,
- 1, sizeof ( *eqc ) ),
- 0, eqc, index, NULL );
+ 1, sizeof ( *eqctx ) ),
+ 0, eqctx, index, NULL );
}
static inline int
-hermon_cmd_hw2sw_eq ( struct hermon *hermon, unsigned int index ) {
+hermon_cmd_hw2sw_eq ( struct hermon *hermon, unsigned int index,
+ struct hermonprm_eqc *eqctx ) {
return hermon_cmd ( hermon,
- HERMON_HCR_VOID_CMD ( HERMON_HCR_HW2SW_EQ ),
- 1, NULL, index, NULL );
+ HERMON_HCR_OUT_CMD ( HERMON_HCR_HW2SW_EQ,
+ 1, sizeof ( *eqctx ) ),
+ 1, NULL, index, eqctx );
}
static inline int
@@ -378,6 +390,15 @@ hermon_cmd_rtr2rts_qp ( struct hermon *hermon, unsigned long qpn,
}
static inline int
+hermon_cmd_rts2rts_qp ( struct hermon *hermon, unsigned long qpn,
+			const struct hermonprm_qp_ee_state_transitions *ctx ) {
+	int rc;
+
+	/* Issue RTS2RTS_QP for the given queue pair number, passing
+	 * the state transition context as the command input.
+	 */
+	rc = hermon_cmd ( hermon,
+			  HERMON_HCR_IN_CMD ( HERMON_HCR_RTS2RTS_QP,
+					      1, sizeof ( *ctx ) ),
+			  0, ctx, qpn, NULL );
+	return rc;
+}
+
+static inline int
hermon_cmd_2rst_qp ( struct hermon *hermon, unsigned long qpn ) {
return hermon_cmd ( hermon,
HERMON_HCR_VOID_CMD ( HERMON_HCR_2RST_QP ),
@@ -860,6 +881,39 @@ static int hermon_create_qp ( struct ib_device *ibdev,
}
/**
+ * Modify queue pair
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ * @v mod_list		Modification list
+ * @ret rc		Return status code
+ *
+ * Only IB_MODIFY_QKEY is translated into a hardware optional
+ * parameter flag; any other bits in the modification list are
+ * ignored.  The queue key is always taken from the queue pair
+ * structure.
+ */
+static int hermon_modify_qp ( struct ib_device *ibdev,
+			      struct ib_queue_pair *qp,
+			      unsigned long mod_list ) {
+	struct hermon *hermon = ib_get_drvdata ( ibdev );
+	struct hermonprm_qp_ee_state_transitions transition;
+	unsigned long opt_mask;
+	int rc;
+
+	/* Translate the modification list into optional parameter flags */
+	opt_mask = ( ( mod_list & IB_MODIFY_QKEY ) ?
+		     HERMON_QP_OPT_PARAM_QKEY : 0 );
+
+	/* Build the state transition context */
+	memset ( &transition, 0, sizeof ( transition ) );
+	MLX_FILL_1 ( &transition, 0, opt_param_mask, opt_mask );
+	MLX_FILL_1 ( &transition, 44, qpc_eec_data.q_key, qp->qkey );
+
+	/* Issue RTS2RTS_QP */
+	rc = hermon_cmd_rts2rts_qp ( hermon, qp->qpn, &transition );
+	if ( rc != 0 ) {
+		DBGC ( hermon, "Hermon %p RTS2RTS_QP failed: %s\n",
+		       hermon, strerror ( rc ) );
+		return rc;
+	}
+
+	return 0;
+}
+
+/**
* Destroy queue pair
*
* @v ibdev Infiniband device
@@ -1356,6 +1410,7 @@ static struct ib_device_operations hermon_ib_operations = {
.create_cq = hermon_create_cq,
.destroy_cq = hermon_destroy_cq,
.create_qp = hermon_create_qp,
+ .modify_qp = hermon_modify_qp,
.destroy_qp = hermon_destroy_qp,
.post_send = hermon_post_send,
.post_recv = hermon_post_recv,
@@ -1369,6 +1424,211 @@ static struct ib_device_operations hermon_ib_operations = {
/***************************************************************************
*
+ * Event queues
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Create event queue
+ *
+ * @v hermon Hermon device
+ * @ret rc Return status code
+ *
+ * Allocates the event queue entries and their MTT entries, hands
+ * event queue number 0 over to the hardware with SW2HW_EQ and then
+ * maps port state change events to it with MAP_EQ.  On failure the
+ * steps already completed are undone in reverse order and the event
+ * queue structure is zeroed.
+ */
+static int hermon_create_eq ( struct hermon *hermon ) {
+ struct hermon_event_queue *hermon_eq = &hermon->eq;
+ struct hermonprm_eqc eqctx;
+ struct hermonprm_event_mask mask;
+ unsigned int i;
+ int rc;
+
+ /* Allocate event queue itself */
+ hermon_eq->eqe_size =
+ ( HERMON_NUM_EQES * sizeof ( hermon_eq->eqe[0] ) );
+ hermon_eq->eqe = malloc_dma ( hermon_eq->eqe_size,
+ sizeof ( hermon_eq->eqe[0] ) );
+ if ( ! hermon_eq->eqe ) {
+ rc = -ENOMEM;
+ goto err_eqe;
+ }
+ memset ( hermon_eq->eqe, 0, hermon_eq->eqe_size );
+ /* Set the owner bit in every entry.  While the running index is
+ * below HERMON_NUM_EQES, hermon_poll_eq() treats an entry with
+ * the owner bit set as still owned by hardware, so the queue
+ * initially appears empty.
+ */
+ for ( i = 0 ; i < HERMON_NUM_EQES ; i++ ) {
+ MLX_FILL_1 ( &hermon_eq->eqe[i].generic, 7, owner, 1 );
+ }
+ /* NOTE(review): presumably keeps the initialisation above ahead
+ * of the hardware hand-over below -- confirm barrier() semantics.
+ */
+ barrier();
+
+ /* Allocate MTT entries */
+ if ( ( rc = hermon_alloc_mtt ( hermon, hermon_eq->eqe,
+ hermon_eq->eqe_size,
+ &hermon_eq->mtt ) ) != 0 )
+ goto err_alloc_mtt;
+
+ /* Hand queue over to hardware */
+ memset ( &eqctx, 0, sizeof ( eqctx ) );
+ MLX_FILL_1 ( &eqctx, 0, st, 0xa /* "Fired" */ );
+ MLX_FILL_1 ( &eqctx, 2,
+ page_offset, ( hermon_eq->mtt.page_offset >> 5 ) );
+ /* NOTE(review): presumably log2 of the entry count, which
+ * assumes HERMON_NUM_EQES is a power of two -- confirm fls().
+ */
+ MLX_FILL_1 ( &eqctx, 3, log_eq_size, fls ( HERMON_NUM_EQES - 1 ) );
+ MLX_FILL_1 ( &eqctx, 7, mtt_base_addr_l,
+ ( hermon_eq->mtt.mtt_base_addr >> 3 ) );
+ if ( ( rc = hermon_cmd_sw2hw_eq ( hermon, 0, &eqctx ) ) != 0 ) {
+ DBGC ( hermon, "Hermon %p SW2HW_EQ failed: %s\n",
+ hermon, strerror ( rc ) );
+ goto err_sw2hw_eq;
+ }
+
+ /* Map events to this event queue */
+ memset ( &mask, 0, sizeof ( mask ) );
+ MLX_FILL_1 ( &mask, 1, port_state_change, 1 );
+ if ( ( rc = hermon_cmd_map_eq ( hermon, ( HERMON_MAP_EQ_MAP | 0 ),
+ &mask ) ) != 0 ) {
+ DBGC ( hermon, "Hermon %p MAP_EQ failed: %s\n",
+ hermon, strerror ( rc ) );
+ goto err_map_eq;
+ }
+
+ return 0;
+
+ err_map_eq:
+ /* Reclaim the queue from hardware; the context written back into
+ * eqctx is not examined, and the command's status is ignored.
+ */
+ hermon_cmd_hw2sw_eq ( hermon, 0, &eqctx );
+ err_sw2hw_eq:
+ hermon_free_mtt ( hermon, &hermon_eq->mtt );
+ err_alloc_mtt:
+ free_dma ( hermon_eq->eqe, hermon_eq->eqe_size );
+ err_eqe:
+ /* Leave the structure zeroed so that eqe is NULL on failure */
+ memset ( hermon_eq, 0, sizeof ( *hermon_eq ) );
+ return rc;
+}
+
+/**
+ * Destroy event queue
+ *
+ * @v hermon Hermon device
+ *
+ * Reverses hermon_create_eq(): unmaps port state change events from
+ * event queue number 0, reclaims the queue from the hardware with
+ * HW2SW_EQ, then frees the MTT entries and the queue memory.  If
+ * HW2SW_EQ fails, the queue memory and MTT entries are deliberately
+ * not freed and the event queue structure is left unchanged.
+ */
+static void hermon_destroy_eq ( struct hermon *hermon ) {
+ struct hermon_event_queue *hermon_eq = &hermon->eq;
+ struct hermonprm_eqc eqctx;
+ struct hermonprm_event_mask mask;
+ int rc;
+
+ /* Unmap events from event queue */
+ memset ( &mask, 0, sizeof ( mask ) );
+ MLX_FILL_1 ( &mask, 1, port_state_change, 1 );
+ if ( ( rc = hermon_cmd_map_eq ( hermon, ( HERMON_MAP_EQ_UNMAP | 0 ),
+ &mask ) ) != 0 ) {
+ DBGC ( hermon, "Hermon %p FATAL MAP_EQ failed to unmap: %s\n",
+ hermon, strerror ( rc ) );
+ /* Continue; HCA may die but system should survive */
+ }
+
+ /* Take ownership back from hardware */
+ /* (the context written back into eqctx is not examined) */
+ if ( ( rc = hermon_cmd_hw2sw_eq ( hermon, 0, &eqctx ) ) != 0 ) {
+ DBGC ( hermon, "Hermon %p FATAL HW2SW_EQ failed: %s\n",
+ hermon, strerror ( rc ) );
+ /* Leak memory and return; at least we avoid corruption */
+ return;
+ }
+
+ /* Free MTT entries */
+ hermon_free_mtt ( hermon, &hermon_eq->mtt );
+
+ /* Free memory */
+ free_dma ( hermon_eq->eqe, hermon_eq->eqe_size );
+ /* Zero the structure so that eqe is NULL and next_idx restarts at 0 */
+ memset ( hermon_eq, 0, sizeof ( *hermon_eq ) );
+}
+
+/**
+ * Handle port state event
+ *
+ * @v hermon		Hermon device
+ * @v eqe		Port state change event queue entry
+ *
+ * The event carries a one-based port number; it is converted to a
+ * zero-based index into the device's array of Infiniband devices.
+ * Events naming a port outside that array are logged and dropped.
+ */
+static void hermon_event_port_state_change ( struct hermon *hermon,
+					     union hermonprm_event_entry *eqe){
+	unsigned int port_idx;
+	int is_up;
+
+	/* Extract zero-based port index and link status */
+	port_idx = MLX_GET ( &eqe->port_state_change, data.p );
+	port_idx -= 1;
+	is_up = ( MLX_GET ( &eqe->generic, event_sub_type ) & 0x04 );
+	DBGC ( hermon, "Hermon %p port %d link %s\n", hermon,
+	       ( port_idx + 1 ), ( is_up ? "up" : "down" ) );
+
+	if ( port_idx < HERMON_NUM_PORTS ) {
+		/* Notify Infiniband core of link state change */
+		ib_link_state_changed ( hermon->ibdev[port_idx] );
+	} else {
+		/* Event refers to a port that we do not have */
+		DBGC ( hermon, "Hermon %p port %d does not exist!\n",
+		       hermon, ( port_idx + 1 ) );
+	}
+}
+
+/**
+ * Poll event queue
+ *
+ * @v hermon		Hermon device
+ *
+ * Consumes every event queue entry currently owned by software,
+ * dispatches it by event type, and rings the event queue doorbell
+ * with the updated index after each entry.
+ *
+ * An entry is owned by software when its owner bit equals the
+ * HERMON_NUM_EQES bit of the running index; that expected value
+ * flips each time the index wraps around the queue.
+ */
+static void hermon_poll_eq ( struct hermon *hermon ) {
+	struct hermon_event_queue *hermon_eq = &hermon->eq;
+	const unsigned int eqe_idx_mask = ( HERMON_NUM_EQES - 1 );
+	union hermonprm_event_entry *eqe;
+	union hermonprm_doorbell_register db_reg;
+	unsigned int event_type;
+
+	/* Do nothing if the event queue does not currently exist.
+	 * The polling process is initialised before the queue is
+	 * created and deleted after it is destroyed, and the queue
+	 * structure is zeroed on creation failure and on destruction,
+	 * so the entry pointer may legitimately be NULL here.
+	 */
+	if ( ! hermon_eq->eqe )
+		return;
+
+	while ( 1 ) {
+		eqe = &hermon_eq->eqe[hermon_eq->next_idx & eqe_idx_mask];
+		if ( MLX_GET ( &eqe->generic, owner ) ^
+		     ( ( hermon_eq->next_idx & HERMON_NUM_EQES ) ? 1 : 0 ) ) {
+			/* Entry still owned by hardware; end of poll */
+			break;
+		}
+		DBGCP ( hermon, "Hermon %p event:\n", hermon );
+		DBGCP_HD ( hermon, eqe, sizeof ( *eqe ) );
+
+		/* Handle event */
+		event_type = MLX_GET ( &eqe->generic, event_type );
+		switch ( event_type ) {
+		case HERMON_EV_PORT_STATE_CHANGE:
+			hermon_event_port_state_change ( hermon, eqe );
+			break;
+		default:
+			DBGC ( hermon, "Hermon %p unrecognised event type "
+			       "%#x:\n", hermon, event_type );
+			DBGC_HD ( hermon, eqe, sizeof ( *eqe ) );
+			break;
+		}
+
+		/* Update event queue's index */
+		hermon_eq->next_idx++;
+
+		/* Ring doorbell */
+		memset ( &db_reg, 0, sizeof ( db_reg ) );
+		MLX_FILL_1 ( &db_reg.event, 0, ci, hermon_eq->next_idx );
+		DBGCP ( hermon, "Ringing doorbell %08lx with %08lx\n",
+			virt_to_phys ( hermon->uar + HERMON_DB_EQ0_OFFSET ),
+			db_reg.dword[0] );
+		writel ( db_reg.dword[0],
+			 ( hermon->uar + HERMON_DB_EQ0_OFFSET ) );
+	}
+}
+
+/**
+ * Event queue poll processor
+ *
+ * @v process		Hermon event queue process
+ *
+ * The process structure is embedded within the Hermon device
+ * structure, from which the owning device is recovered.
+ */
+static void hermon_step ( struct process *process ) {
+	hermon_poll_eq ( container_of ( process, struct hermon,
+					event_process ) );
+}
+
+/***************************************************************************
+ *
* Firmware control
*
***************************************************************************
@@ -1879,6 +2139,7 @@ static int hermon_probe ( struct pci_device *pci,
goto err_alloc_hermon;
}
pci_set_drvdata ( pci, hermon );
+ process_init ( &hermon->event_process, hermon_step, NULL );
/* Allocate Infiniband devices */
for ( i = 0 ; i < HERMON_NUM_PORTS ; i++ ) {
@@ -1945,6 +2206,10 @@ static int hermon_probe ( struct pci_device *pci,
if ( ( rc = hermon_setup_mpt ( hermon ) ) != 0 )
goto err_setup_mpt;
+ /* Set up event queue */
+ if ( ( rc = hermon_create_eq ( hermon ) ) != 0 )
+ goto err_create_eq;
+
/* Register Infiniband devices */
for ( i = 0 ; i < HERMON_NUM_PORTS ; i++ ) {
if ( ( rc = register_ibdev ( hermon->ibdev[i] ) ) != 0 ) {
@@ -1960,6 +2225,8 @@ static int hermon_probe ( struct pci_device *pci,
err_register_ibdev:
for ( ; i >= 0 ; i-- )
unregister_ibdev ( hermon->ibdev[i] );
+ hermon_destroy_eq ( hermon );
+ err_create_eq:
err_setup_mpt:
hermon_cmd_close_hca ( hermon );
err_init_hca:
@@ -1976,6 +2243,7 @@ static int hermon_probe ( struct pci_device *pci,
err_alloc_ibdev:
for ( ; i >= 0 ; i-- )
free_ibdev ( hermon->ibdev[i] );
+ process_del ( &hermon->event_process );
free ( hermon );
err_alloc_hermon:
return rc;
@@ -1992,6 +2260,7 @@ static void hermon_remove ( struct pci_device *pci ) {
for ( i = ( HERMON_NUM_PORTS - 1 ) ; i >= 0 ; i-- )
unregister_ibdev ( hermon->ibdev[i] );
+ hermon_destroy_eq ( hermon );
hermon_cmd_close_hca ( hermon );
hermon_free_icm ( hermon );
hermon_stop_firmware ( hermon );
@@ -2000,6 +2269,7 @@ static void hermon_remove ( struct pci_device *pci ) {
free_dma ( hermon->mailbox_in, HERMON_MBOX_SIZE );
for ( i = ( HERMON_NUM_PORTS - 1 ) ; i >= 0 ; i-- )
free_ibdev ( hermon->ibdev[i] );
+ process_del ( &hermon->event_process );
free ( hermon );
}
diff --git a/src/drivers/infiniband/hermon.h b/src/drivers/infiniband/hermon.h
index 959e6a9d..d9e3dd11 100644
--- a/src/drivers/infiniband/hermon.h
+++ b/src/drivers/infiniband/hermon.h
@@ -9,6 +9,7 @@
#include <stdint.h>
#include <gpxe/uaccess.h>
+#include <gpxe/process.h>
#include "mlx_bitops.h"
#include "MT25408_PRM.h"
@@ -18,7 +19,7 @@
*/
/* Ports in existence */
-#define HERMON_NUM_PORTS 1
+#define HERMON_NUM_PORTS 2
#define HERMON_PORT_BASE 1
/* PCI BARs */
@@ -48,6 +49,7 @@
#define HERMON_HCR_RST2INIT_QP 0x0019
#define HERMON_HCR_INIT2RTR_QP 0x001a
#define HERMON_HCR_RTR2RTS_QP 0x001b
+#define HERMON_HCR_RTS2RTS_QP 0x001c
#define HERMON_HCR_2RST_QP 0x0021
#define HERMON_HCR_MAD_IFC 0x0024
#define HERMON_HCR_READ_MCG 0x0025
@@ -75,6 +77,14 @@
#define HERMON_PAGE_SIZE 4096
#define HERMON_DB_POST_SND_OFFSET 0x14
+/* Offset from the UAR base of the doorbell rung by hermon_poll_eq() */
+#define HERMON_DB_EQ0_OFFSET 0x800
+
+/* opt_param_mask flag set by hermon_modify_qp() for IB_MODIFY_QKEY */
+#define HERMON_QP_OPT_PARAM_QKEY 0x00000020UL
+
+/* Flags OR'ed with the event queue number to form the MAP_EQ index map */
+#define HERMON_MAP_EQ_MAP ( 0UL << 31 )
+#define HERMON_MAP_EQ_UNMAP ( 1UL << 31 )
+
+/* Event type dispatched to hermon_event_port_state_change() */
+#define HERMON_EV_PORT_STATE_CHANGE 0x09
/*
* Datatypes that seem to be missing from the autogenerated documentation
@@ -108,12 +118,32 @@ struct hermonprm_send_db_register_st {
pseudo_bit_t qn[0x00018];
} __attribute__ (( packed ));
+/* Event queue doorbell register layout.  hermon_poll_eq() writes the
+ * updated event queue index into the "ci" field; the remaining
+ * fields are left as zero.
+ */
+struct hermonprm_event_db_register_st {
+	pseudo_bit_t ci[0x00018];
+	pseudo_bit_t reserved[0x00007];
+	pseudo_bit_t a[0x00001];
+} __attribute__ (( packed ));
+
struct hermonprm_scalar_parameter_st {
pseudo_bit_t value_hi[0x00020];
/* -------------- */
pseudo_bit_t value[0x00020];
} __attribute__ (( packed ));
+/* Event mask supplied as the input to the MAP_EQ command.
+ * hermon_create_eq() and hermon_destroy_eq() set only the
+ * port_state_change bit.
+ */
+struct hermonprm_event_mask_st {
+ pseudo_bit_t reserved0[0x00020];
+/* -------------- */
+ pseudo_bit_t completion[0x00001];
+ pseudo_bit_t reserved1[0x0008];
+ pseudo_bit_t port_state_change[0x00001];
+ pseudo_bit_t reserved2[0x00016];
+} __attribute__ (( packed ));
+
+/* Port state change view of an event queue entry.  The one-based
+ * port number is read from data.p by
+ * hermon_event_port_state_change().
+ */
+struct hermonprm_port_state_change_event_st {
+ pseudo_bit_t reserved[0x00020];
+ struct hermonprm_port_state_change_st data;
+} __attribute__ (( packed ));
+
/*
* Wrapper structures for hardware datatypes
*
@@ -124,6 +154,9 @@ struct MLX_DECLARE_STRUCT ( hermonprm_completion_queue_entry );
struct MLX_DECLARE_STRUCT ( hermonprm_completion_with_error );
struct MLX_DECLARE_STRUCT ( hermonprm_cq_db_record );
struct MLX_DECLARE_STRUCT ( hermonprm_eqc );
+struct MLX_DECLARE_STRUCT ( hermonprm_event_db_register );
+struct MLX_DECLARE_STRUCT ( hermonprm_event_mask );
+struct MLX_DECLARE_STRUCT ( hermonprm_event_queue_entry );
struct MLX_DECLARE_STRUCT ( hermonprm_hca_command_register );
struct MLX_DECLARE_STRUCT ( hermonprm_init_hca );
struct MLX_DECLARE_STRUCT ( hermonprm_init_port );
@@ -132,6 +165,7 @@ struct MLX_DECLARE_STRUCT ( hermonprm_mcg_entry );
struct MLX_DECLARE_STRUCT ( hermonprm_mgm_hash );
struct MLX_DECLARE_STRUCT ( hermonprm_mpt );
struct MLX_DECLARE_STRUCT ( hermonprm_mtt );
+struct MLX_DECLARE_STRUCT ( hermonprm_port_state_change_event );
struct MLX_DECLARE_STRUCT ( hermonprm_qp_db_record );
struct MLX_DECLARE_STRUCT ( hermonprm_qp_ee_state_transitions );
struct MLX_DECLARE_STRUCT ( hermonprm_query_dev_cap );
@@ -175,8 +209,14 @@ union hermonprm_completion_entry {
struct hermonprm_completion_with_error error;
} __attribute__ (( packed ));
+/* An event queue entry, viewed either generically (owner,
+ * event_type and event_sub_type fields) or as a port state change
+ * event.
+ */
+union hermonprm_event_entry {
+ struct hermonprm_event_queue_entry generic;
+ struct hermonprm_port_state_change_event port_state_change;
+} __attribute__ (( packed ));
+
union hermonprm_doorbell_register {
struct hermonprm_send_db_register send;
+ struct hermonprm_event_db_register event;
uint32_t dword[1];
} __attribute__ (( packed ));
@@ -362,6 +402,24 @@ struct hermon_completion_queue {
*/
#define HERMON_MAX_EQS 4
+/** A Hermon event queue */
+struct hermon_event_queue {
+ /** Event queue entries
+ *
+ * NULL when no event queue exists; the structure is zeroed
+ * when creation fails and when the queue is destroyed.
+ */
+ union hermonprm_event_entry *eqe;
+ /** Size of event queue, in bytes */
+ size_t eqe_size;
+ /** MTT descriptor */
+ struct hermon_mtt mtt;
+ /** Next event queue entry index
+ *
+ * This is a running counter that is never reduced modulo the
+ * queue size.  The low bits select the entry, and the
+ * HERMON_NUM_EQES bit gives the owner bit value that marks
+ * an entry as owned by software.
+ */
+ unsigned long next_idx;
+};
+
+/** Number of event queue entries
+ *
+ * This is a policy decision.
+ *
+ * The value must be a power of two: hermon_poll_eq() uses
+ * ( HERMON_NUM_EQES - 1 ) as an index mask and tests the
+ * HERMON_NUM_EQES bit of the running index to determine entry
+ * ownership.
+ */
+#define HERMON_NUM_EQES 4
+
/** A Hermon resource bitmask */
typedef uint32_t hermon_bitmask_t;
@@ -397,6 +455,11 @@ struct hermon {
*/
unsigned long reserved_lkey;
+ /** Event queue */
+ struct hermon_event_queue eq;
+ /** Event queue process */
+ struct process event_process;
+
/** Completion queue in-use bitmask */
hermon_bitmask_t cq_inuse[ HERMON_BITMASK_SIZE ( HERMON_MAX_CQS ) ];
/** Queue pair in-use bitmask */
diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c
index d457b258..3b915bf0 100644
--- a/src/drivers/net/ipoib.c
+++ b/src/drivers/net/ipoib.c
@@ -80,10 +80,14 @@ struct ipoib_device {
struct ib_gid broadcast_gid;
/** Broadcast LID */
unsigned int broadcast_lid;
- /** Joined to broadcast group */
- int broadcast_joined;
/** Data queue key */
unsigned long data_qkey;
+ /** Attached to multicast group
+ *
+ * This flag indicates whether or not we have attached our
+ * data queue pair to the broadcast multicast GID.
+ */
+ int broadcast_attached;
};
/**
@@ -272,6 +276,10 @@ static int ipoib_create_qset ( struct ipoib_device *ipoib,
struct ib_device *ibdev = ipoib->ibdev;
int rc;
+ /* Sanity check */
+ assert ( qset->cq == NULL );
+ assert ( qset->qp == NULL );
+
/* Store queue parameters */
qset->recv_max_fill = num_recv_wqes;
@@ -617,14 +625,24 @@ static void ipoib_recv_path_record ( struct ipoib_device *ipoib __unused,
*/
static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib,
struct ib_mad_mc_member_record *mc_member_record ) {
+ int joined;
+ int rc;
+
/* Record parameters */
- ipoib->broadcast_joined =
- ( mc_member_record->scope__join_state & 0x0f );
+ joined = ( mc_member_record->scope__join_state & 0x0f );
ipoib->data_qkey = ntohl ( mc_member_record->qkey );
ipoib->broadcast_lid = ntohs ( mc_member_record->mlid );
DBGC ( ipoib, "IPoIB %p %s broadcast group: qkey %lx mlid %x\n",
- ipoib, ( ipoib->broadcast_joined ? "joined" : "left" ),
- ipoib->data_qkey, ipoib->broadcast_lid );
+ ipoib, ( joined ? "joined" : "left" ), ipoib->data_qkey,
+ ipoib->broadcast_lid );
+
+ /* Update data queue pair qkey */
+ if ( ( rc = ib_modify_qp ( ipoib->ibdev, ipoib->data.qp,
+ IB_MODIFY_QKEY, ipoib->data_qkey ) ) != 0 ){
+ DBGC ( ipoib, "IPoIB %p could not update data qkey: %s\n",
+ ipoib, strerror ( rc ) );
+ return;
+ }
}
/**
@@ -742,6 +760,56 @@ static void ipoib_irq ( struct net_device *netdev __unused,
}
/**
+ * Join IPv4 broadcast multicast group
+ *
+ * @v ipoib		IPoIB device
+ * @ret rc		Return status code
+ *
+ * Attaches the data queue pair to the broadcast multicast GID and
+ * then sends the broadcast group join request.  If no data queue
+ * pair exists, nothing is done and success is returned.
+ *
+ * On failure the data queue pair is left detached and the
+ * broadcast_attached flag is left clear, so that callers may destroy
+ * the data queue set without first having to leave the group.
+ */
+static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
+	int rc;
+
+	/* Sanity check */
+	if ( ! ipoib->data.qp )
+		return 0;
+
+	/* Attach data queue to broadcast multicast GID */
+	assert ( ipoib->broadcast_attached == 0 );
+	if ( ( rc = ib_mcast_attach ( ipoib->ibdev, ipoib->data.qp,
+				      &ipoib->broadcast_gid ) ) != 0 ){
+		DBGC ( ipoib, "IPoIB %p could not attach to broadcast GID: "
+		       "%s\n", ipoib, strerror ( rc ) );
+		goto err_mcast_attach;
+	}
+	ipoib->broadcast_attached = 1;
+
+	/* Initiate broadcast group join */
+	if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid,
+					     1 ) ) != 0 ) {
+		DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n",
+		       ipoib, strerror ( rc ) );
+		goto err_mc_member_record;
+	}
+
+	return 0;
+
+ err_mc_member_record:
+	/* Undo the attach; otherwise the flag would stay set while the
+	 * caller destroys the queue pair it refers to.
+	 */
+	ib_mcast_detach ( ipoib->ibdev, ipoib->data.qp,
+			  &ipoib->broadcast_gid );
+	ipoib->broadcast_attached = 0;
+ err_mcast_attach:
+	return rc;
+}
+
+/**
+ * Leave IPv4 broadcast multicast group
+ *
+ * @v ipoib		IPoIB device
+ *
+ * Detaches the data queue pair from the broadcast multicast GID if
+ * it is currently attached; otherwise does nothing.
+ */
+static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {
+
+	/* Nothing to do unless the data queue is currently attached */
+	if ( ! ipoib->broadcast_attached )
+		return;
+
+	/* Detach data queue from broadcast multicast GID */
+	assert ( ipoib->data.qp != NULL );
+	ib_mcast_detach ( ipoib->ibdev, ipoib->data.qp,
+			  &ipoib->broadcast_gid );
+	ipoib->broadcast_attached = 0;
+}
+
+/**
* Open IPoIB network device
*
* @v netdev Network device
@@ -749,22 +817,53 @@ static void ipoib_irq ( struct net_device *netdev __unused,
*/
static int ipoib_open ( struct net_device *netdev ) {
struct ipoib_device *ipoib = netdev->priv;
- struct ib_device *ibdev = ipoib->ibdev;
+ struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
int rc;
- /* Attach to broadcast multicast GID */
- if ( ( rc = ib_mcast_attach ( ibdev, ipoib->data.qp,
- &ipoib->broadcast_gid ) ) != 0 ) {
- DBG ( "Could not attach to broadcast GID: %s\n",
- strerror ( rc ) );
- return rc;
+ /* Allocate metadata queue set */
+ if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta,
+ IPOIB_META_NUM_CQES,
+ IPOIB_META_NUM_SEND_WQES,
+ IPOIB_META_NUM_RECV_WQES,
+ IB_GLOBAL_QKEY ) ) != 0 ) {
+ DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n",
+ ipoib, strerror ( rc ) );
+ goto err_create_meta_qset;
}
+ /* Allocate data queue set */
+ if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data,
+ IPOIB_DATA_NUM_CQES,
+ IPOIB_DATA_NUM_SEND_WQES,
+ IPOIB_DATA_NUM_RECV_WQES,
+ IB_GLOBAL_QKEY ) ) != 0 ) {
+ DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n",
+ ipoib, strerror ( rc ) );
+ goto err_create_data_qset;
+ }
+
+ /* Update MAC address with data QPN */
+ mac->qpn = htonl ( ipoib->data.qp->qpn );
+
/* Fill receive rings */
ipoib_refill_recv ( ipoib, &ipoib->meta );
ipoib_refill_recv ( ipoib, &ipoib->data );
+ /* Join broadcast group */
+ if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
+ DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
+ ipoib, strerror ( rc ) );
+ goto err_join_broadcast;
+ }
+
return 0;
+
+ err_join_broadcast:
+ ipoib_destroy_qset ( ipoib, &ipoib->data );
+ err_create_data_qset:
+ ipoib_destroy_qset ( ipoib, &ipoib->meta );
+ err_create_meta_qset:
+ return rc;
}
/**
@@ -774,12 +873,17 @@ static int ipoib_open ( struct net_device *netdev ) {
*/
static void ipoib_close ( struct net_device *netdev ) {
struct ipoib_device *ipoib = netdev->priv;
- struct ib_device *ibdev = ipoib->ibdev;
+ struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
- /* Detach from broadcast multicast GID */
- ib_mcast_detach ( ibdev, ipoib->data.qp, &ipoib->broadcast_gid );
+ /* Leave broadcast group */
+ ipoib_leave_broadcast_group ( ipoib );
- /* FIXME: should probably flush the receive ring */
+ /* Remove data QPN from MAC address */
+ mac->qpn = 0;
+
+ /* Tear down the queues */
+ ipoib_destroy_qset ( ipoib, &ipoib->data );
+ ipoib_destroy_qset ( ipoib, &ipoib->meta );
}
/** IPoIB network device operations */
@@ -792,44 +896,53 @@ static struct net_device_operations ipoib_operations = {
};
/**
- * Join IPoIB broadcast group
+ * Update IPoIB dynamic Infiniband parameters
*
* @v ipoib IPoIB device
- * @ret rc Return status code
+ *
+ * The Infiniband port GID and partition key will change at runtime,
+ * when the link is established (or lost). The MAC address is based
+ * on the port GID, and the broadcast GID is based on the partition
+ * key. This function recalculates these IPoIB device parameters.
*/
-static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
+static void ipoib_set_ib_params ( struct ipoib_device *ipoib ) {
struct ib_device *ibdev = ipoib->ibdev;
- unsigned int delay_ms;
- int rc;
+ struct ipoib_mac *mac;
- /* Make sure we have some receive descriptors */
- ipoib_refill_recv ( ipoib, &ipoib->meta );
+ /* Calculate GID portion of MAC address based on port GID */
+ mac = ( ( struct ipoib_mac * ) ipoib->netdev->ll_addr );
+ memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) );
- /* Send join request */
- if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid,
- 1 ) ) != 0 ) {
- DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n",
- ipoib, strerror ( rc ) );
- return rc;
- }
+ /* Calculate broadcast GID based on partition key */
+ memcpy ( &ipoib->broadcast_gid, &ipv4_broadcast_gid,
+ sizeof ( ipoib->broadcast_gid ) );
+ ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey );
+}
+
+/**
+ * Handle link status change
+ *
+ * @v ibdev Infiniband device
+ */
+void ipoib_link_state_changed ( struct ib_device *ibdev ) {
+ struct net_device *netdev = ib_get_ownerdata ( ibdev );
+ struct ipoib_device *ipoib = netdev->priv;
+ int rc;
- /* Wait for join to complete. Ideally we wouldn't delay for
- * this long, but we need the queue key before we can set up
- * the data queue pair, which we need before we can know the
- * MAC address.
+ /* Leave existing broadcast group */
+ ipoib_leave_broadcast_group ( ipoib );
+
+ /* Update MAC address and broadcast GID based on new port GID
+ * and partition key.
*/
- for ( delay_ms = IPOIB_JOIN_MAX_DELAY_MS ; delay_ms ; delay_ms-- ) {
- mdelay ( 1 );
- ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send,
- ipoib_meta_complete_recv );
- ipoib_refill_recv ( ipoib, &ipoib->meta );
- if ( ipoib->broadcast_joined )
- return 0;
- }
- DBGC ( ipoib, "IPoIB %p timed out waiting for broadcast join\n",
- ipoib );
+ ipoib_set_ib_params ( ipoib );
- return -ETIMEDOUT;
+ /* Join new broadcast group */
+ if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
+ DBGC ( ipoib, "IPoIB %p could not rejoin broadcast group: "
+ "%s\n", ipoib, strerror ( rc ) );
+ return;
+ }
}
/**
@@ -841,7 +954,6 @@ static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
int ipoib_probe ( struct ib_device *ibdev ) {
struct net_device *netdev;
struct ipoib_device *ipoib;
- struct ipoib_mac *mac;
int rc;
/* Allocate network device */
@@ -856,44 +968,11 @@ int ipoib_probe ( struct ib_device *ibdev ) {
ipoib->netdev = netdev;
ipoib->ibdev = ibdev;
- /* Calculate broadcast GID */
- memcpy ( &ipoib->broadcast_gid, &ipv4_broadcast_gid,
- sizeof ( ipoib->broadcast_gid ) );
- ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey );
-
- /* Allocate metadata queue set */
- if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta,
- IPOIB_META_NUM_CQES,
- IPOIB_META_NUM_SEND_WQES,
- IPOIB_META_NUM_RECV_WQES,
- IB_GLOBAL_QKEY ) ) != 0 ) {
- DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n",
- ipoib, strerror ( rc ) );
- goto err_create_meta_qset;
- }
-
- /* Join broadcast group */
- if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
- DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
- ipoib, strerror ( rc ) );
- goto err_join_broadcast_group;
- }
-
- /* Allocate data queue set */
- if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data,
- IPOIB_DATA_NUM_CQES,
- IPOIB_DATA_NUM_SEND_WQES,
- IPOIB_DATA_NUM_RECV_WQES,
- ipoib->data_qkey ) ) != 0 ) {
- DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n",
- ipoib, strerror ( rc ) );
- goto err_create_data_qset;
- }
-
- /* Construct MAC address */
- mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
- mac->qpn = htonl ( ipoib->data.qp->qpn );
- memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) );
+ /* Calculate as much of the broadcast GID and the MAC address
+ * as we can. We won't know either of these in full until we
+ * have link-up.
+ */
+ ipoib_set_ib_params ( ipoib );
/* Register network device */
if ( ( rc = register_netdev ( netdev ) ) != 0 )
@@ -902,11 +981,6 @@ int ipoib_probe ( struct ib_device *ibdev ) {
return 0;
err_register_netdev:
- ipoib_destroy_qset ( ipoib, &ipoib->data );
- err_join_broadcast_group:
- err_create_data_qset:
- ipoib_destroy_qset ( ipoib, &ipoib->meta );
- err_create_meta_qset:
netdev_nullify ( netdev );
netdev_put ( netdev );
return rc;
@@ -919,11 +993,8 @@ int ipoib_probe ( struct ib_device *ibdev ) {
*/
void ipoib_remove ( struct ib_device *ibdev ) {
struct net_device *netdev = ib_get_ownerdata ( ibdev );
- struct ipoib_device *ipoib = netdev->priv;
unregister_netdev ( netdev );
- ipoib_destroy_qset ( ipoib, &ipoib->data );
- ipoib_destroy_qset ( ipoib, &ipoib->meta );
netdev_nullify ( netdev );
netdev_put ( netdev );
}