summaryrefslogblamecommitdiffstats
path: root/src/net/infiniband.c
blob: 2edadc99d564c67c2cab3b3d6de1cb733ffcaec4 (plain) (tree)


















                                                                      
                   

                   
                   


                     
                      


                           
                       







                            
   





















                                                                      

                                                                         



                            

                                                                               


















                                                                    






                                                            
                                 





                                                                          

                                                                          
                                 
                          




                                                                



                                                                           

                            
                        
















                                                                               

                                            



                            





                                                                              
                                                              
                                                    













                                                              

                                    


                    
   
                                                
  
                                        
                                         

                                                                  
   


                                                                      
 


                                                                           



                    










































































































































































                                                                              
















                                                                               
 
   















































                                                                          






                                             
 
/*
 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <byteswap.h>
#include <errno.h>
#include <assert.h>
#include <gpxe/list.h>
#include <gpxe/if_arp.h>
#include <gpxe/netdevice.h>
#include <gpxe/iobuf.h>
#include <gpxe/ipoib.h>
#include <gpxe/infiniband.h>

/** @file
 *
 * Infiniband protocol
 *
 */

/**
 * Create completion queue
 *
 * @v ibdev		Infiniband device
 * @v num_cqes		Number of completion queue entries
 * @ret cq		New completion queue, or NULL on failure
 *
 * The queue structure is allocated here and then handed to the
 * device's create_cq operation.  NULL is returned if either the
 * allocation or the device operation fails; in the latter case the
 * structure is freed again before returning.
 */
struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev,
					    unsigned int num_cqes ) {
	struct ib_completion_queue *new_cq;
	int status;

	DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );

	/* Allocate the queue with an empty list of attached work queues */
	new_cq = zalloc ( sizeof ( *new_cq ) );
	if ( new_cq == NULL )
		return NULL;
	INIT_LIST_HEAD ( &new_cq->work_queues );
	new_cq->num_cqes = num_cqes;

	/* Let the device set up its side of the queue (and the CQN) */
	status = ibdev->op->create_cq ( ibdev, new_cq );
	if ( status == 0 ) {
		DBGC ( ibdev, "IBDEV %p created %d-entry completion queue "
		       "%p (%p) with CQN %#lx\n", ibdev, num_cqes, new_cq,
		       new_cq->dev_priv, new_cq->cqn );
		return new_cq;
	}

	/* Device initialisation failed: discard the half-built queue */
	DBGC ( ibdev, "IBDEV %p could not initialise completion "
	       "queue: %s\n", ibdev, strerror ( status ) );
	free ( new_cq );
	return NULL;
}

/**
 * Destroy completion queue
 *
 * @v ibdev		Infiniband device
 * @v cq		Completion queue
 *
 * The completion queue must no longer have any work queues attached
 * (this is asserted).  The structure itself is freed, so the caller
 * must not use cq after this call returns.
 */
void ib_destroy_cq ( struct ib_device *ibdev,
		     struct ib_completion_queue *cq ) {
	DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
	       ibdev, cq->cqn );
	/* Every work queue must already have been detached from this CQ */
	assert ( list_empty ( &cq->work_queues ) );
	/* Device-specific teardown runs first, while cq is still valid */
	ibdev->op->destroy_cq ( ibdev, cq );
	free ( cq );
}

/**
 * Create queue pair
 *
 * @v ibdev		Infiniband device
 * @v num_send_wqes	Number of send work queue entries
 * @v send_cq		Send completion queue
 * @v num_recv_wqes	Number of receive work queue entries
 * @v recv_cq		Receive completion queue
 * @v qkey		Queue key
 * @ret qp		Queue pair, or NULL on failure
 *
 * The queue pair structure and the per-entry iobufs arrays of both
 * work queues live in one allocation: the send array directly follows
 * the structure and the receive array directly follows the send
 * array.  Each work queue is linked into its completion queue's list
 * of work queues before the device's create_qp operation is invoked;
 * if that operation fails, both are unlinked again and the allocation
 * is released.
 */
struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
				      unsigned int num_send_wqes,
				      struct ib_completion_queue *send_cq,
				      unsigned int num_recv_wqes,
				      struct ib_completion_queue *recv_cq,
				      unsigned long qkey ) {
	struct ib_queue_pair *qp;
	size_t send_iobufs_len;
	size_t recv_iobufs_len;
	size_t total_size;
	void *trailer;
	int rc;

	DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );

	/* Work out the layout: structure, send array, receive array */
	send_iobufs_len = ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) );
	recv_iobufs_len = ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) );
	total_size = ( sizeof ( *qp ) + send_iobufs_len + recv_iobufs_len );

	/* Allocate everything in one go */
	qp = zalloc ( total_size );
	if ( ! qp )
		return NULL;
	trailer = ( ( ( void * ) qp ) + sizeof ( *qp ) );
	qp->qkey = qkey;

	/* Set up the send work queue and attach it to its CQ */
	qp->send.qp = qp;
	qp->send.is_send = 1;
	qp->send.cq = send_cq;
	qp->send.num_wqes = num_send_wqes;
	qp->send.iobufs = trailer;
	list_add ( &qp->send.list, &send_cq->work_queues );

	/* Set up the receive work queue and attach it to its CQ.
	 * Its is_send flag is not written here.
	 */
	qp->recv.qp = qp;
	qp->recv.cq = recv_cq;
	qp->recv.num_wqes = num_recv_wqes;
	qp->recv.iobufs = ( trailer + send_iobufs_len );
	list_add ( &qp->recv.list, &recv_cq->work_queues );

	/* Perform device-specific initialisation and get QPN */
	rc = ibdev->op->create_qp ( ibdev, qp );
	if ( rc != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
		       "%s\n", ibdev, strerror ( rc ) );
		goto err_create_qp;
	}

	DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
	       ibdev, qp, qp->dev_priv, qp->qpn );
	DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
	       ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
	       qp->recv.iobufs );
	DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
	       ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
	       ( ( ( void * ) qp ) + total_size ) );
	return qp;

 err_create_qp:
	/* Detach both work queues again before releasing the memory */
	list_del ( &qp->send.list );
	list_del ( &qp->recv.list );
	free ( qp );
	return NULL;
}

/**
 * Destroy queue pair
 *
 * @v ibdev		Infiniband device
 * @v qp		Queue pair
 *
 * Invokes the device's destroy_qp operation, unlinks the send and
 * receive work queues from the lists they were added to, and frees
 * the queue pair.  The caller must not use qp afterwards.
 */
void ib_destroy_qp ( struct ib_device *ibdev,
		     struct ib_queue_pair *qp ) {
	DBGC ( ibdev, "IBDEV %p destroying queue pair %#lx\n",
	       ibdev, qp->qpn );
	/* Device-specific teardown runs first, while qp is still linked */
	ibdev->op->destroy_qp ( ibdev, qp );
	/* Detach both work queues from their completion queues */
	list_del ( &qp->send.list );
	list_del ( &qp->recv.list );
	free ( qp );
}

/**
 * Find work queue belonging to completion queue
 *
 * @v cq		Completion queue
 * @v qpn		Queue pair number
 * @v is_send		Find send work queue (rather than receive)
 * @ret wq		Work queue, or NULL if not found
 *
 * Walks the completion queue's list of attached work queues and
 * returns the first one whose queue pair number and is_send flag are
 * both equal to the values requested.  Note that is_send is compared
 * for exact equality, not merely for truthiness.
 */
struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
				    unsigned long qpn, int is_send ) {
	struct ib_work_queue *candidate;

	list_for_each_entry ( candidate, &cq->work_queues, list ) {
		/* Skip work queues of other queue pairs */
		if ( candidate->qp->qpn != qpn )
			continue;
		/* Skip the queue for the opposite direction */
		if ( candidate->is_send != is_send )
			continue;
		return candidate;
	}

	/* No work queue attached to this CQ matched */
	return NULL;
}

/***************************************************************************
 *
 * Management datagram operations
 *
 ***************************************************************************
 */

/**
 * Get port information
 *
 * @v ibdev		Infiniband device
 * @v port_info		Port information datagram to fill in
 * @ret rc		Return status code
 *
 * Clears the datagram, builds a LID-routed subnet management "get"
 * request for the port information attribute (with the device's port
 * number as attribute modifier) and issues it via ib_mad().
 */
static int ib_get_port_info ( struct ib_device *ibdev,
			      struct ib_mad_port_info *port_info ) {
	struct ib_mad_hdr *smp_hdr;
	int status;

	/* Start from an all-zero datagram, then fill in the header */
	memset ( port_info, 0, sizeof ( *port_info ) );
	smp_hdr = &port_info->mad_hdr;
	smp_hdr->base_version = IB_MGMT_BASE_VERSION;
	smp_hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
	smp_hdr->class_version = 1;
	smp_hdr->method = IB_MGMT_METHOD_GET;
	smp_hdr->attr_id = htons ( IB_SMP_ATTR_PORT_INFO );
	smp_hdr->attr_mod = htonl ( ibdev->port );

	/* Issue the MAD; a zero status means success */
	status = ib_mad ( ibdev, smp_hdr, sizeof ( *port_info ) );
	if ( status != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not get port info: %s\n",
		       ibdev, strerror ( status ) );
	}
	return status;
}

/**
 * Get GUID information
 *
 * @v ibdev		Infiniband device
 * @v guid_info		GUID information datagram to fill in
 * @ret rc		Return status code
 *
 * Clears the datagram, builds a LID-routed subnet management "get"
 * request for the GUID information attribute and issues it via
 * ib_mad().  The attribute modifier is left at zero.
 */
static int ib_get_guid_info ( struct ib_device *ibdev,
			      struct ib_mad_guid_info *guid_info ) {
	struct ib_mad_hdr *smp_hdr;
	int status;

	/* Start from an all-zero datagram, then fill in the header */
	memset ( guid_info, 0, sizeof ( *guid_info ) );
	smp_hdr = &guid_info->mad_hdr;
	smp_hdr->base_version = IB_MGMT_BASE_VERSION;
	smp_hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
	smp_hdr->class_version = 1;
	smp_hdr->method = IB_MGMT_METHOD_GET;
	smp_hdr->attr_id = htons ( IB_SMP_ATTR_GUID_INFO );

	/* Issue the MAD; a zero status means success */
	status = ib_mad ( ibdev, smp_hdr, sizeof ( *guid_info ) );
	if ( status != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not get GUID info: %s\n",
		       ibdev, strerror ( status ) );
	}
	return status;
}

/**
 * Get partition key table
 *
 * @v ibdev		Infiniband device
 * @v pkey_table	Partition key table datagram to fill in
 * @ret rc		Return status code
 *
 * Clears the datagram, builds a LID-routed subnet management "get"
 * request for the partition key table attribute and issues it via
 * ib_mad().  The attribute modifier is left at zero.
 */
static int ib_get_pkey_table ( struct ib_device *ibdev,
			       struct ib_mad_pkey_table *pkey_table ) {
	struct ib_mad_hdr *smp_hdr;
	int status;

	/* Start from an all-zero datagram, then fill in the header */
	memset ( pkey_table, 0, sizeof ( *pkey_table ) );
	smp_hdr = &pkey_table->mad_hdr;
	smp_hdr->base_version = IB_MGMT_BASE_VERSION;
	smp_hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
	smp_hdr->class_version = 1;
	smp_hdr->method = IB_MGMT_METHOD_GET;
	smp_hdr->attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE );

	/* Issue the MAD; a zero status means success */
	status = ib_mad ( ibdev, smp_hdr, sizeof ( *pkey_table ) );
	if ( status != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not get pkey table: %s\n",
		       ibdev, strerror ( status ) );
	}
	return status;
}

/**
 * Wait for link up
 *
 * @v ibdev		Infiniband device
 * @ret rc		Return status code
 *
 * Queries the port information up to 20 times.  Returns 0 as soon as
 * a query reports the expected port state, sleeping for one second
 * after each query that reports any other state.  Returns -ENODEV if
 * the expected state is never seen; the status of any failed query is
 * not propagated.
 *
 * This function shouldn't really exist.  Unfortunately, IB links take
 * a long time to come up, and we can't get various key parameters
 * (e.g. our own IPoIB MAC address) without information from the
 * subnet manager.  We should eventually make link-up an asynchronous
 * event.
 */
static int ib_wait_for_link ( struct ib_device *ibdev ) {
	/* The port state is taken from the low four bits of the
	 * combined port state / link speed supported field.
	 */
	const unsigned int port_state_mask = 0xf;
	/* Presumably the "Active" port state of the Infiniband
	 * PortInfo attribute - TODO confirm against the specification.
	 */
	const unsigned int port_state_active = 4;
	struct ib_mad_port_info port_info;
	unsigned int port_state;
	unsigned int retries;

	printf ( "Waiting for Infiniband link-up..." );
	for ( retries = 20 ; retries ; retries-- ) {
		/* NOTE(review): a failed query consumes a retry
		 * without printing a dot or sleeping, so persistent
		 * query failures end the wait almost immediately.
		 * Kept as-is; confirm that this is intended.
		 */
		if ( ib_get_port_info ( ibdev, &port_info ) != 0 )
			continue;
		port_state = ( port_info.port_state__link_speed_supported &
			       port_state_mask );
		if ( port_state == port_state_active ) {
			printf ( "ok\n" );
			return 0;
		}
		printf ( "." );
		sleep ( 1 );
	}
	printf ( "failed\n" );
	return -ENODEV;
}

/**
 * Get MAD parameters
 *
 * @v ibdev		Infiniband device
 * @ret rc		Return status code
 *
 * Issues three management datagram queries in turn and records the
 * results in the device: the port GID (assembled from two halves),
 * the subnet manager LID and the first partition key.  The first
 * query to fail aborts the sequence and its status is returned; any
 * fields already filled in by earlier queries are left as they are.
 */
static int ib_get_mad_params ( struct ib_device *ibdev ) {
	/* All three datagrams share one buffer to save stack space, so
	 * each response must be consumed before the next query is sent.
	 */
	union {
		struct ib_mad_port_info port_info;
		struct ib_mad_guid_info guid_info;
		struct ib_mad_pkey_table pkey_table;
	} mad;
	int rc;

	/* Port info: first half of the port GID, plus the SM LID */
	rc = ib_get_port_info ( ibdev, &mad.port_info );
	if ( rc != 0 )
		return rc;
	memcpy ( &ibdev->port_gid.u.bytes[0], mad.port_info.gid_prefix, 8 );
	ibdev->sm_lid = ntohs ( mad.port_info.mastersm_lid );

	/* GUID info: second half of the port GID */
	rc = ib_get_guid_info ( ibdev, &mad.guid_info );
	if ( rc != 0 )
		return rc;
	memcpy ( &ibdev->port_gid.u.bytes[8], mad.guid_info.gid_local, 8 );

	/* Partition key table: use the very first entry */
	rc = ib_get_pkey_table ( ibdev, &mad.pkey_table );
	if ( rc != 0 )
		return rc;
	ibdev->pkey = ntohs ( mad.pkey_table.pkey[0][0] );

	DBGC ( ibdev, "IBDEV %p port GID is %08lx:%08lx:%08lx:%08lx\n",
	       ibdev, htonl ( ibdev->port_gid.u.dwords[0] ),
	       htonl ( ibdev->port_gid.u.dwords[1] ),
	       htonl ( ibdev->port_gid.u.dwords[2] ),
	       htonl ( ibdev->port_gid.u.dwords[3] ) );

	return 0;
}

/***************************************************************************
 *
 * Infiniband device creation/destruction
 *
 ***************************************************************************
 */

/**
 * Allocate Infiniband device
 *
 * @v priv_size		Size of private data area
 * @ret ibdev		Infiniband device, or NULL
 *
 * The device structure and its private data area are obtained from a
 * single allocation; dev_priv is pointed at the first byte after the
 * structure.
 */
struct ib_device * alloc_ibdev ( size_t priv_size ) {
	struct ib_device *ibdev;

	/* One allocation covers the structure plus the private area */
	ibdev = zalloc ( sizeof ( *ibdev ) + priv_size );
	if ( ! ibdev )
		return NULL;

	/* The private area starts immediately after the structure */
	ibdev->dev_priv = ( ( void * ) ( ibdev + 1 ) );
	return ibdev;
}

/**
 * Register Infiniband device
 *
 * @v ibdev		Infiniband device
 * @ret rc		Return status code
 *
 * Opens the device, waits for the link to come up, fetches the MAD
 * parameters and finally adds the IPoIB device.  If any step after
 * the open fails, the device is closed again before the failing
 * step's status is returned.
 */
int register_ibdev ( struct ib_device *ibdev ) {
	int rc;

	/* Open link; nothing to undo if this fails */
	rc = ib_open ( ibdev );
	if ( rc != 0 )
		return rc;

	/* Wait for link */
	rc = ib_wait_for_link ( ibdev );
	if ( rc != 0 )
		goto err_close;

	/* Get MAD parameters */
	rc = ib_get_mad_params ( ibdev );
	if ( rc != 0 )
		goto err_close;

	/* Add IPoIB device */
	rc = ipoib_probe ( ibdev );
	if ( rc != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n",
		       ibdev, strerror ( rc ) );
		goto err_close;
	}

	return 0;

 err_close:
	/* Every failure after a successful open undoes just the open */
	ib_close ( ibdev );
	return rc;
}

/**
 * Unregister Infiniband device
 *
 * @v ibdev		Infiniband device
 *
 * Undoes register_ibdev() in reverse order: the IPoIB device is
 * removed first and the link is then closed.  The device structure
 * itself is not freed here.
 */
void unregister_ibdev ( struct ib_device *ibdev ) {
	/* Remove the IPoIB device before closing the link beneath it */
	ipoib_remove ( ibdev );
	ib_close ( ibdev );
}

/**
 * Free Infiniband device
 *
 * @v ibdev		Infiniband device
 *
 * Releases the memory of a device structure.  For a device obtained
 * from alloc_ibdev() this also releases the private data area, since
 * both come from the same allocation.  The device is neither closed
 * nor unregistered here.
 */
void free_ibdev ( struct ib_device *ibdev ) {
	free ( ibdev );
}