/* * Copyright (C) 2007 Michael Brown . * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /** @file * * Infiniband protocol * */ /** List of Infiniband devices */ struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices ); /** * Create completion queue * * @v ibdev Infiniband device * @v num_cqes Number of completion queue entries * @ret cq New completion queue */ struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes ) { struct ib_completion_queue *cq; int rc; DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev ); /* Allocate and initialise data structure */ cq = zalloc ( sizeof ( *cq ) ); if ( ! cq ) return NULL; cq->num_cqes = num_cqes; INIT_LIST_HEAD ( &cq->work_queues ); /* Perform device-specific initialisation and get CQN */ if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) { DBGC ( ibdev, "IBDEV %p could not initialise completion " "queue: %s\n", ibdev, strerror ( rc ) ); free ( cq ); return NULL; } DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) " "with CQN %#lx\n", ibdev, num_cqes, cq, ib_cq_get_drvdata ( cq ), cq->cqn ); return cq; } /** * Destroy completion queue * * @v ibdev Infiniband device * @v cq Completion queue */ void ib_destroy_cq ( struct ib_device *ibdev, struct ib_completion_queue *cq ) { DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n", ibdev, cq->cqn ); assert ( list_empty ( &cq->work_queues ) ); ibdev->op->destroy_cq ( ibdev, cq ); free ( cq ); } /** * Create queue pair * * @v ibdev Infiniband device * @v num_send_wqes Number of send work queue entries * @v send_cq Send completion queue * @v num_recv_wqes Number of receive work queue entries * @v recv_cq Receive completion queue * @v qkey Queue key * @ret qp Queue pair */ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, unsigned int num_send_wqes, struct ib_completion_queue *send_cq, unsigned int num_recv_wqes, struct ib_completion_queue *recv_cq, unsigned long qkey ) { struct ib_queue_pair *qp; size_t total_size; int rc; DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev ); /* Allocate and initialise data structure */ total_size = ( sizeof ( *qp ) + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) + ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) ); qp = zalloc ( total_size ); if ( ! qp ) return NULL; qp->qkey = qkey; qp->send.qp = qp; qp->send.is_send = 1; qp->send.cq = send_cq; list_add ( &qp->send.list, &send_cq->work_queues ); qp->send.num_wqes = num_send_wqes; qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) ); qp->recv.qp = qp; qp->recv.cq = recv_cq; list_add ( &qp->recv.list, &recv_cq->work_queues ); qp->recv.num_wqes = num_recv_wqes; qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) )); /* Perform device-specific initialisation and get QPN */ if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) { DBGC ( ibdev, "IBDEV %p could not initialise queue pair: " "%s\n", ibdev, strerror ( rc ) ); list_del ( &qp->send.list ); list_del ( &qp->recv.list ); free ( qp ); return NULL; } DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n", ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn ); DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n", ibdev, qp->qpn, num_send_wqes, qp->send.iobufs, qp->recv.iobufs ); DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n", ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs, ( ( ( void * ) qp ) + total_size ) ); return qp; } /** * Modify queue pair * * @v ibdev Infiniband device * @v qp Queue pair * @v mod_list Modification list * @v qkey New queue key, if applicable * @ret rc Return status code */ int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp, unsigned long mod_list, unsigned long qkey ) { int rc; DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn ); if ( mod_list & IB_MODIFY_QKEY ) qp->qkey = qkey; if ( ( rc = ibdev->op->modify_qp ( ibdev, qp, mod_list ) ) != 0 ) { DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n", ibdev, qp->qpn, strerror ( rc ) ); return rc; } return 0; } /** * Destroy queue pair * * @v ibdev Infiniband device * @v qp Queue pair */ void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) { DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n", ibdev, qp->qpn ); ibdev->op->destroy_qp ( ibdev, qp ); list_del ( &qp->send.list ); list_del ( &qp->recv.list ); free ( qp ); } /** * Find work queue belonging to completion queue * * @v cq Completion queue * @v qpn Queue pair number * @v is_send Find send work queue (rather than receive) * @ret wq Work queue, or NULL if not found */ struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, unsigned long qpn, int is_send ) { struct ib_work_queue *wq; list_for_each_entry ( wq, &cq->work_queues, list ) { if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) ) return wq; } return NULL; } /*************************************************************************** * * Management datagram operations * *************************************************************************** */ /** * Get port information * * @v ibdev Infiniband device * @v port_info Port information datagram to fill in * @ret rc Return status code */ static int ib_get_port_info ( struct ib_device *ibdev, struct ib_mad_port_info *port_info ) { struct ib_mad_hdr *hdr = &port_info->mad_hdr; int rc; /* Construct MAD */ memset ( port_info, 0, sizeof ( *port_info ) ); hdr->base_version = IB_MGMT_BASE_VERSION; hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; hdr->class_version = 1; hdr->method = IB_MGMT_METHOD_GET; hdr->attr_id = htons ( IB_SMP_ATTR_PORT_INFO ); hdr->attr_mod = htonl ( ibdev->port ); if ( ( rc = ib_mad ( ibdev, hdr, sizeof ( *port_info ) ) ) != 0 ) { DBGC ( ibdev, "IBDEV %p could not get port info: %s\n", ibdev, strerror ( rc ) ); return rc; } return 0; } /** * Get GUID information * * @v ibdev Infiniband device * @v guid_info GUID information datagram to fill in * @ret rc Return status code */ static int ib_get_guid_info ( struct ib_device *ibdev, struct ib_mad_guid_info *guid_info ) { struct ib_mad_hdr *hdr = &guid_info->mad_hdr; int rc; /* Construct MAD */ memset ( guid_info, 0, sizeof ( *guid_info ) ); hdr->base_version = IB_MGMT_BASE_VERSION; hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; hdr->class_version = 1; hdr->method = IB_MGMT_METHOD_GET; hdr->attr_id = htons ( IB_SMP_ATTR_GUID_INFO ); if ( ( rc = ib_mad ( ibdev, hdr, sizeof ( *guid_info ) ) ) != 0 ) { DBGC ( ibdev, "IBDEV %p could not get GUID info: %s\n", ibdev, strerror ( rc ) ); return rc; } return 0; } /** * Get partition key table * * @v ibdev Infiniband device * @v guid_info Partition key table datagram to fill in * @ret rc Return status code */ static int ib_get_pkey_table ( struct ib_device *ibdev, struct ib_mad_pkey_table *pkey_table ) { struct ib_mad_hdr *hdr = &pkey_table->mad_hdr; int rc; /* Construct MAD */ memset ( pkey_table, 0, sizeof ( *pkey_table ) ); hdr->base_version = IB_MGMT_BASE_VERSION; hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; hdr->class_version = 1; hdr->method = IB_MGMT_METHOD_GET; hdr->attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE ); if ( ( rc = ib_mad ( ibdev, hdr, sizeof ( *pkey_table ) ) ) != 0 ) { DBGC ( ibdev, "IBDEV %p could not get pkey table: %s\n", ibdev, strerror ( rc ) ); return rc; } return 0; } /** * Get MAD parameters * * @v ibdev Infiniband device * @ret rc Return status code */ static int ib_get_mad_params ( struct ib_device *ibdev ) { union { /* This union exists just to save stack space */ struct ib_mad_port_info port_info; struct ib_mad_guid_info guid_info; struct ib_mad_pkey_table pkey_table; } u; int rc; /* Port info gives us the link state, the first half of the * port GID and the SM LID. */ if ( ( rc = ib_get_port_info ( ibdev, &u.port_info ) ) != 0 ) return rc; ibdev->link_up = ( ( u.port_info.port_state__link_speed_supported & 0xf ) == 4 ); memcpy ( &ibdev->port_gid.u.bytes[0], u.port_info.gid_prefix, 8 ); ibdev->sm_lid = ntohs ( u.port_info.mastersm_lid ); /* GUID info gives us the second half of the port GID */ if ( ( rc = ib_get_guid_info ( ibdev, &u.guid_info ) ) != 0 ) return rc; memcpy ( &ibdev->port_gid.u.bytes[8], u.guid_info.gid_local, 8 ); /* Get partition key */ if ( ( rc = ib_get_pkey_table ( ibdev, &u.pkey_table ) ) != 0 ) return rc; ibdev->pkey = ntohs ( u.pkey_table.pkey[0][0] ); DBGC ( ibdev, "IBDEV %p port GID is %08lx:%08lx:%08lx:%08lx\n", ibdev, htonl ( ibdev->port_gid.u.dwords[0] ), htonl ( ibdev->port_gid.u.dwords[1] ), htonl ( ibdev->port_gid.u.dwords[2] ), htonl ( ibdev->port_gid.u.dwords[3] ) ); return 0; } /*************************************************************************** * * Event queues * *************************************************************************** */ /** * Handle Infiniband link state change * * @v ibdev Infiniband device */ void ib_link_state_changed ( struct ib_device *ibdev ) { int rc; /* Update MAD parameters */ if ( ( rc = ib_get_mad_params ( ibdev ) ) != 0 ) { DBGC ( ibdev, "IBDEV %p could not update MAD parameters: %s\n", ibdev, strerror ( rc ) ); return; } /* Notify IPoIB of link state change */ ipoib_link_state_changed ( ibdev ); } /** * Single-step the Infiniband event queue * * @v process Infiniband event queue process */ static void ib_step ( struct process *process __unused ) { struct ib_device *ibdev; list_for_each_entry ( ibdev, &ib_devices, list ) { ibdev->op->poll_eq ( ibdev ); } } /** Infiniband event queue process */ struct process ib_process __permanent_process = { .step = ib_step, }; /*************************************************************************** * * Infiniband device creation/destruction * *************************************************************************** */ /** * Allocate Infiniband device * * @v priv_size Size of driver private data area * @ret ibdev Infiniband device, or NULL */ struct ib_device * alloc_ibdev ( size_t priv_size ) { struct ib_device *ibdev; void *drv_priv; size_t total_len; total_len = ( sizeof ( *ibdev ) + priv_size ); ibdev = zalloc ( total_len ); if ( ibdev ) { drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) ); ib_set_drvdata ( ibdev, drv_priv ); } return ibdev; } /** * Register Infiniband device * * @v ibdev Infiniband device * @ret rc Return status code */ int register_ibdev ( struct ib_device *ibdev ) { int rc; /* Add to device list */ ibdev_get ( ibdev ); list_add_tail ( &ibdev->list, &ib_devices ); /* Open link */ if ( ( rc = ib_open ( ibdev ) ) != 0 ) goto err_open; /* Get MAD parameters */ if ( ( rc = ib_get_mad_params ( ibdev ) ) != 0 ) goto err_get_mad_params; /* Add IPoIB device */ if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) { DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n", ibdev, strerror ( rc ) ); goto err_ipoib_probe; } DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev, ibdev->dev->name ); return 0; err_ipoib_probe: err_get_mad_params: ib_close ( ibdev ); err_open: list_del ( &ibdev->list ); ibdev_put ( ibdev ); return rc; } /** * Unregister Infiniband device * * @v ibdev Infiniband device */ void unregister_ibdev ( struct ib_device *ibdev ) { /* Close device */ ipoib_remove ( ibdev ); ib_close ( ibdev ); /* Remove from device list */ list_del ( &ibdev->list ); ibdev_put ( ibdev ); DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev ); }