summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Brown2007-09-17 06:04:58 +0200
committerMichael Brown2007-09-17 06:04:58 +0200
commit4e78a53cf26b85736123eee29d23d637b4a3883f (patch)
tree319b933d03e9095460ff53ee22e5619f93f81a70
parentRead port GID directly using MAD IFC. (diff)
downloadipxe-4e78a53cf26b85736123eee29d23d637b4a3883f.tar.gz
ipxe-4e78a53cf26b85736123eee29d23d637b4a3883f.tar.xz
ipxe-4e78a53cf26b85736123eee29d23d637b4a3883f.zip
IPoIB code separated out to ipoib.c.
-rw-r--r--src/drivers/net/ipoib.c411
-rw-r--r--src/drivers/net/mlx_ipoib/ib_driver.c5
-rw-r--r--src/drivers/net/mlx_ipoib/ib_driver.h2
-rw-r--r--src/drivers/net/mlx_ipoib/ib_mt25218.c2
-rw-r--r--src/drivers/net/mlx_ipoib/ipoib.c2
-rw-r--r--src/drivers/net/mlx_ipoib/mt25218.c143
-rw-r--r--src/include/gpxe/errfile.h2
-rw-r--r--src/include/gpxe/infiniband.h149
-rw-r--r--src/include/gpxe/ipoib.h78
-rw-r--r--src/net/infiniband.c32
10 files changed, 783 insertions, 43 deletions
diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c
new file mode 100644
index 00000000..9eed6b39
--- /dev/null
+++ b/src/drivers/net/ipoib.c
@@ -0,0 +1,411 @@
+/*
+ * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <gpxe/if_arp.h>
+#include <gpxe/iobuf.h>
+#include <gpxe/netdevice.h>
+#include <gpxe/infiniband.h>
+#include <gpxe/ipoib.h>
+
+/** @file
+ *
+ * IP over Infiniband
+ */
+
+
+
+
+
+extern unsigned long hack_ipoib_qkey;
+extern struct ib_address_vector hack_ipoib_bcast_av;
+
+
+
+/** IPoIB MTU */
+#define IPOIB_MTU 2048
+
+/** Number of IPoIB send work queue entries */
+#define IPOIB_NUM_SEND_WQES 8
+
+/** Number of IPoIB receive work queue entries */
+#define IPOIB_NUM_RECV_WQES 8
+
+/** Number of IPoIB completion entries */
+#define IPOIB_NUM_CQES 8
+
+/** An IPoIB device (held as the network device's private data) */
+struct ipoib_device {
+ /** Underlying Infiniband device */
+ struct ib_device *ibdev;
+ /** Completion queue (used for both send and receive completions) */
+ struct ib_completion_queue *cq;
+ /** Queue pair */
+ struct ib_queue_pair *qp;
+ /** Number of receive buffers currently posted to the queue pair */
+ unsigned int rx_fill;
+};
+
+/****************************************************************************
+ *
+ * IPoIB link layer
+ *
+ ****************************************************************************
+ */
+
+/**
+ * Broadcast IPoIB address
+ *
+ * Only the GID is initialised explicitly; the QPN field is left at
+ * its implicit zero value.
+ */
+static struct ipoib_mac ipoib_broadcast = {
+ .gid = { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } },
+};
+
+/**
+ * Transmit IPoIB packet
+ *
+ * @v iobuf		I/O buffer
+ * @v netdev		Network device
+ * @v net_protocol	Network-layer protocol
+ * @v ll_dest		Link-layer destination address
+ * @ret rc		Return value of netdev_tx()
+ *
+ * Prepends the IPoIB link-layer header (pseudo portion holding the
+ * destination address, then the real portion) and hands the packet
+ * to the network device.
+ */
+static int ipoib_tx ( struct io_buffer *iobuf, struct net_device *netdev,
+		      struct net_protocol *net_protocol,
+		      const void *ll_dest ) {
+	struct ipoib_hdr *hdr;
+
+	/* Reserve space for the link-layer header */
+	hdr = iob_push ( iobuf, sizeof ( *hdr ) );
+
+	/* Fill in the real portion */
+	hdr->real.reserved = 0;
+	hdr->real.proto = net_protocol->net_proto;
+
+	/* Fill in the pseudo portion */
+	memcpy ( &hdr->pseudo.peer, ll_dest, sizeof ( hdr->pseudo.peer ) );
+
+	/* Pass to the network device for transmission */
+	return netdev_tx ( netdev, iobuf );
+}
+
+/**
+ * Process received IPoIB packet
+ *
+ * @v iobuf	I/O buffer
+ * @v netdev	Network device
+ * @ret rc	Return status code (-EINVAL if the packet is too short
+ *		to hold an IPoIB header, otherwise the result of net_rx())
+ *
+ * Strips off the IPoIB link-layer header and passes up to the
+ * network-layer protocol.  A too-short packet is freed here.
+ */
+static int ipoib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) {
+	struct ipoib_hdr *ipoib_hdr = iobuf->data;
+
+	/* Sanity check */
+	if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
+		/* iob_len() yields a size_t, hence %zd rather than %d */
+		DBG ( "IPoIB packet too short (%zd bytes)\n",
+		      iob_len ( iobuf ) );
+		free_iob ( iobuf );
+		return -EINVAL;
+	}
+
+	/* Strip off IPoIB header.  The header bytes stay in the buffer,
+	 * so ipoib_hdr is still dereferenced below.
+	 */
+	iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
+
+	/* Hand off to network-layer protocol */
+	return net_rx ( iobuf, netdev, ipoib_hdr->real.proto,
+			&ipoib_hdr->pseudo.peer );
+}
+
+/**
+ * Transcribe IPoIB address
+ *
+ * @v ll_addr	Link-layer address (IPOIB_ALEN bytes)
+ * @ret string	Link-layer address as colon-separated hex bytes
+ *
+ * The result lives in a static buffer and is overwritten by the next
+ * call.
+ */
+const char * ipoib_ntoa ( const void *ll_addr ) {
+	static char buf[61];
+	const uint8_t *byte = ll_addr;
+	const uint8_t *end = ( byte + IPOIB_ALEN );
+	char *out = buf;
+
+	/* Emit ":xx" for every byte of the address */
+	while ( byte < end )
+		out += sprintf ( out, ":%02x", *byte++ );
+
+	/* Skip the leading colon */
+	return &buf[1];
+}
+
+/**
+ * IPoIB link-layer protocol
+ *
+ * Installed on network devices by alloc_ipoibdev().
+ */
+struct ll_protocol ipoib_protocol __ll_protocol = {
+ .name = "IPoIB",
+ .ll_proto = htons ( ARPHRD_INFINIBAND ),
+ .ll_addr_len = IPOIB_ALEN,
+ .ll_header_len = IPOIB_HLEN,
+ .ll_broadcast = ( uint8_t * ) &ipoib_broadcast,
+ .tx = ipoib_tx,
+ .rx = ipoib_rx,
+ .ntoa = ipoib_ntoa,
+};
+
+/****************************************************************************
+ *
+ * IPoIB network device
+ *
+ ****************************************************************************
+ */
+
+/**
+ * Transmit packet via IPoIB network device
+ *
+ * @v netdev	Network device
+ * @v iobuf	I/O buffer
+ * @ret rc	Return status code
+ *
+ * Removes the pseudo-header and posts the remainder of the packet to
+ * the queue pair.  The peer address in the pseudo-header is not
+ * consulted: every packet is sent using hack_ipoib_bcast_av.
+ */
+static int ipoib_transmit ( struct net_device *netdev,
+			    struct io_buffer *iobuf ) {
+	struct ipoib_device *ipoib = netdev->priv;
+	const size_t pseudo_len = sizeof ( struct ipoib_pseudo_hdr );
+
+	/* The pseudo-header must be present before it can be stripped */
+	if ( iob_len ( iobuf ) < pseudo_len ) {
+		DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
+		return -EINVAL;
+	}
+
+	/* Strip the pseudo-header */
+	iob_pull ( iobuf, pseudo_len );
+
+	/* Post to the send work queue */
+	return ib_post_send ( ipoib->ibdev, ipoib->qp,
+			      &hack_ipoib_bcast_av, iobuf );
+}
+
+/**
+ * Handle IPoIB send completion
+ *
+ * @v ibdev		Infiniband device (unused)
+ * @v qp		Queue pair
+ * @v completion	Completion
+ * @v iobuf		I/O buffer
+ *
+ * Reports the completed buffer to the network device recorded in the
+ * queue pair's owner data, with -EIO if the completion carries a
+ * non-zero syndrome.
+ */
+static void ipoib_complete_send ( struct ib_device *ibdev __unused,
+				  struct ib_queue_pair *qp,
+				  struct ib_completion *completion,
+				  struct io_buffer *iobuf ) {
+	struct net_device *netdev = qp->owner_priv;
+	int rc = 0;
+
+	/* Any non-zero syndrome is reported as an I/O error */
+	if ( completion->syndrome )
+		rc = -EIO;
+
+	netdev_tx_complete_err ( netdev, iobuf, rc );
+}
+
+/**
+ * Handle IPoIB receive completion
+ *
+ * @v ibdev		Infiniband device (unused)
+ * @v qp		Queue pair
+ * @v completion	Completion
+ * @v iobuf		I/O buffer
+ *
+ * On success the global route header is trimmed so that the space
+ * immediately before the real IPoIB header has the size of the
+ * pseudo-header, and the buffer is passed to the network device.
+ * Failed completions, and packets too short to contain a global
+ * route header, are reported as receive errors.  In every case the
+ * count of posted receive buffers is decremented.
+ */
+static void ipoib_complete_recv ( struct ib_device *ibdev __unused,
+				  struct ib_queue_pair *qp,
+				  struct ib_completion *completion,
+				  struct io_buffer *iobuf ) {
+	struct net_device *netdev = qp->owner_priv;
+	struct ipoib_device *ipoib = netdev->priv;
+	struct ib_global_route_header *grh = iobuf->data;
+	struct ipoib_pseudo_hdr *ipoib_pshdr;
+
+	/* Hardware-reported failure */
+	if ( completion->syndrome ) {
+		netdev_rx_err ( netdev, iobuf, -EIO );
+		goto done;
+	}
+
+	/* Record the received length */
+	iob_put ( iobuf, completion->len );
+
+	/* Refuse runt packets: pulling more than was received would
+	 * move the data pointer past the tail and make iob_len()
+	 * underflow in the link-layer receive path.
+	 */
+	if ( iob_len ( iobuf ) < sizeof ( *grh ) ) {
+		DBGC ( ipoib, "IPoIB %p received packet too short to "
+		       "contain GRH\n", ipoib );
+		netdev_rx_err ( netdev, iobuf, -EIO );
+		goto done;
+	}
+
+	/* Leave exactly a pseudo-header's worth of the GRH in place */
+	iob_pull ( iobuf, ( sizeof ( *grh ) - sizeof ( *ipoib_pshdr ) ) );
+	/* FIXME: fill in a MAC address for the sake of AoE! */
+	netdev_rx ( netdev, iobuf );
+
+ done:
+	/* This buffer is no longer posted to the receive queue */
+	ipoib->rx_fill--;
+}
+
+/**
+ * Refill IPoIB receive ring
+ *
+ * @v ipoib	IPoIB device
+ *
+ * Posts IPOIB_MTU-sized buffers until IPOIB_NUM_RECV_WQES are
+ * outstanding.  Stops quietly at the first allocation or posting
+ * failure; the caller is not told.
+ */
+static void ipoib_refill_recv ( struct ipoib_device *ipoib ) {
+	struct io_buffer *rx_buf;
+
+	/* The fill level is only bumped after a successful post */
+	for ( ; ipoib->rx_fill < IPOIB_NUM_RECV_WQES ; ipoib->rx_fill++ ) {
+		rx_buf = alloc_iob ( IPOIB_MTU );
+		if ( ! rx_buf )
+			return;
+		if ( ib_post_recv ( ipoib->ibdev, ipoib->qp, rx_buf ) != 0 ) {
+			/* Still owned by us, so release it */
+			free_iob ( rx_buf );
+			return;
+		}
+	}
+}
+
+/**
+ * Poll IPoIB network device
+ *
+ * @v netdev	Network device
+ *
+ * Handles outstanding completions, then tops up the receive ring.
+ */
+static void ipoib_poll ( struct net_device *netdev ) {
+	struct ipoib_device *ipoib = netdev->priv;
+
+	/* Dispatch send and receive completions */
+	ib_poll_cq ( ipoib->ibdev, ipoib->cq,
+		     ipoib_complete_send, ipoib_complete_recv );
+
+	/* Replace receive buffers that have completed */
+	ipoib_refill_recv ( ipoib );
+}
+
+/**
+ * Enable/disable interrupts on IPoIB network device
+ *
+ * @v netdev Network device
+ * @v enable Interrupts should be enabled
+ *
+ * This is a no-op: both arguments are ignored.
+ */
+static void ipoib_irq ( struct net_device *netdev __unused,
+ int enable __unused ) {
+ /* No implementation */
+}
+
+/**
+ * Open IPoIB network device
+ *
+ * @v netdev	Network device
+ * @ret rc	Return status code
+ *
+ * Attaches the queue pair to the Infiniband device's broadcast GID
+ * and posts the initial receive buffers.
+ */
+static int ipoib_open ( struct net_device *netdev ) {
+	struct ipoib_device *ipoib = netdev->priv;
+	struct ib_device *ibdev = ipoib->ibdev;
+	int rc;
+
+	/* Attach queue pair to the broadcast multicast group */
+	rc = ib_mcast_attach ( ibdev, ipoib->qp, &ibdev->broadcast_gid );
+	if ( rc != 0 ) {
+		DBG ( "Could not attach to broadcast GID: %s\n",
+		      strerror ( rc ) );
+		return rc;
+	}
+
+	/* Post the initial set of receive buffers */
+	ipoib_refill_recv ( ipoib );
+
+	return 0;
+}
+
+/**
+ * Close IPoIB network device
+ *
+ * @v netdev	Network device
+ *
+ * Detaches the queue pair from the broadcast multicast group joined
+ * by ipoib_open().
+ */
+static void ipoib_close ( struct net_device *netdev ) {
+	struct ipoib_device *ipoib = netdev->priv;
+	struct ib_device *ibdev = ipoib->ibdev;
+
+	/* Detach from broadcast multicast GID.  This must be the same
+	 * GID that ipoib_open() attached to (the device's
+	 * broadcast_gid), not the static link-layer broadcast address.
+	 */
+	ib_mcast_detach ( ibdev, ipoib->qp, &ibdev->broadcast_gid );
+
+	/* FIXME: should probably flush the receive ring */
+}
+
+/**
+ * IPoIB network device operations
+ *
+ * Installed on the network device by ipoib_probe() via netdev_init().
+ */
+static struct net_device_operations ipoib_operations = {
+ .open = ipoib_open,
+ .close = ipoib_close,
+ .transmit = ipoib_transmit,
+ .poll = ipoib_poll,
+ .irq = ipoib_irq,
+};
+
+/**
+ * Probe IPoIB device
+ *
+ * @v ibdev	Infiniband device
+ * @ret rc	Return status code
+ *
+ * Creates a network device on top of the Infiniband device: allocates
+ * the completion queue and queue pair, builds the link-layer address
+ * from the queue pair number and the port GID, and registers the
+ * network device.  The network device is recorded as the Infiniband
+ * device's owner data.  Everything allocated here is released again
+ * if a later step fails.
+ */
+int ipoib_probe ( struct ib_device *ibdev ) {
+	struct net_device *netdev;
+	struct ipoib_device *ipoib;
+	struct ipoib_mac *ll_addr;
+	int rc;
+
+	/* Allocate and initialise network device */
+	netdev = alloc_ipoibdev ( sizeof ( *ipoib ) );
+	if ( ! netdev )
+		return -ENOMEM;
+	netdev_init ( netdev, &ipoib_operations );
+	netdev->dev = ibdev->dev;
+	ib_set_ownerdata ( ibdev, netdev );
+
+	/* Initialise private data */
+	ipoib = netdev->priv;
+	memset ( ipoib, 0, sizeof ( *ipoib ) );
+	ipoib->ibdev = ibdev;
+
+	/* Allocate completion queue */
+	ipoib->cq = ib_create_cq ( ibdev, IPOIB_NUM_CQES );
+	if ( ! ipoib->cq ) {
+		DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n",
+		       ipoib );
+		rc = -ENOMEM;
+		goto err_create_cq;
+	}
+
+	/* Allocate queue pair; both work queues complete to the one CQ */
+	ipoib->qp = ib_create_qp ( ibdev,
+				   IPOIB_NUM_SEND_WQES, ipoib->cq,
+				   IPOIB_NUM_RECV_WQES, ipoib->cq,
+				   hack_ipoib_qkey );
+	if ( ! ipoib->qp ) {
+		DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n",
+		       ipoib );
+		rc = -ENOMEM;
+		goto err_create_qp;
+	}
+	ipoib->qp->owner_priv = netdev;
+
+	/* Construct link-layer address: QPN followed by port GID */
+	ll_addr = ( ( struct ipoib_mac * ) netdev->ll_addr );
+	memcpy ( &ll_addr->gid, &ibdev->port_gid, sizeof ( ll_addr->gid ) );
+	ll_addr->qpn = htonl ( ipoib->qp->qpn );
+
+	/* Register network device */
+	rc = register_netdev ( netdev );
+	if ( rc != 0 )
+		goto err_register_netdev;
+
+	return 0;
+
+ err_register_netdev:
+	ib_destroy_qp ( ibdev, ipoib->qp );
+ err_create_qp:
+	ib_destroy_cq ( ibdev, ipoib->cq );
+ err_create_cq:
+	netdev_nullify ( netdev );
+	netdev_put ( netdev );
+	return rc;
+}
+
+/**
+ * Remove IPoIB device
+ *
+ * @v ibdev	Infiniband device
+ *
+ * Undoes ipoib_probe(): unregisters the network device stored as the
+ * Infiniband device's owner data, destroys the queue pair and
+ * completion queue, and drops the network device reference.
+ */
+void ipoib_remove ( struct ib_device *ibdev ) {
+	struct net_device *netdev = ib_get_ownerdata ( ibdev );
+	struct ipoib_device *ipoib = netdev->priv;
+
+	unregister_netdev ( netdev );
+
+	/* Release the Infiniband resources allocated by ipoib_probe(),
+	 * in the same order as its error path.  This must happen
+	 * before the final netdev_put(), since ipoib is the network
+	 * device's private data.
+	 */
+	ib_destroy_qp ( ibdev, ipoib->qp );
+	ib_destroy_cq ( ibdev, ipoib->cq );
+
+	netdev_nullify ( netdev );
+	netdev_put ( netdev );
+}
diff --git a/src/drivers/net/mlx_ipoib/ib_driver.c b/src/drivers/net/mlx_ipoib/ib_driver.c
index 590fb94d..34d4cbaa 100644
--- a/src/drivers/net/mlx_ipoib/ib_driver.c
+++ b/src/drivers/net/mlx_ipoib/ib_driver.c
@@ -63,6 +63,7 @@ static int wait_logic_link_up(__u8 port)
}
unsigned long ipoib_qkey;
+unsigned long hack_ipoib_qkey;
static int ib_driver_init(struct pci_device *pci, udqp_t * ipoib_qph_p)
{
@@ -149,7 +150,7 @@ static int ib_driver_init(struct pci_device *pci, udqp_t * ipoib_qph_p)
qkey, mlid);
}
- ipoib_qkey = qkey;
+ hack_ipoib_qkey = ipoib_qkey = qkey;
#if 0
rc = create_ipoib_qp(&ib_data.ipoib_qp,
@@ -285,7 +286,7 @@ static int poll_cqe_tout(cq_t cqh, __u16 tout, void **wqe, int *good_p)
end = currticks() + tout;
do {
- rc = ib_poll_cq(cqh, &ib_cqe, &num_cqes);
+ rc = ib_poll_cqx(cqh, &ib_cqe, &num_cqes);
if (rc)
return rc;
diff --git a/src/drivers/net/mlx_ipoib/ib_driver.h b/src/drivers/net/mlx_ipoib/ib_driver.h
index 6dca8d30..7fc57364 100644
--- a/src/drivers/net/mlx_ipoib/ib_driver.h
+++ b/src/drivers/net/mlx_ipoib/ib_driver.h
@@ -153,7 +153,7 @@ static int gw_read_cr(__u32 addr, __u32 * result);
static int gw_write_cr(__u32 addr, __u32 data);
static ud_av_t alloc_ud_av(void);
static void free_ud_av(ud_av_t av);
-static int ib_poll_cq(cq_t cq, struct ib_cqe_st *ib_cqe_p, __u8 * num_cqes);
+static int ib_poll_cqx(cq_t cq, struct ib_cqe_st *ib_cqe_p, __u8 * num_cqes);
static int add_qp_to_mcast_group(union ib_gid_u mcast_gid, __u8 add);
static int clear_interrupt(void);
static int poll_cqe_tout(cq_t cqh, __u16 tout, void **wqe, int *good_p);
diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c
index ba1108a3..a5d251d4 100644
--- a/src/drivers/net/mlx_ipoib/ib_mt25218.c
+++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c
@@ -1730,7 +1730,7 @@ static void dev2ib_cqe(struct ib_cqe_st *ib_cqe_p, union cqe_st *cqe_p)
byte_cnt);
}
-static int ib_poll_cq(void *cqh, struct ib_cqe_st *ib_cqe_p, u8 * num_cqes)
+static int ib_poll_cqx(void *cqh, struct ib_cqe_st *ib_cqe_p, u8 * num_cqes)
{
int rc;
union cqe_st cqe;
diff --git a/src/drivers/net/mlx_ipoib/ipoib.c b/src/drivers/net/mlx_ipoib/ipoib.c
index d4124f21..d8dd6bf6 100644
--- a/src/drivers/net/mlx_ipoib/ipoib.c
+++ b/src/drivers/net/mlx_ipoib/ipoib.c
@@ -879,7 +879,7 @@ static int ipoib_read_packet(__u16 * prot_p, void *data, unsigned int *size_p,
void *buf, *out_buf;
__u16 prot_type;
- rc = ib_poll_cq(ipoib_data.rcv_cqh, &ib_cqe, &num_cqes);
+ rc = ib_poll_cqx(ipoib_data.rcv_cqh, &ib_cqe, &num_cqes);
if (rc) {
return rc;
}
diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c
index aed6d208..6aa4e7fe 100644
--- a/src/drivers/net/mlx_ipoib/mt25218.c
+++ b/src/drivers/net/mlx_ipoib/mt25218.c
@@ -16,6 +16,7 @@ Skeleton NIC driver for Etherboot
#include <gpxe/iobuf.h>
#include <gpxe/netdevice.h>
#include <gpxe/infiniband.h>
+#include <gpxe/ipoib.h>
/* to get some global routines like printf */
#include "etherboot.h"
@@ -29,11 +30,18 @@ Skeleton NIC driver for Etherboot
#include "arbel.h"
+struct ib_address_vector hack_ipoib_bcast_av;
+
+
+
+
static const struct ib_gid arbel_no_gid = {
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 }
};
+#if 0
+
#define MLX_RX_MAX_FILL NUM_IPOIB_RCV_WQES
struct mlx_nic {
@@ -275,6 +283,7 @@ static void mlx_poll ( struct net_device *netdev ) {
&static_ipoib_send_cq,
#endif
temp_complete_send, temp_complete_recv );
+#if 0
arbel_poll_cq ( &static_ibdev,
#if CREATE_OWN
mlx->own_recv_cq,
@@ -282,6 +291,7 @@ static void mlx_poll ( struct net_device *netdev ) {
&static_ipoib_recv_cq,
#endif
temp_complete_send, temp_complete_recv );
+#endif
mlx_refill_rx ( netdev );
}
@@ -308,6 +318,8 @@ static struct net_device_operations mlx_operations = {
};
+#endif /* 0 */
+
/***************************************************************************
@@ -1488,6 +1500,8 @@ static int arbel_get_port_gid ( struct arbel *arbel, struct ib_gid *gid ) {
+#if 0
+
/**
* Probe PCI device
*
@@ -1576,14 +1590,17 @@ static int arbel_probe ( struct pci_device *pci,
DBG ( "Could not create send CQ\n" );
return -EIO;
}
+#if 0
mlx->own_recv_cq = ib_create_cq ( ibdev, 32 );
if ( ! mlx->own_recv_cq ) {
DBG ( "Could not create send CQ\n" );
return -EIO;
}
+#endif
mlx->own_qp = ib_create_qp ( ibdev, NUM_IPOIB_SND_WQES,
mlx->own_send_cq, NUM_IPOIB_RCV_WQES,
- mlx->own_recv_cq, ipoib_qkey );
+ //mlx->own_recv_cq, ipoib_qkey );
+ mlx->own_send_cq, ipoib_qkey );
if ( ! mlx->own_qp ) {
DBG ( "Could not create QP\n" );
return -EIO;
@@ -1621,6 +1638,22 @@ static int arbel_probe ( struct pci_device *pci,
}
#endif
+ ibdev->dev = &pci->dev;
+
+
+ struct ud_av_st *bcast_av = mlx->bcast_av;
+ struct arbelprm_ud_address_vector *bav =
+ ( struct arbelprm_ud_address_vector * ) &bcast_av->av;
+ struct ib_address_vector *av = &hack_ipoib_bcast_av;
+ av->dest_qp = bcast_av->dest_qp;
+ av->qkey = bcast_av->qkey;
+ av->dlid = MLX_GET ( bav, rlid );
+ av->rate = ( MLX_GET ( bav, max_stat_rate ) ? 1 : 4 );
+ av->sl = MLX_GET ( bav, sl );
+ av->gid_present = 1;
+ memcpy ( &av->gid, ( ( void * ) bav ) + 16, 16 );
+
+
/* Register network device */
if ( ( rc = register_netdev ( netdev ) ) != 0 )
goto err_register_netdev;
@@ -1650,6 +1683,114 @@ static void arbel_remove ( struct pci_device *pci ) {
netdev_put ( netdev );
}
+#endif /* 0 */
+
+
+
+/**
+ * Probe PCI device
+ *
+ * @v pci PCI device
+ * @v id PCI ID
+ * @ret rc Return status code
+ *
+ * Allocates an Infiniband device, initialises the hardware via
+ * ib_driver_init(), copies the resulting state into the struct arbel
+ * and the Infiniband device, and then adds an IPoIB device on top.
+ * The Infiniband device is stored as the PCI driver data.
+ */
+static int arbel_probe ( struct pci_device *pci,
+ const struct pci_device_id *id __unused ) {
+ struct ib_device *ibdev;
+ struct arbelprm_query_dev_lim dev_lim;
+ struct arbel *arbel;
+ udqp_t qph;
+ int rc;
+
+ /* Allocate Infiniband device */
+ ibdev = alloc_ibdev ( sizeof ( *arbel ) );
+ if ( ! ibdev )
+ return -ENOMEM;
+ ibdev->op = &arbel_ib_operations;
+ pci_set_drvdata ( pci, ibdev );
+ ibdev->dev = &pci->dev;
+ arbel = ibdev->dev_priv;
+ memset ( arbel, 0, sizeof ( *arbel ) );
+
+ /* Fix up PCI device */
+ adjust_pci_device ( pci );
+
+ /* Initialise hardware */
+ if ( ( rc = ib_driver_init ( pci, &qph ) ) != 0 )
+ goto err_ib_driver_init;
+
+ /* Hack up IB structures */
+ arbel->config = memfree_pci_dev.cr_space;
+ arbel->mailbox_in = dev_buffers_p->inprm_buf;
+ arbel->mailbox_out = dev_buffers_p->outprm_buf;
+ arbel->uar = memfree_pci_dev.uar;
+ arbel->db_rec = dev_ib_data.uar_context_base;
+ arbel->reserved_lkey = dev_ib_data.mkey;
+ arbel->eqn = dev_ib_data.eq.eqn;
+
+ /* Get device limits */
+ if ( ( rc = arbel_cmd_query_dev_lim ( arbel, &dev_lim ) ) != 0 ) {
+ DBGC ( arbel, "Arbel %p could not get device limits: %s\n",
+ arbel, strerror ( rc ) );
+ goto err_query_dev_lim;
+ }
+ arbel->limits.reserved_uars = MLX_GET ( &dev_lim, num_rsvd_uars );
+ arbel->limits.reserved_cqs =
+ ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) );
+ arbel->limits.reserved_qps =
+ ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) );
+
+ /* Get port GID */
+ if ( ( rc = arbel_get_port_gid ( arbel, &ibdev->port_gid ) ) != 0 ) {
+ DBGC ( arbel, "Arbel %p could not determine port GID: %s\n",
+ arbel, strerror ( rc ) );
+ goto err_get_port_gid;
+ }
+
+ /* Hack: copy the broadcast address vector held in ib_data into
+ * hack_ipoib_bcast_av, which the IPoIB driver uses as the
+ * destination for every transmitted packet.
+ */
+ struct ud_av_st *bcast_av = ib_data.bcast_av;
+ struct arbelprm_ud_address_vector *bav =
+ ( struct arbelprm_ud_address_vector * ) &bcast_av->av;
+ struct ib_address_vector *av = &hack_ipoib_bcast_av;
+ av->dest_qp = bcast_av->dest_qp;
+ av->qkey = bcast_av->qkey;
+ av->dlid = MLX_GET ( bav, rlid );
+ av->rate = ( MLX_GET ( bav, max_stat_rate ) ? 1 : 4 );
+ av->sl = MLX_GET ( bav, sl );
+ av->gid_present = 1;
+ /* NOTE(review): presumably the GID occupies the 16 bytes at
+ * byte offset 16 of the hardware address vector - confirm
+ * against the arbelprm_ud_address_vector layout.
+ */
+ memcpy ( &av->gid, ( ( void * ) bav ) + 16, 16 );
+
+ /* Record the broadcast GID; ipoib_open() attaches to it */
+ memcpy ( &ibdev->broadcast_gid, &ib_data.bcast_gid, 16 );
+
+ /* Add IPoIB device */
+ if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) {
+ DBGC ( arbel, "Arbel %p could not add IPoIB device: %s\n",
+ arbel, strerror ( rc ) );
+ goto err_ipoib_probe;
+ }
+
+ return 0;
+
+ /* Every failure after ib_driver_init() shares the same cleanup */
+ err_ipoib_probe:
+ err_get_port_gid:
+ err_query_dev_lim:
+ ib_driver_close ( 0 );
+ err_ib_driver_init:
+ free_ibdev ( ibdev );
+ return rc;
+}
+
+/**
+ * Remove PCI device
+ *
+ * @v pci	PCI device
+ *
+ * Undoes arbel_probe(): removes the IPoIB device, shuts down the
+ * hardware and frees the Infiniband device stored as the PCI driver
+ * data.
+ */
+static void arbel_remove ( struct pci_device *pci ) {
+	struct ib_device *ibdev = pci_get_drvdata ( pci );
+
+	ipoib_remove ( ibdev );
+	ib_driver_close ( 0 );
+
+	/* Release the device allocated by alloc_ibdev() in
+	 * arbel_probe(); without this it is leaked on every removal.
+	 */
+	free_ibdev ( ibdev );
+}
+
static struct pci_device_id arbel_nics[] = {
PCI_ROM ( 0x15b3, 0x6282, "MT25218", "MT25218 HCA driver" ),
PCI_ROM ( 0x15b3, 0x6274, "MT25204", "MT25204 HCA driver" ),
diff --git a/src/include/gpxe/errfile.h b/src/include/gpxe/errfile.h
index 3413f9cf..325d2387 100644
--- a/src/include/gpxe/errfile.h
+++ b/src/include/gpxe/errfile.h
@@ -101,6 +101,8 @@
#define ERRFILE_via_rhine ( ERRFILE_DRIVER | 0x00440000 )
#define ERRFILE_via_velocity ( ERRFILE_DRIVER | 0x00450000 )
#define ERRFILE_w89c840 ( ERRFILE_DRIVER | 0x00460000 )
+#define ERRFILE_ipoib ( ERRFILE_DRIVER | 0x00470000 )
+#define ERRFILE_mt25218 ( ERRFILE_DRIVER | 0x00480000 )
#define ERRFILE_scsi ( ERRFILE_DRIVER | 0x00700000 )
diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h
index 3f09808c..e9e0121d 100644
--- a/src/include/gpxe/infiniband.h
+++ b/src/include/gpxe/infiniband.h
@@ -8,34 +8,11 @@
*/
#include <stdint.h>
-#include <gpxe/netdevice.h>
+#include <gpxe/device.h>
-/** An Infiniband Global Identifier */
-struct ib_gid {
- uint8_t bytes[16];
-};
-/** An Infiniband Global Route Header */
-struct ib_global_route_header {
- /** IP version, traffic class, and flow label
- *
- * 4 bits : Version of the GRH
- * 8 bits : Traffic class
- * 20 bits : Flow label
- */
- uint32_t ipver_tclass_flowlabel;
- /** Payload length */
- uint16_t paylen;
- /** Next header */
- uint8_t nxthdr;
- /** Hop limit */
- uint8_t hoplmt;
- /** Source GID */
- struct ib_gid sgid;
- /** Destiniation GID */
- struct ib_gid dgid;
-} __attribute__ (( packed ));
+#if 0
/** Infiniband MAC address length */
#define IB_ALEN 20
@@ -60,9 +37,41 @@ struct ibhdr {
/** Reserved, must be zero */
uint16_t reserved;
} __attribute__ (( packed ));
+#endif
+
+
+
+
+
+/** An Infiniband Global Identifier */
+struct ib_gid {
+ /** Raw GID bytes (16 bytes, i.e. 128 bits) */
+ uint8_t bytes[16];
+};
+
+/** An Infiniband Global Route Header */
+struct ib_global_route_header {
+ /** IP version, traffic class, and flow label
+ *
+ * 4 bits : Version of the GRH
+ * 8 bits : Traffic class
+ * 20 bits : Flow label
+ */
+ uint32_t ipver_tclass_flowlabel;
+ /** Payload length */
+ uint16_t paylen;
+ /** Next header */
+ uint8_t nxthdr;
+ /** Hop limit */
+ uint8_t hoplmt;
+ /** Source GID */
+ struct ib_gid sgid;
+ /** Destination GID */
+ struct ib_gid dgid;
+} __attribute__ (( packed ));
+
struct ib_device;
struct ib_queue_pair;
struct ib_completion_queue;
@@ -223,8 +232,7 @@ struct ib_device_operations {
struct ib_queue_pair *qp,
struct ib_address_vector *av,
struct io_buffer *iobuf );
- /**
- * Post receive work queue entry
+ /** Post receive work queue entry
*
* @v ibdev Infiniband device
* @v qp Queue pair
@@ -252,8 +260,7 @@ struct ib_device_operations {
struct ib_completion_queue *cq,
ib_completer_t complete_send,
ib_completer_t complete_recv );
- /**
- * Attach to multicast group
+ /** Attach to multicast group
*
* @v ibdev Infiniband device
* @v qp Queue pair
@@ -263,8 +270,7 @@ struct ib_device_operations {
int ( * mcast_attach ) ( struct ib_device *ibdev,
struct ib_queue_pair *qp,
struct ib_gid *gid );
- /**
- * Detach from multicast group
+ /** Detach from multicast group
*
* @v ibdev Infiniband device
* @v qp Queue pair
@@ -276,13 +282,19 @@ struct ib_device_operations {
};
/** An Infiniband device */
-struct ib_device {
+struct ib_device {
/** Port GID */
struct ib_gid port_gid;
+ /** Broadcast GID */
+ struct ib_gid broadcast_gid;
+ /** Underlying device */
+ struct device *dev;
/** Infiniband operations */
struct ib_device_operations *op;
/** Device private data */
void *dev_priv;
+ /** Owner private data */
+ void *owner_priv;
};
extern struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev,
@@ -297,6 +309,52 @@ extern void ib_destroy_qp ( struct ib_device *ibdev,
struct ib_queue_pair *qp );
extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
unsigned long qpn, int is_send );
+extern struct ib_device * alloc_ibdev ( size_t priv_size );
+extern void free_ibdev ( struct ib_device *ibdev );
+
+/**
+ * Post send work queue entry
+ *
+ * @v ibdev Infiniband device
+ * @v qp Queue pair
+ * @v av Address vector
+ * @v iobuf I/O buffer
+ * @ret rc Return status code
+ *
+ * Thin wrapper that dispatches to the device's post_send() operation.
+ */
+static inline __attribute__ (( always_inline )) int
+ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+ struct ib_address_vector *av, struct io_buffer *iobuf ) {
+ return ibdev->op->post_send ( ibdev, qp, av, iobuf );
+}
+
+/**
+ * Post receive work queue entry
+ *
+ * @v ibdev Infiniband device
+ * @v qp Queue pair
+ * @v iobuf I/O buffer
+ * @ret rc Return status code
+ *
+ * Thin wrapper that dispatches to the device's post_recv() operation.
+ */
+static inline __attribute__ (( always_inline )) int
+ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+ struct io_buffer *iobuf ) {
+ return ibdev->op->post_recv ( ibdev, qp, iobuf );
+}
+
+/**
+ * Poll completion queue
+ *
+ * @v ibdev Infiniband device
+ * @v cq Completion queue
+ * @v complete_send Send completion handler
+ * @v complete_recv Receive completion handler
+ *
+ * Thin wrapper that dispatches to the device's poll_cq() operation;
+ * nothing is returned to the caller.
+ */
+static inline __attribute__ (( always_inline )) void
+ib_poll_cq ( struct ib_device *ibdev, struct ib_completion_queue *cq,
+ ib_completer_t complete_send, ib_completer_t complete_recv ) {
+ ibdev->op->poll_cq ( ibdev, cq, complete_send, complete_recv );
+}
+
/**
* Attach to multicast group
@@ -325,6 +383,27 @@ ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
ibdev->op->mcast_detach ( ibdev, qp, gid );
}
+/**
+ * Set Infiniband owner-private data
+ *
+ * @v ibdev Infiniband device
+ * @v owner_priv Private data
+ */
+static inline void ib_set_ownerdata ( struct ib_device *ibdev,
+ void *owner_priv ) {
+ ibdev->owner_priv = owner_priv;
+}
+
+/**
+ * Get Infiniband owner-private data
+ *
+ * @v ibdev Infiniband device
+ * @ret owner_priv Private data
+ */
+static inline void * ib_get_ownerdata ( struct ib_device *ibdev ) {
+ return ibdev->owner_priv;
+}
+
/*****************************************************************************
*
* Management datagrams
@@ -435,9 +514,7 @@ union ib_mad {
-
-
-
+#if 0
extern struct ll_protocol infiniband_protocol;
@@ -459,4 +536,6 @@ static inline struct net_device * alloc_ibdev ( size_t priv_size ) {
return netdev;
}
+#endif
+
#endif /* _GPXE_INFINIBAND_H */
diff --git a/src/include/gpxe/ipoib.h b/src/include/gpxe/ipoib.h
new file mode 100644
index 00000000..0551687d
--- /dev/null
+++ b/src/include/gpxe/ipoib.h
@@ -0,0 +1,78 @@
+#ifndef _GPXE_IPOIB_H
+#define _GPXE_IPOIB_H
+
+/** @file
+ *
+ * IP over Infiniband
+ */
+
+#include <gpxe/infiniband.h>
+
+/** IPoIB MAC address length */
+#define IPOIB_ALEN 20
+
+/**
+ * An IPoIB MAC address
+ *
+ * Packed: a 4-byte QPN followed by a 16-byte GID, i.e. IPOIB_ALEN
+ * (20) bytes in total.
+ */
+struct ipoib_mac {
+ /** Queue pair number
+ *
+ * MSB must be zero; QPNs are only 24-bit.
+ */
+ uint32_t qpn;
+ /** Port GID */
+ struct ib_gid gid;
+} __attribute__ (( packed ));
+
+/** IPoIB link-layer header length */
+#define IPOIB_HLEN 24
+
+/**
+ * IPoIB link-layer header pseudo portion
+ *
+ * This part doesn't actually exist on the wire, but it provides a
+ * convenient way to fit into the typical network device model.
+ * The IPoIB driver's transmit routine strips it before posting the
+ * buffer to the queue pair.
+ */
+struct ipoib_pseudo_hdr {
+ /** Peer address */
+ struct ipoib_mac peer;
+} __attribute__ (( packed ));
+
+/** IPoIB link-layer header real portion */
+struct ipoib_real_hdr {
+ /** Network-layer protocol
+ *
+ * Stored exactly as the network-layer protocol's net_proto
+ * value; the IPoIB transmit path copies it without conversion.
+ */
+ uint16_t proto;
+ /** Reserved, must be zero */
+ uint16_t reserved;
+} __attribute__ (( packed ));
+
+/**
+ * An IPoIB link-layer header
+ *
+ * Packed: a 20-byte pseudo portion followed by a 4-byte real
+ * portion, i.e. IPOIB_HLEN (24) bytes in total.
+ */
+struct ipoib_hdr {
+ /** Pseudo portion */
+ struct ipoib_pseudo_hdr pseudo;
+ /** Real portion */
+ struct ipoib_real_hdr real;
+} __attribute__ (( packed ));
+
+extern struct ll_protocol ipoib_protocol;
+
+extern const char * ipoib_ntoa ( const void *ll_addr );
+
+/**
+ * Allocate IPoIB device
+ *
+ * @v priv_size	Size of driver private data
+ * @ret netdev	Network device, or NULL
+ *
+ * Allocates a network device and selects IPoIB as its link-layer
+ * protocol.
+ */
+static inline struct net_device * alloc_ipoibdev ( size_t priv_size ) {
+	struct net_device *netdev = alloc_netdev ( priv_size );
+
+	/* Nothing to configure if allocation failed */
+	if ( ! netdev )
+		return netdev;
+
+	netdev->ll_protocol = &ipoib_protocol;
+	return netdev;
+}
+
+extern int ipoib_probe ( struct ib_device *ibdev );
+extern void ipoib_remove ( struct ib_device *ibdev );
+
+#endif /* _GPXE_IPOIB_H */
diff --git a/src/net/infiniband.c b/src/net/infiniband.c
index a9ca0e31..7a68b7d4 100644
--- a/src/net/infiniband.c
+++ b/src/net/infiniband.c
@@ -153,8 +153,6 @@ void ib_destroy_qp ( struct ib_device *ibdev,
free ( qp );
}
-
-
/**
* Find work queue belonging to completion queue
*
@@ -174,7 +172,35 @@ struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
return NULL;
}
+/**
+ * Allocate Infiniband device
+ *
+ * @v priv_size	Size of private data area
+ * @ret ibdev	Infiniband device, or NULL
+ *
+ * The device structure and its private data area are obtained as a
+ * single zeroed allocation.
+ */
+struct ib_device * alloc_ibdev ( size_t priv_size ) {
+	struct ib_device *ibdev;
+
+	ibdev = zalloc ( sizeof ( *ibdev ) + priv_size );
+	if ( ! ibdev )
+		return NULL;
+
+	/* Private data area immediately follows the device structure */
+	ibdev->dev_priv = ( ibdev + 1 );
+	return ibdev;
+}
+/**
+ * Free Infiniband device
+ *
+ * @v ibdev Infiniband device
+ *
+ * Releases the allocation made by alloc_ibdev(); the private data
+ * area is part of that same allocation and is freed with it.
+ */
+void free_ibdev ( struct ib_device *ibdev ) {
+ free ( ibdev );
+}
+
+
+#if 0
/** Infiniband broadcast MAC address */
static uint8_t ib_broadcast[IB_ALEN] = { 0xff, };
@@ -259,3 +285,5 @@ struct ll_protocol infiniband_protocol __ll_protocol = {
.rx = ib_rx,
.ntoa = ib_ntoa,
};
+
+#endif