From d0974ec7166c42be774d32df954a0e71283fa245 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Fri, 17 Aug 2007 19:51:08 +0100 Subject: Separate out arch-independent parts of stdint.h --- src/include/stdint.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 src/include/stdint.h (limited to 'src/include') diff --git a/src/include/stdint.h b/src/include/stdint.h new file mode 100644 index 000000000..4b0e44f2e --- /dev/null +++ b/src/include/stdint.h @@ -0,0 +1,24 @@ +#ifndef _STDINT_H +#define _STDINT_H + +#include + +typedef int8_t s8; +typedef uint8_t u8; +typedef int16_t s16; +typedef uint16_t u16; +typedef int32_t s32; +typedef uint32_t u32; +typedef int64_t s64; +typedef uint64_t u64; + +typedef int8_t int8; +typedef uint8_t uint8; +typedef int16_t int16; +typedef uint16_t uint16; +typedef int32_t int32; +typedef uint32_t uint32; +typedef int64_t int64; +typedef uint64_t uint64; + +#endif /* _STDINT_H */ -- cgit v1.2.3-55-g7522 From 800c8b014b53b220144e77e43080128c3e00ef16 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 18 Aug 2007 18:04:50 +0100 Subject: Add barrier() primitive (was present in Eb5.4), used by some currently out-of-tree driver code. --- src/include/compiler.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/include') diff --git a/src/include/compiler.h b/src/include/compiler.h index b130f28fd..2151fc6ac 100644 --- a/src/include/compiler.h +++ b/src/include/compiler.h @@ -311,6 +311,11 @@ extern void dbg_hex_dump_da ( unsigned long dispaddr, */ #define __shared __asm__ ( "_shared_bss" ) +/** + * Optimisation barrier + */ +#define barrier() __asm__ __volatile__ ( "" : : : "memory" ) + #endif /* ASSEMBLY */ #endif /* COMPILER_H */ -- cgit v1.2.3-55-g7522 From a45a145b8c4da48de526ecd381ba24fe64ead848 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 19 Aug 2007 00:29:27 +0100 Subject: Added error IDs for all files in drivers/net; they're likely to need them. 
--- src/include/gpxe/errfile.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'src/include') diff --git a/src/include/gpxe/errfile.h b/src/include/gpxe/errfile.h index 48db1dc19..4f9e7bc65 100644 --- a/src/include/gpxe/errfile.h +++ b/src/include/gpxe/errfile.h @@ -75,6 +75,32 @@ #define ERRFILE_rtl8139 ( ERRFILE_DRIVER | 0x002a0000 ) #define ERRFILE_smc9000 ( ERRFILE_DRIVER | 0x002b0000 ) #define ERRFILE_tg3 ( ERRFILE_DRIVER | 0x002c0000 ) +#define ERRFILE_3c509_eisa ( ERRFILE_DRIVER | 0x002d0000 ) +#define ERRFILE_3c515 ( ERRFILE_DRIVER | 0x002e0000 ) +#define ERRFILE_3c529 ( ERRFILE_DRIVER | 0x002f0000 ) +#define ERRFILE_3c595 ( ERRFILE_DRIVER | 0x00300000 ) +#define ERRFILE_3c5x9 ( ERRFILE_DRIVER | 0x00310000 ) +#define ERRFILE_3c90x ( ERRFILE_DRIVER | 0x00320000 ) +#define ERRFILE_amd8111e ( ERRFILE_DRIVER | 0x00330000 ) +#define ERRFILE_davicom ( ERRFILE_DRIVER | 0x00340000 ) +#define ERRFILE_depca ( ERRFILE_DRIVER | 0x00350000 ) +#define ERRFILE_dmfe ( ERRFILE_DRIVER | 0x00360000 ) +#define ERRFILE_e1000 ( ERRFILE_DRIVER | 0x00370000 ) +#define ERRFILE_eepro100 ( ERRFILE_DRIVER | 0x00380000 ) +#define ERRFILE_epic100 ( ERRFILE_DRIVER | 0x00390000 ) +#define ERRFILE_forcedeth ( ERRFILE_DRIVER | 0x003a0000 ) +#define ERRFILE_mtd80x ( ERRFILE_DRIVER | 0x003b0000 ) +#define ERRFILE_ns83820 ( ERRFILE_DRIVER | 0x003c0000 ) +#define ERRFILE_ns8390 ( ERRFILE_DRIVER | 0x003d0000 ) +#define ERRFILE_pcnet32 ( ERRFILE_DRIVER | 0x003e0000 ) +#define ERRFILE_r8169 ( ERRFILE_DRIVER | 0x003f0000 ) +#define ERRFILE_sis900 ( ERRFILE_DRIVER | 0x00400000 ) +#define ERRFILE_sundance ( ERRFILE_DRIVER | 0x00410000 ) +#define ERRFILE_tlan ( ERRFILE_DRIVER | 0x00420000 ) +#define ERRFILE_tulip ( ERRFILE_DRIVER | 0x00430000 ) +#define ERRFILE_via_rhine ( ERRFILE_DRIVER | 0x00440000 ) +#define ERRFILE_via_velocity ( ERRFILE_DRIVER | 0x00450000 ) +#define ERRFILE_w89c840 ( ERRFILE_DRIVER | 0x00460000 ) #define ERRFILE_scsi ( ERRFILE_DRIVER | 0x00700000 ) 
-- cgit v1.2.3-55-g7522 From 7b6d11e7136cee21cc9a76614174abac999f6173 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Wed, 12 Sep 2007 22:17:43 +0100 Subject: Started IB driver rewrite --- src/drivers/net/mlx_ipoib/ib_mt25218.c | 7 ++ src/drivers/net/mlx_ipoib/mt25218.c | 206 ++++++++++++++++++++++++++++++++- src/include/gpxe/errfile.h | 1 + src/include/gpxe/infiniband.h | 52 +++++++++ src/include/gpxe/netdevice.h | 4 +- src/net/infiniband.c | 118 +++++++++++++++++++ 6 files changed, 381 insertions(+), 7 deletions(-) create mode 100644 src/include/gpxe/infiniband.h create mode 100644 src/net/infiniband.c (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index 8f3873e6e..631a95cba 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -1586,6 +1586,13 @@ static void prep_send_wqe_buf(void *qph, len += offset; } snd_wqe->mpointer[0].byte_count = cpu_to_be32(len); + + DBG ( "prep_send_wqe_buf()\n" ); + DBG ( "snd_wqe:\n" ); + DBG_HD ( snd_wqe, sizeof ( *snd_wqe ) ); + DBG ( "packet:\n" ); + DBG_HD ( bus_to_virt(be32_to_cpu(snd_wqe->mpointer[0].local_addr_l)), + len ); } static void *alloc_ud_av(void) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 11a35c2e5..b9b12c36c 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -10,6 +10,15 @@ Skeleton NIC driver for Etherboot * your option) any later version. 
*/ +#include +#include +#include +#include +#include + +struct mlx_nic { +}; + /* to get some global routines like printf */ #include "etherboot.h" /* to get the interface to the body of the program */ @@ -145,6 +154,131 @@ static void mt25218_transmit(struct nic *nic, const char *dest, /* Destination * } } +/** + * Open network device + * + * @v netdev Network device + * @ret rc Return status code + */ +static int mlx_open ( struct net_device *netdev ) { + return 0; +} + +/** + * Close network device + * + * @v netdev Network device + */ +static void mlx_close ( struct net_device *netdev ) { +} + +#warning "Broadcast address?" +static uint8_t ib_broadcast[IB_ALEN] = { 0xff, }; + + +/** + * Transmit packet + * + * @v netdev Network device + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int mlx_transmit ( struct net_device *netdev, + struct io_buffer *iobuf ) { + struct ibhdr *ibhdr = iobuf->data; + + DBG ( "Sending packet:\n" ); + // DBG_HD ( iobuf->data, iob_len ( iobuf ) ); + + DBG ( "Peer:\n" ); + DBG_HD ( &ibhdr->peer[0], IB_ALEN ); + DBG ( "Bcast:\n" ); + DBG_HD ( &ib_broadcast[0], IB_ALEN ); + + iob_pull ( iobuf, sizeof ( *ibhdr ) ); + + if ( memcmp ( ibhdr->peer, ib_broadcast, IB_ALEN ) == 0 ) { + printf ( "Sending broadcast packet\n" ); + return send_bcast_packet ( ibhdr->proto, iobuf->data, + iob_len ( iobuf ) ); + } else { + printf ( "Sending unicast packet\n" ); + return send_ucast_packet ( ibhdr->peer, ibhdr->proto, + iobuf->data, iob_len ( iobuf ) ); + } +} + +/** + * Poll for completed and received packets + * + * @v netdev Network device + */ +static void mlx_poll ( struct net_device *netdev ) { + struct ib_cqe_st ib_cqe; + uint8_t num_cqes; + unsigned int len; + struct io_buffer *iobuf; + void *buf; + int rc; + + if ( ( rc = poll_error_buf() ) != 0 ) { + DBG ( "poll_error_buf() failed: %s\n", strerror ( rc ) ); + return; + } + + if ( ( rc = drain_eq() ) != 0 ) { + DBG ( "drain_eq() failed: %s\n", strerror ( rc ) ); + return; + } 
+ + if ( ( rc = ib_poll_cq ( ipoib_data.rcv_cqh, &ib_cqe, + &num_cqes ) ) != 0 ) { + DBG ( "ib_poll_cq() failed: %s\n", strerror ( rc ) ); + return; + } + + if ( ! num_cqes ) + return; + + if ( ib_cqe.is_error ) { + DBG ( "cqe error\n" ); + free_wqe ( ib_cqe.wqe ); + return; + } + + len = ib_cqe.count; + iobuf = alloc_iob ( len ); + if ( ! iobuf ) { + DBG ( "out of memory\n" ); + free_wqe ( ib_cqe.wqe ); + return; + } + memcpy ( iob_put ( iobuf, len ), buf, len ); + DBG ( "Received packet:\n" ); + DBG_HD ( iobuf->data, iob_len ( iobuf ) ); + + netdev_rx ( netdev, iobuf ); + + free_wqe ( ib_cqe.wqe ); +} + +/** + * Enable or disable interrupts + * + * @v netdev Network device + * @v enable Interrupts should be enabled + */ +static void mlx_irq ( struct net_device *netdev, int enable ) { +} + +static struct net_device_operations mlx_operations = { + .open = mlx_open, + .close = mlx_close, + .transmit = mlx_transmit, + .poll = mlx_poll, + .irq = mlx_irq, +}; + /************************************************************************** DISABLE - Turn off ethernet interface ***************************************************************************/ @@ -165,6 +299,21 @@ static void mt25218_disable(struct nic *nic) } } +/** + * Remove PCI device + * + * @v pci PCI device + */ +static void mlx_remove ( struct pci_device *pci ) { + struct net_device *netdev = pci_get_drvdata ( pci ); + struct mlx_nic *mlx = netdev->priv; + + unregister_netdev ( netdev ); + ipoib_close(0); + netdev_nullify ( netdev ); + netdev_put ( netdev ); +} + static struct nic_operations mt25218_operations = { .connect = dummy_connect, .poll = mt25218_poll, @@ -233,12 +382,59 @@ static int mt25218_probe(struct nic *nic, struct pci_device *pci) return 0; } -static struct pci_device_id mt25218_nics[] = { +/** + * Probe PCI device + * + * @v pci PCI device + * @v id PCI ID + * @ret rc Return status code + */ +static int mlx_probe ( struct pci_device *pci, + const struct pci_device_id *id __unused ) { + 
struct net_device *netdev; + struct mlx_nic *mlx; + int rc; + + /* Allocate net device */ + netdev = alloc_ibdev ( sizeof ( *mlx ) ); + if ( ! netdev ) + return -ENOMEM; + netdev_init ( netdev, &mlx_operations ); + mlx = netdev->priv; + pci_set_drvdata ( pci, netdev ); + netdev->dev = &pci->dev; + memset ( mlx, 0, sizeof ( *mlx ) ); + + /* Fix up PCI device */ + adjust_pci_device ( pci ); + + /* Initialise hardware */ + if ( ( rc = ipoib_init ( pci ) ) != 0 ) + goto err_ipoib_init; + memcpy ( netdev->ll_addr, ipoib_data.port_gid_raw, IB_ALEN ); + + /* Register network device */ + if ( ( rc = register_netdev ( netdev ) ) != 0 ) + goto err_register_netdev; + + return 0; + + err_register_netdev: + err_ipoib_init: + ipoib_close(0); + netdev_nullify ( netdev ); + netdev_put ( netdev ); + return rc; +} + +static struct pci_device_id mlx_nics[] = { PCI_ROM(0x15b3, 0x6282, "MT25218", "MT25218 HCA driver"), PCI_ROM(0x15b3, 0x6274, "MT25204", "MT25204 HCA driver"), }; -PCI_DRIVER ( mt25218_driver, mt25218_nics, PCI_NO_CLASS ); - -DRIVER ( "MT25218", nic_driver, pci_driver, mt25218_driver, - mt25218_probe, mt25218_disable ); +struct pci_driver mlx_driver __pci_driver = { + .ids = mlx_nics, + .id_count = ( sizeof ( mlx_nics ) / sizeof ( mlx_nics[0] ) ), + .probe = mlx_probe, + .remove = mlx_remove, +}; diff --git a/src/include/gpxe/errfile.h b/src/include/gpxe/errfile.h index 4f9e7bc65..3413f9cf4 100644 --- a/src/include/gpxe/errfile.h +++ b/src/include/gpxe/errfile.h @@ -123,6 +123,7 @@ #define ERRFILE_dhcp ( ERRFILE_NET | 0x00100000 ) #define ERRFILE_dns ( ERRFILE_NET | 0x00110000 ) #define ERRFILE_tftp ( ERRFILE_NET | 0x00120000 ) +#define ERRFILE_infiniband ( ERRFILE_NET | 0x00130000 ) #define ERRFILE_image ( ERRFILE_IMAGE | 0x00000000 ) #define ERRFILE_elf ( ERRFILE_IMAGE | 0x00010000 ) diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h new file mode 100644 index 000000000..126113a74 --- /dev/null +++ b/src/include/gpxe/infiniband.h @@ -0,0 +1,52 
@@ +#ifndef _GPXE_INFINIBAND_H +#define _GPXE_INFINIBAND_H + +/** @file + * + * Infiniband protocol + * + */ + +#include +#include + +/** Infiniband hardware address length */ +#define IB_ALEN 20 +#define IB_HLEN 24 + +/** An Infiniband header + * + * This data structure doesn't represent the on-wire format, but does + * contain all the information required by the driver to construct the + * packet. + */ +struct ibhdr { + /** Peer address */ + uint8_t peer[IB_ALEN]; + /** Network-layer protocol */ + uint16_t proto; + /** Reserved, must be zero */ + uint16_t reserved; +} __attribute__ (( packed )); + +extern struct ll_protocol infiniband_protocol; + +extern const char * ib_ntoa ( const void *ll_addr ); + +/** + * Allocate Infiniband device + * + * @v priv_size Size of driver private data + * @ret netdev Network device, or NULL + */ +static inline struct net_device * alloc_ibdev ( size_t priv_size ) { + struct net_device *netdev; + + netdev = alloc_netdev ( priv_size ); + if ( netdev ) { + netdev->ll_protocol = &infiniband_protocol; + } + return netdev; +} + +#endif /* _GPXE_INFINIBAND_H */ diff --git a/src/include/gpxe/netdevice.h b/src/include/gpxe/netdevice.h index d82c6d8f4..2cbd0efb1 100644 --- a/src/include/gpxe/netdevice.h +++ b/src/include/gpxe/netdevice.h @@ -19,10 +19,10 @@ struct ll_protocol; struct device; /** Maximum length of a link-layer address */ -#define MAX_LL_ADDR_LEN 6 +#define MAX_LL_ADDR_LEN 20 /** Maximum length of a link-layer header */ -#define MAX_LL_HEADER_LEN 16 +#define MAX_LL_HEADER_LEN 32 /** Maximum length of a network-layer address */ #define MAX_NET_ADDR_LEN 4 diff --git a/src/net/infiniband.c b/src/net/infiniband.c new file mode 100644 index 000000000..bcfac292d --- /dev/null +++ b/src/net/infiniband.c @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2007 Michael Brown . 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** @file + * + * Infiniband protocol + * + */ + +/** Infiniband broadcast MAC address */ +static uint8_t ib_broadcast[IB_ALEN] = { 0xff, }; + +/** + * Transmit Infiniband packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v ll_dest Link-layer destination address + * + * Prepends the Infiniband link-layer header and transmits the packet. + */ +static int ib_tx ( struct io_buffer *iobuf, struct net_device *netdev, + struct net_protocol *net_protocol, const void *ll_dest ) { + struct ibhdr *ibhdr = iob_push ( iobuf, sizeof ( *ibhdr ) ); + + + /* Build Infiniband header */ + memcpy ( ibhdr->peer, ll_dest, IB_ALEN ); + ibhdr->proto = net_protocol->net_proto; + ibhdr->reserved = 0; + + /* Hand off to network device */ + return netdev_tx ( netdev, iobuf ); +} + +/** + * Process received Infiniband packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * + * Strips off the Infiniband link-layer header and passes up to the + * network-layer protocol. 
+ */ +static int ib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { + struct ibhdr *ibhdr = iobuf->data; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *ibhdr ) ) { + DBG ( "Infiniband packet too short (%d bytes)\n", + iob_len ( iobuf ) ); + free_iob ( iobuf ); + return -EINVAL; + } + + /* Strip off Infiniband header */ + iob_pull ( iobuf, sizeof ( *ibhdr ) ); + + /* Hand off to network-layer protocol */ + return net_rx ( iobuf, netdev, ibhdr->proto, ibhdr->peer ); +} + +/** + * Transcribe Infiniband address + * + * @v ll_addr Link-layer address + * @ret string Link-layer address in human-readable format + */ +const char * ib_ntoa ( const void *ll_addr ) { + static char buf[61]; + const uint8_t *ib_addr = ll_addr; + unsigned int i; + char *p = buf; + + for ( i = 0 ; i < IB_ALEN ; i++ ) { + p += sprintf ( p, ":%02x", ib_addr[i] ); + } + return ( buf + 1 ); +} + +/** Infiniband protocol */ +struct ll_protocol infiniband_protocol __ll_protocol = { + .name = "Infiniband", + .ll_proto = htons ( ARPHRD_INFINIBAND ), + .ll_addr_len = IB_ALEN, + .ll_header_len = IB_HLEN, + .ll_broadcast = ib_broadcast, + .tx = ib_tx, + .rx = ib_rx, + .ntoa = ib_ntoa, +}; -- cgit v1.2.3-55-g7522 From 7e4e5af462a9f62f74e7a5c49802431248dda8b2 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Thu, 13 Sep 2007 01:53:04 +0100 Subject: Use RFC4390 whenever hardware address exceeds 16 bytes; this allows us to construct DHCP packets suitable for Infiniband. 
--- src/include/gpxe/dhcp.h | 7 +++++++ src/net/udp/dhcp.c | 38 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 2 deletions(-) (limited to 'src/include') diff --git a/src/include/gpxe/dhcp.h b/src/include/gpxe/dhcp.h index 43bccf185..18baa86ad 100644 --- a/src/include/gpxe/dhcp.h +++ b/src/include/gpxe/dhcp.h @@ -432,6 +432,13 @@ struct dhcphdr { /** Opcode for a reply from server to client */ #define BOOTP_REPLY 2 +/** BOOTP reply must be broadcast + * + * Clients that cannot accept unicast BOOTP replies must set this + * flag. + */ +#define BOOTP_FL_BROADCAST 0x8000 + /** DHCP magic cookie */ #define DHCP_MAGIC_COOKIE 0x63825363UL diff --git a/src/net/udp/dhcp.c b/src/net/udp/dhcp.c index 9e48fe11b..8e34ccb6c 100644 --- a/src/net/udp/dhcp.c +++ b/src/net/udp/dhcp.c @@ -298,6 +298,7 @@ static int create_dhcp_packet ( struct net_device *netdev, uint8_t msgtype, void *data, size_t max_len, struct dhcp_packet *dhcppkt ) { struct dhcphdr *dhcphdr = data; + unsigned int hlen; int rc; /* Sanity check */ @@ -309,9 +310,17 @@ static int create_dhcp_packet ( struct net_device *netdev, uint8_t msgtype, dhcphdr->xid = dhcp_xid ( netdev ); dhcphdr->magic = htonl ( DHCP_MAGIC_COOKIE ); dhcphdr->htype = ntohs ( netdev->ll_protocol->ll_proto ); - dhcphdr->hlen = netdev->ll_protocol->ll_addr_len; - memcpy ( dhcphdr->chaddr, netdev->ll_addr, dhcphdr->hlen ); dhcphdr->op = dhcp_op[msgtype]; + /* If hardware length exceeds the chaddr field length, don't + * use the chaddr field. This is as per RFC4390. 
+ */ + hlen = netdev->ll_protocol->ll_addr_len; + if ( hlen > sizeof ( dhcphdr->chaddr ) ) { + hlen = 0; + dhcphdr->flags = htons ( BOOTP_FL_BROADCAST ); + } + dhcphdr->hlen = hlen; + memcpy ( dhcphdr->chaddr, netdev->ll_addr, hlen ); /* Initialise DHCP packet structure */ dhcppkt->dhcphdr = dhcphdr; @@ -494,6 +503,14 @@ struct dhcp_netdev_desc { uint16_t device; } __attribute__ (( packed )); +/** DHCP client identifier */ +struct dhcp_client_id { + /** Link-layer protocol */ + uint8_t ll_proto; + /** Link-layer address */ + uint8_t ll_addr[MAX_LL_ADDR_LEN]; +} __attribute__ (( packed )); + /** * Create DHCP request * @@ -511,7 +528,9 @@ int create_dhcp_request ( struct net_device *netdev, int msgtype, struct dhcp_packet *dhcppkt ) { struct device_description *desc = &netdev->dev->desc; struct dhcp_netdev_desc dhcp_desc; + struct dhcp_client_id client_id; size_t dhcp_features_len; + size_t ll_addr_len; int rc; /* Create DHCP packet */ @@ -570,6 +589,21 @@ int create_dhcp_request ( struct net_device *netdev, int msgtype, return rc; } + /* Add DHCP client identifier. Required for Infiniband, and + * doesn't hurt other link layers. + */ + client_id.ll_proto = netdev->ll_protocol->ll_proto; + ll_addr_len = netdev->ll_protocol->ll_addr_len; + assert ( ll_addr_len <= sizeof ( client_id.ll_addr ) ); + memcpy ( client_id.ll_addr, netdev->ll_addr, ll_addr_len ); + if ( ( rc = set_dhcp_packet_option ( dhcppkt, DHCP_CLIENT_ID, + &client_id, + ( ll_addr_len + 1 ) ) ) != 0 ) { + DBG ( "DHCP could not set client ID: %s\n", + strerror ( rc ) ); + return rc; + } + return 0; } -- cgit v1.2.3-55-g7522 From 30a19c3f1c51ff404d8de3196c4355fba3083c8e Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Thu, 13 Sep 2007 14:43:12 +0100 Subject: Can now both send and receive packets. LL header format not yet fixed; still using a quick hack-up just to be able to pass through data. 
--- src/drivers/net/mlx_ipoib/ib_mt25218.c | 2 ++ src/drivers/net/mlx_ipoib/mt25218.c | 17 +++++++++------- src/include/gpxe/infiniband.h | 37 ++++++++++++++++++++++++++++++++++ src/net/infiniband.c | 11 ++++++++++ 4 files changed, 60 insertions(+), 7 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index 631a95cba..dcd49e45d 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -1587,12 +1587,14 @@ static void prep_send_wqe_buf(void *qph, } snd_wqe->mpointer[0].byte_count = cpu_to_be32(len); +#if 0 DBG ( "prep_send_wqe_buf()\n" ); DBG ( "snd_wqe:\n" ); DBG_HD ( snd_wqe, sizeof ( *snd_wqe ) ); DBG ( "packet:\n" ); DBG_HD ( bus_to_virt(be32_to_cpu(snd_wqe->mpointer[0].local_addr_l)), len ); +#endif } static void *alloc_ud_av(void) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index de91dac65..a468f160e 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -248,13 +248,13 @@ static void mlx_poll ( struct net_device *netdev ) { } buf = get_rcv_wqe_buf(ib_cqe.wqe, 1); memcpy ( iob_put ( iobuf, len ), buf, len ); - DBG ( "Received packet header:\n" ); - struct recv_wqe_st *rcv_wqe = ib_cqe.wqe; - DBG_HD ( get_rcv_wqe_buf(ib_cqe.wqe, 0), - be32_to_cpu(rcv_wqe->mpointer[0].byte_count) ); + // DBG ( "Received packet header:\n" ); + // struct recv_wqe_st *rcv_wqe = ib_cqe.wqe; + // DBG_HD ( get_rcv_wqe_buf(ib_cqe.wqe, 0), + // be32_to_cpu(rcv_wqe->mpointer[0].byte_count) ); - DBG ( "Received packet:\n" ); - DBG_HD ( iobuf->data, iob_len ( iobuf ) ); + // DBG ( "Received packet:\n" ); + // DBG_HD ( iobuf->data, iob_len ( iobuf ) ); netdev_rx ( netdev, iobuf ); @@ -392,6 +392,7 @@ static int mlx_probe ( struct pci_device *pci, const struct pci_device_id *id __unused ) { struct net_device *netdev; struct mlx_nic *mlx; + struct ib_mac *mac; int rc; /* Allocate net device 
*/ @@ -410,7 +411,9 @@ static int mlx_probe ( struct pci_device *pci, /* Initialise hardware */ if ( ( rc = ipoib_init ( pci ) ) != 0 ) goto err_ipoib_init; - memcpy ( netdev->ll_addr, ipoib_data.port_gid_raw, IB_ALEN ); + mac = ( ( struct ib_mac * ) netdev->ll_addr ); + mac->qpn = htonl ( ipoib_data.ipoib_qpn ); + memcpy ( &mac->gid, ipoib_data.port_gid_raw, sizeof ( mac->gid ) ); /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 126113a74..11cec1894 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -14,6 +14,43 @@ #define IB_ALEN 20 #define IB_HLEN 24 +/** An Infiniband Global Identifier */ +struct ib_gid { + uint8_t bytes[16]; +}; + +/** An Infiniband Global Route Header */ +struct ib_global_route_header { + /** IP version, traffic class, and flow label + * + * 4 bits : Version of the GRH + * 8 bits : Traffic class + * 20 bits : Flow label + */ + uint32_t ipver_tclass_flowlabel; + /** Payload length */ + uint16_t paylen; + /** Next header */ + uint8_t nxthdr; + /** Hop limit */ + uint8_t hoplmt; + /** Source GID */ + struct ib_gid sgid; + /** Destination GID */ + struct ib_gid dgid; +} __attribute__ (( packed )); + +/** An Infiniband MAC address */ +struct ib_mac { + /** Queue pair number + * + * MSB must be zero; QPNs are only 24-bit. + */ + uint32_t qpn; + /** Port GID */ + struct ib_gid gid; +} __attribute__ (( packed )); + /** An Infiniband header * * This data structure doesn't represent the on-wire format, but does diff --git a/src/net/infiniband.c b/src/net/infiniband.c index bcfac292d..c7fabd0ee 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -70,6 +70,17 @@ static int ib_tx ( struct io_buffer *iobuf, struct net_device *netdev, * network-layer protocol.
*/ static int ib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { + + struct { + uint16_t proto; + uint16_t reserved; + } * header = iobuf->data; + + iob_pull ( iobuf, sizeof ( *header ) ); + return net_rx ( iobuf, netdev, header->proto, NULL ); + + + struct ibhdr *ibhdr = iobuf->data; /* Sanity check */ -- cgit v1.2.3-55-g7522 From 08e8dfd801afd35f2f006520b1df78d05de1921a Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Fri, 14 Sep 2007 11:10:25 +0100 Subject: Now handling TX completions in our poll loop. --- src/drivers/net/mlx_ipoib/mt25218.c | 113 ++++++++++++++++++++---------------- src/include/gpxe/infiniband.h | 19 +++--- src/net/infiniband.c | 17 +----- 3 files changed, 74 insertions(+), 75 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 810a479a2..020f9294a 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -16,9 +16,6 @@ Skeleton NIC driver for Etherboot #include #include -struct mlx_nic { -}; - /* to get some global routines like printf */ #include "etherboot.h" /* to get the interface to the body of the program */ @@ -27,6 +24,16 @@ struct mlx_nic { #include "mt_version.c" #include "mt25218_imp.c" +struct mlx_nic { + /** Queue pair handle */ + udqp_t ipoib_qph; + /** Broadcast Address Vector */ + ud_av_t bcast_av; + /** Send completion queue */ + cq_t snd_cqh; + /** Receive completion queue */ + cq_t rcv_cqh; +}; int prompt_key(int secs, unsigned char *ch_p) { @@ -192,8 +199,28 @@ static uint8_t ib_broadcast[IB_ALEN] = { 0xff, }; */ static int mlx_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) { - struct ibhdr *ibhdr = iobuf->data; + struct mlx_nic *mlx = netdev->priv; + ud_send_wqe_t snd_wqe; + int rc; + + snd_wqe = alloc_send_wqe ( mlx->ipoib_qph ); + if ( ! 
snd_wqe ) { + DBGC ( mlx, "MLX %p out of TX WQEs\n", mlx ); + return -ENOBUFS; + } + + prep_send_wqe_buf ( mlx->ipoib_qph, mlx->bcast_av, snd_wqe, + iobuf->data, 0, iob_len ( iobuf ), 0 ); + if ( ( rc = post_send_req ( mlx->ipoib_qph, snd_wqe, 1 ) ) != 0 ) { + DBGC ( mlx, "MLX %p could not post TX WQE %p: %s\n", + mlx, snd_wqe, strerror ( rc ) ); + free_wqe ( snd_wqe ); + return rc; + } + + return 0; +#if 0 ( void ) netdev; iob_pull ( iobuf, sizeof ( *ibhdr ) ); @@ -208,46 +235,47 @@ static int mlx_transmit ( struct net_device *netdev, ntohs ( ibhdr->proto ), iobuf->data, iob_len ( iobuf ) ); } +#endif } /** * Handle TX completion * * @v netdev Network device - * @v cqe Completion queue entry + * @v ib_cqe Completion queue entry */ static void mlx_tx_complete ( struct net_device *netdev, - struct ib_cqe_st *cqe ) { + struct ib_cqe_st *ib_cqe ) { netdev_tx_complete_next_err ( netdev, - ( cqe->is_error ? -EIO : 0 ) ); + ( ib_cqe->is_error ? -EIO : 0 ) ); } /** * Handle RX completion * * @v netdev Network device - * @v cqe Completion queue entry + * @v ib_cqe Completion queue entry */ static void mlx_rx_complete ( struct net_device *netdev, - struct ib_cqe_st *cqe ) { + struct ib_cqe_st *ib_cqe ) { unsigned int len; struct io_buffer *iobuf; void *buf; /* Check for errors */ - if ( cqe->is_error ) { + if ( ib_cqe->is_error ) { netdev_rx_err ( netdev, NULL, -EIO ); return; } /* Allocate I/O buffer */ - len = cqe->count; + len = ( ib_cqe->count - GRH_SIZE ); iobuf = alloc_iob ( len ); if ( ! 
iobuf ) { netdev_rx_err ( netdev, NULL, -ENOMEM ); return; } - buf = get_rcv_wqe_buf ( cqe->wqe, 1 ); + buf = get_rcv_wqe_buf ( ib_cqe->wqe, 1 ); memcpy ( iob_put ( iobuf, len ), buf, len ); // DBG ( "Received packet header:\n" ); // struct recv_wqe_st *rcv_wqe = ib_cqe.wqe; @@ -263,52 +291,33 @@ static void mlx_rx_complete ( struct net_device *netdev, * * @v netdev Network device * @v cq Completion queue + * @v handler Completion handler */ -static void mlx_poll_cq ( struct net_device *netdev, - struct cq_st *cq ) { +static void mlx_poll_cq ( struct net_device *netdev, cq_t cq, + void ( * handler ) ( struct net_device *netdev, + struct ib_cqe_st *ib_cqe ) ) { struct mlx_nic *mlx = netdev->priv; - struct ib_cqe_st cqe; + struct ib_cqe_st ib_cqe; uint8_t num_cqes; while ( 1 ) { - unsigned long cons_idx; - union cqe_st *temp; - - cons_idx = ( cq->cons_counter & ( cq->num_cqes - 1 ) ); - temp = &cq->cq_buf[cons_idx]; - if ( EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, - owner ) == 0 ) { - DBG ( "software owned\n" ); - DBGC_HD ( mlx, temp, sizeof ( *temp ) ); - DBG ( "my_qpn=%lx, g=%ld, s=%ld, op=%02lx, cnt=%lx\n", - EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, my_qpn ), - EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, g ), - EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, s ), - EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, opcode ), - EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, byte_cnt ) ); - } - /* Poll for single completion queue entry */ - ib_poll_cq ( cq, &cqe, &num_cqes ); + ib_poll_cq ( cq, &ib_cqe, &num_cqes ); /* Return if no entries in the queue */ if ( ! 
num_cqes ) return; DBGC ( mlx, "MLX %p cpl in %p: err %x send %x " - "wqe %p count %lx\n", mlx, cq, cqe.is_error, - cqe.is_send, cqe.wqe, cqe.count ); + "wqe %p count %lx\n", mlx, cq, ib_cqe.is_error, + ib_cqe.is_send, ib_cqe.wqe, ib_cqe.count ); /* Handle TX/RX completion */ - if ( cqe.is_send ) { - mlx_tx_complete ( netdev, &cqe ); - } else { - mlx_rx_complete ( netdev, &cqe ); - } - + handler ( netdev, &ib_cqe ); + /* Free associated work queue entry */ - free_wqe ( cqe.wqe ); + free_wqe ( ib_cqe.wqe ); } } @@ -318,6 +327,7 @@ static void mlx_poll_cq ( struct net_device *netdev, * @v netdev Network device */ static void mlx_poll ( struct net_device *netdev ) { + struct mlx_nic *mlx = netdev->priv; int rc; if ( ( rc = poll_error_buf() ) != 0 ) { @@ -330,8 +340,8 @@ static void mlx_poll ( struct net_device *netdev ) { return; } - // mlx_poll_cq ( netdev, ipoib_data.snd_cqh ); - mlx_poll_cq ( netdev, ipoib_data.rcv_cqh ); + mlx_poll_cq ( netdev, mlx->snd_cqh, mlx_tx_complete ); + mlx_poll_cq ( netdev, mlx->rcv_cqh, mlx_rx_complete ); } /** @@ -386,7 +396,7 @@ static void mlx_remove ( struct pci_device *pci ) { struct net_device *netdev = pci_get_drvdata ( pci ); unregister_netdev ( netdev ); - ipoib_close(0); + ib_driver_close ( 0 ); netdev_nullify ( netdev ); netdev_put ( netdev ); } @@ -473,6 +483,7 @@ static int mlx_probe ( struct pci_device *pci, struct net_device *netdev; struct mlx_nic *mlx; struct ib_mac *mac; + udqp_t qph; int rc; /* Allocate net device */ @@ -489,11 +500,15 @@ static int mlx_probe ( struct pci_device *pci, adjust_pci_device ( pci ); /* Initialise hardware */ - if ( ( rc = ipoib_init ( pci ) ) != 0 ) + if ( ( rc = ib_driver_init ( pci, &qph ) ) != 0 ) goto err_ipoib_init; + mlx->ipoib_qph = qph; + mlx->bcast_av = ib_data.bcast_av; + mlx->snd_cqh = ib_data.ipoib_snd_cq; + mlx->rcv_cqh = ib_data.ipoib_rcv_cq; mac = ( ( struct ib_mac * ) netdev->ll_addr ); - mac->qpn = htonl ( ipoib_data.ipoib_qpn ); - memcpy ( &mac->gid, 
ipoib_data.port_gid_raw, sizeof ( mac->gid ) ); + mac->qpn = htonl ( ib_get_qpn ( mlx->ipoib_qph ) ); + memcpy ( &mac->gid, ib_data.port_gid.raw, sizeof ( mac->gid ) ); /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) @@ -503,7 +518,7 @@ static int mlx_probe ( struct pci_device *pci, err_register_netdev: err_ipoib_init: - ipoib_close(0); + ib_driver_close ( 0 ); netdev_nullify ( netdev ); netdev_put ( netdev ); return rc; diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 11cec1894..9f126b49d 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -10,10 +10,6 @@ #include #include -/** Infiniband hardware address length */ -#define IB_ALEN 20 -#define IB_HLEN 24 - /** An Infiniband Global Identifier */ struct ib_gid { uint8_t bytes[16]; @@ -40,6 +36,9 @@ struct ib_global_route_header { struct ib_gid dgid; } __attribute__ (( packed )); +/** Infiniband MAC address length */ +#define IB_ALEN 20 + /** An Infiniband MAC address */ struct ib_mac { /** Queue pair number @@ -51,15 +50,11 @@ struct ib_mac { struct ib_gid gid; } __attribute__ (( packed )); -/** An Infiniband header - * - * This data structure doesn't represent the on-wire format, but does - * contain all the information required by the driver to construct the - * packet. 
- */ +/** Infiniband link-layer header length */ +#define IB_HLEN 4 + +/** An Infiniband link-layer header */ struct ibhdr { - /** Peer address */ - uint8_t peer[IB_ALEN]; /** Network-layer protocol */ uint16_t proto; /** Reserved, must be zero */ diff --git a/src/net/infiniband.c b/src/net/infiniband.c index c7fabd0ee..52811b921 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -50,12 +50,12 @@ static int ib_tx ( struct io_buffer *iobuf, struct net_device *netdev, struct net_protocol *net_protocol, const void *ll_dest ) { struct ibhdr *ibhdr = iob_push ( iobuf, sizeof ( *ibhdr ) ); - /* Build Infiniband header */ - memcpy ( ibhdr->peer, ll_dest, IB_ALEN ); ibhdr->proto = net_protocol->net_proto; ibhdr->reserved = 0; + ( void ) ll_dest; + /* Hand off to network device */ return netdev_tx ( netdev, iobuf ); } @@ -70,17 +70,6 @@ static int ib_tx ( struct io_buffer *iobuf, struct net_device *netdev, * network-layer protocol. */ static int ib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { - - struct { - uint16_t proto; - uint16_t reserved; - } * header = iobuf->data; - - iob_pull ( iobuf, sizeof ( *header ) ); - return net_rx ( iobuf, netdev, header->proto, NULL ); - - - struct ibhdr *ibhdr = iobuf->data; /* Sanity check */ @@ -95,7 +84,7 @@ static int ib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { iob_pull ( iobuf, sizeof ( *ibhdr ) ); /* Hand off to network-layer protocol */ - return net_rx ( iobuf, netdev, ibhdr->proto, ibhdr->peer ); + return net_rx ( iobuf, netdev, ibhdr->proto, NULL ); } /** -- cgit v1.2.3-55-g7522 From 9d08b7c692aac5b1790555f8fc28ddb52ef28bb5 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Fri, 14 Sep 2007 20:29:44 +0100 Subject: Starting to introduce an Infiniband device abstraction --- src/drivers/net/mlx_ipoib/mt25218.c | 138 ++++++++++++++++++++++++++++++++++++ src/include/gpxe/infiniband.h | 63 ++++++++++++++++ 2 files changed, 201 insertions(+) (limited to 'src/include') diff --git 
a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 3cbca49a5..c6015fb24 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -72,6 +72,7 @@ static uint8_t ib_broadcast[IB_ALEN] = { 0xff, }; static int mlx_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) { struct mlx_nic *mlx = netdev->priv; + ud_av_t av = iobuf->data; ud_send_wqe_t snd_wqe; int rc; @@ -222,6 +223,143 @@ static struct net_device_operations mlx_operations = { .irq = mlx_irq, }; + + +int ib_alloc_wqe ( struct ib_work_queue *wq, struct io_buffer *iobuf ) { + unsigned int wqe_idx; + unsigned int new_write_ptr; + + /* Allocate queue entry */ + wqe_idx = new_write_ptr = wq->write_ptr; + if ( wq->iobuf[wqe_idx] ) + return -ENOBUFS; + wq->iobuf[wqe_idx] = iobuf; + + /* Update write pointer */ + new_write_ptr++; + new_write_ptr &= ( wq->num_wqes - 1 ); + wq->write_ptr = new_write_ptr; + + return wqe_idx; +} + +static inline void ib_free_wqe ( struct ib_work_queue *wq, int wqe_idx ) { + assert ( wq->iobuf[wqe_idx] != NULL ); + wq->iobuf[wqe_idx] = NULL; +} + +static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, + struct ib_address_vector *av, + struct ib_queue_pair *qp ) { + struct mlx *mlx = ibdev->priv; + struct ib_work_queue *wq = &qp->send; + struct mlx_work_queue *mlx_wq = wq->priv; + unsigned int wqe_idx_mask = ( wq->num_wqes - 1 ); + unsigned int prev_wqe_idx; + struct ud_send_wqe_st *prev_wqe; + unsigned int wqe_idx; + struct ud_send_wqe_st *wqe; + struct ib_gid *gid; + size_t nds; + struct send_doorbell_st doorbell; + + /* Allocate work queue entry */ + prev_wqe_idx = wq->posted; + wqe_idx = ( prev_wqe_index + 1 ); + if ( wq->iobuf[wqe_idx & wqe_idx_mask] ) { + DBGC ( mlx, "MLX %p send queue full", mlx ); + return -ENOBUFS; + } + prev_wqe = &mlx_wq->wqe[prev_wqe_idx & wqe_idx_mask]; + wqe = &mlx_wq->wqe[wqe_idx & wqe_idx_mask]; + + /* Construct work queue entry */ + memset ( 
&wqe->next.control, 0, + sizeof ( wqe->next.control ) ); + MLX_POPULATE_1 ( &wqe->next.control, + arbelprm_wqe_segment_ctrl_send_st, 0, + always1, 1 ); + memset ( &wqe->udseg, 0, sizeof ( wqe->udseg ) ); + MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 0, + pd, GLOBAL_PD, + port_number, mlx->port ); + MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 1, + rlid, av->remote_lid, + g, av->gid_present ); + MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 2, + max_stat_rate, ( ( av->rate >= 3 ) ? 0 : 1 ), + msg, 3 ); + MLX_POPULATE_1 ( &wqe->udseg, arbelprm_ud_address_vector_st, 3, + sl, av->sl ); + gid = ( av->gid_present ? av->gid : &ib_no_gid ); + memcpy ( ( ( ( void * ) &wqe->udseg ) + 16 ), + gid, sizeof ( *gid ) ); + MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 8, + destination_qp, av->dest_qp ); + MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 9, + q_key, av->qkey ); + wqe->mpointer[0].local_addr_l = + cpu_to_be32 ( virt_to_bus ( iobuf->data ) ); + wqe->mpointer[0].byte_count = cpu_to_be32 ( iob_len ( iobuf ) ); + + /* Update previous work queue entry's "next" field */ + nds = ( offsetof ( typeof ( *wqe ), mpointer ) + + sizeof ( wqe->mpointer[0] ) ); + MLX_MODIFY_1 ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 0, + nopcode, XDEV_NOPCODE_SEND ); + MLX_POPULATE_3 ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 1, + nds, nds, + f, 1, + always1, 1 ); + + /* Ring doorbell */ + + doorbell index is a property of the queue pair + + + MLX_POPULATE_1 ( mlx_wq->send_uar_context, arbelprm_qp_db_record_st, 0, + counter, ( wqe_idx & 0xffff ) ); + memset ( &doorbell, 0, sizeof ( doorbell ) ); + MLX_POPULATE_4 ( &doorbell, arbelprm_send_doorbell_st, 0, + nopcode, XDEV_NOPCODE_SEND, + f, 1, + wqe_counter, ( prev_wqe_idx & 0xffff ), + wqe_cnt, 1 ); + MLX_POPULATE_2 ( &doorbell, arbelprm_send_doorbell_st, 1, + nds, nds, + qpn, qp->qpn ); + barrier(); + + wq->posted = wqe_idx; + + + struct mlx_nic *mlx = 
netdev->priv; + ud_av_t av = iobuf->data; + ud_send_wqe_t snd_wqe; + int rc; + + snd_wqe = alloc_send_wqe ( mlx->ipoib_qph ); + if ( ! snd_wqe ) { + DBGC ( mlx, "MLX %p out of TX WQEs\n", mlx ); + return -ENOBUFS; + } + + prep_send_wqe_buf ( mlx->ipoib_qph, mlx->bcast_av, snd_wqe, + iobuf->data, 0, iob_len ( iobuf ), 0 ); + if ( ( rc = post_send_req ( mlx->ipoib_qph, snd_wqe, 1 ) ) != 0 ) { + DBGC ( mlx, "MLX %p could not post TX WQE %p: %s\n", + mlx, snd_wqe, strerror ( rc ) ); + free_wqe ( snd_wqe ); + return rc; + } + + +} + +static struct ib_device_operations mlx_ib_operations = { + .post_send = mlx_post_send, +}; + /** * Remove PCI device * diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 9f126b49d..22a8a9824 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -61,6 +61,69 @@ struct ibhdr { uint16_t reserved; } __attribute__ (( packed )); +/** An Infiniband Work Queue */ +struct ib_work_queue { + /** Number of work queue entries */ + unsigned int num_wqes; + /** Posted index + * + * This is the index of the most recently posted entry. + */ + unsigned int posted; + /** Driver-private data + * + * Typically used to hold the address of the work queue. + */ + void *priv; + /** I/O buffers assigned to work queue */ + struct io_buffer *iobuf[0]; +}; + +/** An Infiniband Queue Pair */ +struct ib_queue_pair { + /** Queue Pair Number */ + uint32_t qpn; + /** Send queue */ + struct ib_work_queue send; + /** Receive queue */ + struct ib_work_queue recv; +}; + +/** An Infiniband Address Vector */ +struct ib_address_vector { + +}; + +/** + * Infiniband device operations + * + * These represent a subset of the Infiniband Verbs. 
+ */ +struct ib_device_operations { + /** Post Send work queue entry + * + * @v ibdev Infiniband device + * @v iobuf I/O buffer + * @v av Address vector + * @v qp Queue pair + * @ret rc Return status code + * + * If this method returns success, the I/O buffer remains + * owned by the queue pair. If this method returns failure, + * the I/O buffer is immediately released; the failure is + * interpreted as "failure to enqueue buffer". + */ + int ( * post_send ) ( struct ib_device *ibdev, + struct io_buffer *iobuf, + struct ib_address_vector *av, + struct ib_queue_pair *qp ); +}; + + + + + + extern struct ll_protocol infiniband_protocol; extern const char * ib_ntoa ( const void *ll_addr ); -- cgit v1.2.3-55-g7522 From 38a73b55c445ffa596d4b4ecc2aef6476d00a3c1 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 00:19:38 +0100 Subject: Now at least compiles --- src/drivers/net/mlx_ipoib/bit_ops.h | 98 +++++++++++++++++++++++++++++++++++ src/drivers/net/mlx_ipoib/mt25218.c | 100 +++++++++++++++--------------------- src/drivers/net/mlx_ipoib/mt25218.h | 18 +++++++ src/include/gpxe/infiniband.h | 33 +++++++++--- 4 files changed, 185 insertions(+), 64 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/bit_ops.h b/src/drivers/net/mlx_ipoib/bit_ops.h index b67f92ce9..74823a608 100644 --- a/src/drivers/net/mlx_ipoib/bit_ops.h +++ b/src/drivers/net/mlx_ipoib/bit_ops.h @@ -133,4 +133,102 @@ struct addr_64_st { field; \ }) + + +/* Remaining code Copyright Fen Systems Ltd. 
2007 */ + +/** Bit offset of a field within a pseudo_bit_t structure */ +#define MLX_BIT_OFFSET( _structure, _field ) \ + offsetof ( struct _structure, _field ) + +/** Bit width of a field within a pseudo_bit_t structure */ +#define MLX_BIT_WIDTH( _structure, _field ) \ + sizeof ( ( ( struct _structure * ) NULL )->_field ) + +/* + * Assemble native-endian dword from named fields and values + * + */ + +#define MLX_ASSEMBLE_1( _structure, _index, _field, _value ) \ + ( (_value) << \ + ( MLX_BIT_OFFSET ( _structure, _field ) - ( 32 * (_index) ) ) ) + +#define MLX_ASSEMBLE_2( _structure, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure, _index, _field, _value ) | \ + MLX_ASSEMBLE_1 ( _structure, _index, __VA_ARGS__ ) ) + +#define MLX_ASSEMBLE_3( _structure, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure, _index, _field, _value ) | \ + MLX_ASSEMBLE_2 ( _structure, _index, __VA_ARGS__ ) ) + +#define MLX_ASSEMBLE_4( _structure, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure, _index, _field, _value ) | \ + MLX_ASSEMBLE_3 ( _structure, _index, __VA_ARGS__ ) ) + +/* + * Build native-endian (positive) dword bitmasks from named fields + * + */ + +#define MLX_MASK_1( _structure, _index, _field ) \ + MLX_ASSEMBLE_1 ( _structure, _index, _field, \ + ( ( 1 << MLX_BIT_WIDTH ( _structure, \ + _field ) ) - 1 ) ) + +#define MLX_MASK_2( _structure, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure, _index, _field ) | \ + MLX_MASK_1 ( _structure, _index, __VA_ARGS__ ) ) + +#define MLX_MASK_3( _structure, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure, _index, _field ) | \ + MLX_MASK_2 ( _structure, _index, __VA_ARGS__ ) ) + +#define MLX_MASK_4( _structure, _index, _field, ... 
) \ + ( MLX_MASK_1 ( _structure, _index, _field ) | \ + MLX_MASK_3 ( _structure, _index, __VA_ARGS__ ) ) + +/* + * Populate big-endian dwords from named fields and values + * + */ + +#define MLX_POPULATE( _base, _index, _assembled ) \ + do { \ + uint32_t *__ptr = ( ( (uint32_t *) (_base) ) + (_index) ); \ + uint32_t __assembled = (_assembled); \ + *__ptr = cpu_to_be32 ( __assembled ); \ + } while ( 0 ) + +#define MLX_POPULATE_1( _base, _structure, _index, ... ) \ + MLX_POPULATE ( _base, _index, \ + MLX_ASSEMBLE_1 ( _structure, _index, __VA_ARGS__ ) ) + +#define MLX_POPULATE_2( _base, _structure, _index, ... ) \ + MLX_POPULATE ( _base, _index, \ + MLX_ASSEMBLE_2 ( _structure, _index, __VA_ARGS__ ) ) + +#define MLX_POPULATE_3( _base, _structure, _index, ... ) \ + MLX_POPULATE ( _base, _index, \ + MLX_ASSEMBLE_3 ( _structure, _index, __VA_ARGS__ ) ) + +#define MLX_POPULATE_4( _base, _structure, _index, ... ) \ + MLX_POPULATE ( _base, _index, \ + MLX_ASSEMBLE_4 ( _structure, _index, __VA_ARGS__ ) ) + +/* + * Modify big-endian dword using named field and value + * + */ + +#define MLX_MODIFY( _base, _structure, _index, _field, _value ) \ + do { \ + uint32_t *__ptr = ( ( (uint32_t *) (_base) ) + (_index) ); \ + uint32_t __value = be32_to_cpu ( *__ptr ); \ + __value &= ~( MLX_MASK_1 ( _structure, _index, _field ) ); \ + __value |= MLX_ASSEMBLE_1 ( _structure, _index, \ + _field, _value ); \ + *__ptr = cpu_to_be32 ( __value ); \ + } while ( 0 ) + #endif /* __bit_ops_h__ */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index c6015fb24..e8290bb63 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -224,28 +224,32 @@ static struct net_device_operations mlx_operations = { }; +struct mlx_send_work_queue { + /** Doorbell number */ + unsigned int doorbell_idx; + /** Work queue entries */ + struct ud_send_wqe_st *wqe; +}; -int ib_alloc_wqe ( struct ib_work_queue *wq, struct io_buffer *iobuf ) { - 
unsigned int wqe_idx; - unsigned int new_write_ptr; - - /* Allocate queue entry */ - wqe_idx = new_write_ptr = wq->write_ptr; - if ( wq->iobuf[wqe_idx] ) - return -ENOBUFS; - wq->iobuf[wqe_idx] = iobuf; +struct mlx { + /** User Access Region */ + unsigned long uar; + /** Doorbell records */ + union db_record_st *db_rec; +}; - /* Update write pointer */ - new_write_ptr++; - new_write_ptr &= ( wq->num_wqes - 1 ); - wq->write_ptr = new_write_ptr; +static struct ib_gid mlx_no_gid = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } +}; - return wqe_idx; -} +static void mlx_ring_doorbell ( struct mlx *mlx, void *db_reg, + unsigned int offset ) { + uint32_t *db_reg_dword = db_reg; -static inline void ib_free_wqe ( struct ib_work_queue *wq, int wqe_idx ) { - assert ( wq->iobuf[wqe_idx] != NULL ); - wq->iobuf[wqe_idx] = NULL; + barrier(); + writel ( db_reg_dword[0], ( mlx->uar + offset + 0 ) ); + barrier(); + writel ( db_reg_dword[1], ( mlx->uar + offset + 4 ) ); } static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, @@ -253,7 +257,7 @@ static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, struct ib_queue_pair *qp ) { struct mlx *mlx = ibdev->priv; struct ib_work_queue *wq = &qp->send; - struct mlx_work_queue *mlx_wq = wq->priv; + struct mlx_send_work_queue *mlx_wq = wq->priv; unsigned int wqe_idx_mask = ( wq->num_wqes - 1 ); unsigned int prev_wqe_idx; struct ud_send_wqe_st *prev_wqe; @@ -261,11 +265,12 @@ static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, struct ud_send_wqe_st *wqe; struct ib_gid *gid; size_t nds; - struct send_doorbell_st doorbell; + union db_record_st *db_rec; + struct send_doorbell_st db_reg; /* Allocate work queue entry */ prev_wqe_idx = wq->posted; - wqe_idx = ( prev_wqe_index + 1 ); + wqe_idx = ( prev_wqe_idx + 1 ); if ( wq->iobuf[wqe_idx & wqe_idx_mask] ) { DBGC ( mlx, "MLX %p send queue full", mlx ); return -ENOBUFS; @@ -282,16 +287,16 @@ static int mlx_post_send ( struct 
ib_device *ibdev, struct io_buffer *iobuf, memset ( &wqe->udseg, 0, sizeof ( wqe->udseg ) ); MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 0, pd, GLOBAL_PD, - port_number, mlx->port ); + port_number, PXE_IB_PORT ); MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 1, - rlid, av->remote_lid, + rlid, av->dlid, g, av->gid_present ); MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 2, max_stat_rate, ( ( av->rate >= 3 ) ? 0 : 1 ), msg, 3 ); MLX_POPULATE_1 ( &wqe->udseg, arbelprm_ud_address_vector_st, 3, sl, av->sl ); - gid = ( av->gid_present ? av->gid : &ib_no_gid ); + gid = ( av->gid_present ? &av->gid : &mlx_no_gid ); memcpy ( ( ( ( void * ) &wqe->udseg ) + 16 ), gid, sizeof ( *gid ) ); MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 8, @@ -305,55 +310,34 @@ static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, /* Update previous work queue entry's "next" field */ nds = ( offsetof ( typeof ( *wqe ), mpointer ) + sizeof ( wqe->mpointer[0] ) ); - MLX_MODIFY_1 ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 0, - nopcode, XDEV_NOPCODE_SEND ); + MLX_MODIFY ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 0, + nopcode, XDEV_NOPCODE_SEND ); MLX_POPULATE_3 ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 1, nds, nds, f, 1, always1, 1 ); - /* Ring doorbell */ - - doorbell index is a property of the queue pair - - - MLX_POPULATE_1 ( mlx_wq->send_uar_context, arbelprm_qp_db_record_st, 0, + /* Update doorbell record */ + db_rec = &mlx->db_rec[mlx_wq->doorbell_idx]; + MLX_POPULATE_1 ( db_rec, arbelprm_qp_db_record_st, 0, counter, ( wqe_idx & 0xffff ) ); - memset ( &doorbell, 0, sizeof ( doorbell ) ); - MLX_POPULATE_4 ( &doorbell, arbelprm_send_doorbell_st, 0, + barrier(); + + /* Ring doorbell register */ + MLX_POPULATE_4 ( &db_reg, arbelprm_send_doorbell_st, 0, nopcode, XDEV_NOPCODE_SEND, f, 1, wqe_counter, ( prev_wqe_idx & 0xffff ), wqe_cnt, 1 ); - MLX_POPULATE_2 ( &doorbell, 
arbelprm_send_doorbell_st, 1, + MLX_POPULATE_2 ( &db_reg, arbelprm_send_doorbell_st, 1, nds, nds, qpn, qp->qpn ); - barrier(); + mlx_ring_doorbell ( mlx, &db_reg, POST_SND_OFFSET ); + /* Update work queue's posted index */ wq->posted = wqe_idx; - - struct mlx_nic *mlx = netdev->priv; - ud_av_t av = iobuf->data; - ud_send_wqe_t snd_wqe; - int rc; - - snd_wqe = alloc_send_wqe ( mlx->ipoib_qph ); - if ( ! snd_wqe ) { - DBGC ( mlx, "MLX %p out of TX WQEs\n", mlx ); - return -ENOBUFS; - } - - prep_send_wqe_buf ( mlx->ipoib_qph, mlx->bcast_av, snd_wqe, - iobuf->data, 0, iob_len ( iobuf ), 0 ); - if ( ( rc = post_send_req ( mlx->ipoib_qph, snd_wqe, 1 ) ) != 0 ) { - DBGC ( mlx, "MLX %p could not post TX WQE %p: %s\n", - mlx, snd_wqe, strerror ( rc ) ); - free_wqe ( snd_wqe ); - return rc; - } - - + return 0; } static struct ib_device_operations mlx_ib_operations = { diff --git a/src/drivers/net/mlx_ipoib/mt25218.h b/src/drivers/net/mlx_ipoib/mt25218.h index 1e7c8d8bf..590d72f65 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.h +++ b/src/drivers/net/mlx_ipoib/mt25218.h @@ -342,6 +342,24 @@ struct cq_dbell_st { __u8 raw[MT_STRUCT_SIZE(arbelprm_cq_cmd_doorbell_st)]; } __attribute__ ((packed)); +struct qp_db_record_st { + __u8 raw[MT_STRUCT_SIZE(arbelprm_qp_db_record_st)]; +} __attribute__ ((packed)); + +struct cq_arm_db_record_st { + __u8 raw[MT_STRUCT_SIZE(arbelprm_cq_arm_db_record_st)]; +} __attribute__ ((packed)); + +struct cq_ci_db_record_st { + __u8 raw[MT_STRUCT_SIZE(arbelprm_cq_ci_db_record_st)]; +} __attribute__ ((packed)); + +union db_record_st { + struct qp_db_record_st qp; + struct cq_arm_db_record_st cq_arm; + struct cq_ci_db_record_st cq_ci; +} __attribute__ ((packed)); + struct mad_ifc_inprm_st { union mad_u mad; } __attribute__ ((packed)); diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 22a8a9824..ccb6e49e4 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -61,6 +61,9 @@ struct ibhdr { uint16_t 
reserved; } __attribute__ (( packed )); + + + /** An Infiniband Work Queue */ struct ib_work_queue { /** Number of work queue entries */ @@ -70,10 +73,7 @@ struct ib_work_queue { * This is the index of the most recently posted entry. */ unsigned int posted; - /** Driver-private data - * - * Typically used to hold the address of the work queue. - */ + /** Driver private data */ void *priv; /** I/O buffers assigned to work queue */ struct io_buffer *iobuf[0]; @@ -87,13 +87,30 @@ struct ib_queue_pair { struct ib_work_queue send; /** Receive queue */ struct ib_work_queue recv; + /** Driver private data */ + void *priv; }; /** An Infiniband Address Vector */ struct ib_address_vector { - + /** Destination Queue Pair */ + unsigned int dest_qp; + /** Queue key */ + unsigned int qkey; + /** Destination Local ID */ + unsigned int dlid; + /** Rate */ + unsigned int rate; + /** Service level */ + unsigned int sl; + /** GID is present */ + unsigned int gid_present; + /** GID */ + struct ib_gid gid; }; +struct ib_device; + /** * Infiniband device operations * @@ -119,7 +136,11 @@ struct ib_device_operations { struct ib_queue_pair *qp ); }; - +/** An Infiniband device */ +struct ib_device { + /** Driver private data */ + void *priv; +}; -- cgit v1.2.3-55-g7522 From 8b27da9de16675f59be082168de9468346ec7183 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 01:35:07 +0100 Subject: Gets a response out of the hardware. (An error completion, to be precise.) 
--- src/drivers/net/mlx_ipoib/bit_ops.h | 41 ++++++++++++++++++++--- src/drivers/net/mlx_ipoib/mt25218.c | 67 +++++++++++++++++++++++++++++++++---- src/include/gpxe/infiniband.h | 4 +-- 3 files changed, 98 insertions(+), 14 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/bit_ops.h b/src/drivers/net/mlx_ipoib/bit_ops.h index 74823a608..969de642c 100644 --- a/src/drivers/net/mlx_ipoib/bit_ops.h +++ b/src/drivers/net/mlx_ipoib/bit_ops.h @@ -141,18 +141,33 @@ struct addr_64_st { #define MLX_BIT_OFFSET( _structure, _field ) \ offsetof ( struct _structure, _field ) +/** Dword offset of a field within a pseudo_bit_t structure */ +#define MLX_DWORD_OFFSET( _structure, _field ) \ + ( MLX_BIT_OFFSET ( _structure, _field ) / 32 ) + +/** Dword bit offset of a field within a pseudo_bit_t structure + * + * Yes, using mod-32 would work, but would lose the check for the + * error of specifying a mismatched field name and dword index. + */ +#define MLX_DWORD_BIT_OFFSET( _structure, _index, _field ) \ + ( MLX_BIT_OFFSET ( _structure, _field ) - ( 32 * (_index) ) ) + /** Bit width of a field within a pseudo_bit_t structure */ #define MLX_BIT_WIDTH( _structure, _field ) \ sizeof ( ( ( struct _structure * ) NULL )->_field ) +/** Bit mask for a field within a pseudo_bit_t structure */ +#define MLX_BIT_MASK( _structure, _field ) \ + ( ( 1 << MLX_BIT_WIDTH ( _structure, _field ) ) - 1 ) + /* * Assemble native-endian dword from named fields and values * */ #define MLX_ASSEMBLE_1( _structure, _index, _field, _value ) \ - ( (_value) << \ - ( MLX_BIT_OFFSET ( _structure, _field ) - ( 32 * (_index) ) ) ) + ( (_value) << MLX_DWORD_BIT_OFFSET ( _structure, _index, _field ) ) #define MLX_ASSEMBLE_2( _structure, _index, _field, _value, ... 
) \ ( MLX_ASSEMBLE_1 ( _structure, _index, _field, _value ) | \ @@ -172,9 +187,8 @@ struct addr_64_st { */ #define MLX_MASK_1( _structure, _index, _field ) \ - MLX_ASSEMBLE_1 ( _structure, _index, _field, \ - ( ( 1 << MLX_BIT_WIDTH ( _structure, \ - _field ) ) - 1 ) ) + ( MLX_BIT_MASK ( _structure, _field ) << \ + MLX_DWORD_BIT_OFFSET ( _structure, _index, _field ) ) #define MLX_MASK_2( _structure, _index, _field, ... ) \ ( MLX_MASK_1 ( _structure, _index, _field ) | \ @@ -231,4 +245,21 @@ struct addr_64_st { *__ptr = cpu_to_be32 ( __value ); \ } while ( 0 ) +/* + * Extract value of named field + * + */ + +#define MLX_EXTRACT( _base, _structure, _field ) \ + ( { \ + unsigned int __index = \ + MLX_DWORD_OFFSET ( _structure, _field ); \ + uint32_t *__ptr = ( ( (uint32_t *) (_base) ) + __index ); \ + uint32_t __value = be32_to_cpu ( *__ptr ); \ + __value >>= MLX_DWORD_BIT_OFFSET ( _structure, __index, \ + _field ); \ + __value &= MLX_BIT_MASK ( _structure, _field ); \ + __value; \ + } ) + #endif /* __bit_ops_h__ */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index e8fcbb402..601a1f407 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -27,12 +27,13 @@ struct arbel_send_work_queue { /** Doorbell number */ unsigned int doorbell_idx; /** Work queue entries */ - struct ud_send_wqe_st *wqe; + // struct ud_send_wqe_st *wqe; + union ud_send_wqe_u *wqe_u; }; struct arbel { /** User Access Region */ - unsigned long uar; + void *uar; /** Doorbell records */ union db_record_st *db_rec; }; @@ -88,7 +89,6 @@ static uint8_t ib_broadcast[IB_ALEN] = { 0xff, }; static int mlx_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) { struct mlx_nic *mlx = netdev->priv; - ud_av_t av = iobuf->data; ud_send_wqe_t snd_wqe; int rc; @@ -110,6 +110,58 @@ static int mlx_transmit ( struct net_device *netdev, return 0; } +static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, + 
struct ib_address_vector *av, + struct ib_queue_pair *qp ); + +static struct io_buffer *tx_ring[NUM_IPOIB_SND_WQES]; +static int tx_posted = 0; + +static int mlx_transmit_direct ( struct net_device *netdev, + struct io_buffer *iobuf ) { + struct mlx_nic *mlx = netdev->priv; + int rc; + + struct arbel arbel = { + .uar = memfree_pci_dev.uar, + .db_rec = dev_ib_data.uar_context_base, + }; + struct arbel_send_work_queue arbel_send_queue = { + .doorbell_idx = IPOIB_SND_QP_DB_IDX, + .wqe_u = ( (struct udqp_st *) ipoib_data.ipoib_qph )->snd_wq, + }; + struct ib_device ibdev = { + .priv = &arbel, + }; + struct ib_queue_pair qp = { + .qpn = ib_get_qpn ( mlx->ipoib_qph ), + .send = { + .num_wqes = NUM_IPOIB_SND_WQES, + .posted = tx_posted, + .iobufs = tx_ring, + .priv = &arbel_send_queue, + }, + }; + struct ud_av_st *bcast_av = mlx->bcast_av; + struct address_vector_st *bav = &bcast_av->av; + struct ib_address_vector av = { + .dest_qp = bcast_av->dest_qp, + .qkey = bcast_av->qkey, + .dlid = MLX_EXTRACT ( bav, arbelprm_ud_address_vector_st, rlid ), + .rate = ( MLX_EXTRACT ( bav, arbelprm_ud_address_vector_st, max_stat_rate ) ? 
1 : 4 ), + .sl = MLX_EXTRACT ( bav, arbelprm_ud_address_vector_st, sl ), + .gid_present = 1, + }; + memcpy ( &av.gid, ( ( void * ) bav ) + 16, 16 ); + + rc = arbel_post_send ( &ibdev, iobuf, &av, &qp ); + + tx_posted = qp.send.posted; + + return rc; +} + + /** * Handle TX completion * @@ -234,7 +286,7 @@ static void mlx_irq ( struct net_device *netdev, int enable ) { static struct net_device_operations mlx_operations = { .open = mlx_open, .close = mlx_close, - .transmit = mlx_transmit, + .transmit = mlx_transmit_direct, .poll = mlx_poll, .irq = mlx_irq, }; @@ -274,12 +326,13 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, /* Allocate work queue entry */ prev_wqe_idx = wq->posted; wqe_idx = ( prev_wqe_idx + 1 ); - if ( wq->iobuf[wqe_idx & wqe_idx_mask] ) { + if ( wq->iobufs[wqe_idx & wqe_idx_mask] ) { DBGC ( arbel, "ARBEL %p send queue full", arbel ); return -ENOBUFS; } - prev_wqe = &arbel_wq->wqe[prev_wqe_idx & wqe_idx_mask]; - wqe = &arbel_wq->wqe[wqe_idx & wqe_idx_mask]; + wq->iobufs[wqe_idx & wqe_idx_mask] = iobuf; + prev_wqe = &arbel_wq->wqe_u[prev_wqe_idx & wqe_idx_mask].wqe_cont.wqe; + wqe = &arbel_wq->wqe_u[wqe_idx & wqe_idx_mask].wqe_cont.wqe; /* Construct work queue entry */ memset ( &wqe->next.control, 0, diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index ccb6e49e4..8b3a2f7c0 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -73,10 +73,10 @@ struct ib_work_queue { * This is the index of the most recently posted entry. 
*/ unsigned int posted; + /** I/O buffers assigned to work queue */ + struct io_buffer **iobufs; /** Driver private data */ void *priv; - /** I/O buffers assigned to work queue */ - struct io_buffer *iobuf[0]; }; /** An Infiniband Queue Pair */ -- cgit v1.2.3-55-g7522 From 970951666fe478399b79bdc4b66c015b06fff563 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 02:53:05 +0100 Subject: arbel_post_send() has been observed to transmit a packet! --- src/drivers/net/mlx_ipoib/ib_mt25218.c | 12 +++++++ src/drivers/net/mlx_ipoib/mt25218.c | 60 ++++++++++++++++++++++------------ src/include/gpxe/infiniband.h | 7 ++-- 3 files changed, 56 insertions(+), 23 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index dcd49e45d..8122a20ef 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -1122,9 +1122,14 @@ static int post_send_req(void *qph, void *wqeh, __u8 num_gather) struct send_doorbell_st dbell; __u32 nds; + DBG ( "Work queue entry:\n" ); + DBG_HD ( snd_wqe, sizeof ( *snd_wqe ) ); + qp->post_send_counter++; WRITE_WORD_VOL(qp->send_uar_context, 2, htons(qp->post_send_counter)); + DBG ( "Doorbell record:\n" ); + DBG_HD ( qp->send_uar_context, 8 ); memset(&dbell, 0, sizeof dbell); INS_FLD(XDEV_NOPCODE_SEND, &dbell, arbelprm_send_doorbell_st, nopcode); @@ -1148,6 +1153,10 @@ static int post_send_req(void *qph, void *wqeh, __u8 num_gather) INS_FLD_TO_BE(XDEV_NOPCODE_SEND, &qp->last_posted_snd_wqe->next.next, arbelprm_wqe_segment_next_st, nopcode); + + DBG ( "Previous work queue entry's next field:\n" ); + DBG_HD ( &qp->last_posted_snd_wqe->next.next, + sizeof ( qp->last_posted_snd_wqe->next.next ) ); } rc = cmd_post_doorbell(&dbell, POST_SND_OFFSET); @@ -1965,6 +1974,9 @@ static void dev_post_dbell(void *dbell, __u32 offset) address = (unsigned long)(memfree_pci_dev.uar) + offset; tprintf("va=0x%lx pa=0x%lx", address, 
virt_to_bus((const void *)address)); + DBG ( "dev_post_dbell %08lx:%08lx to %lx\n", + htonl ( ptr[0] ), htonl ( ptr[1] ), + virt_to_phys ( memfree_pci_dev.uar + offset ) ); writel(htonl(ptr[0]), memfree_pci_dev.uar + offset); barrier(); address += 4; diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 601a1f407..26e02cd02 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -115,7 +115,7 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, struct ib_queue_pair *qp ); static struct io_buffer *tx_ring[NUM_IPOIB_SND_WQES]; -static int tx_posted = 0; +static int next_tx_idx = 0; static int mlx_transmit_direct ( struct net_device *netdev, struct io_buffer *iobuf ) { @@ -128,7 +128,7 @@ static int mlx_transmit_direct ( struct net_device *netdev, }; struct arbel_send_work_queue arbel_send_queue = { .doorbell_idx = IPOIB_SND_QP_DB_IDX, - .wqe_u = ( (struct udqp_st *) ipoib_data.ipoib_qph )->snd_wq, + .wqe_u = ( (struct udqp_st *) mlx->ipoib_qph )->snd_wq, }; struct ib_device ibdev = { .priv = &arbel, @@ -137,7 +137,7 @@ static int mlx_transmit_direct ( struct net_device *netdev, .qpn = ib_get_qpn ( mlx->ipoib_qph ), .send = { .num_wqes = NUM_IPOIB_SND_WQES, - .posted = tx_posted, + .next_idx = next_tx_idx, .iobufs = tx_ring, .priv = &arbel_send_queue, }, @@ -156,7 +156,7 @@ static int mlx_transmit_direct ( struct net_device *netdev, rc = arbel_post_send ( &ibdev, iobuf, &av, &qp ); - tx_posted = qp.send.posted; + next_tx_idx = qp.send.next_idx; return rc; } @@ -286,7 +286,11 @@ static void mlx_irq ( struct net_device *netdev, int enable ) { static struct net_device_operations mlx_operations = { .open = mlx_open, .close = mlx_close, +#if 0 + .transmit = mlx_transmit, +#else .transmit = mlx_transmit_direct, +#endif .poll = mlx_poll, .irq = mlx_irq, }; @@ -301,6 +305,10 @@ static void arbel_ring_doorbell ( struct arbel *arbel, void *db_reg, unsigned int offset ) { 
uint32_t *db_reg_dword = db_reg; + DBG ( "arbel_ring_doorbell %08lx:%08lx to %lx\n", + db_reg_dword[0], db_reg_dword[1], + virt_to_phys ( arbel->uar + offset ) ); + barrier(); writel ( db_reg_dword[0], ( arbel->uar + offset + 0 ) ); barrier(); @@ -314,8 +322,6 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, struct ib_work_queue *wq = &qp->send; struct arbel_send_work_queue *arbel_wq = wq->priv; unsigned int wqe_idx_mask = ( wq->num_wqes - 1 ); - unsigned int prev_wqe_idx; - unsigned int wqe_idx; struct ud_send_wqe_st *prev_wqe; struct ud_send_wqe_st *wqe; struct ib_gid *gid; @@ -324,17 +330,17 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, struct send_doorbell_st db_reg; /* Allocate work queue entry */ - prev_wqe_idx = wq->posted; - wqe_idx = ( prev_wqe_idx + 1 ); - if ( wq->iobufs[wqe_idx & wqe_idx_mask] ) { + if ( wq->iobufs[wq->next_idx & wqe_idx_mask] ) { DBGC ( arbel, "ARBEL %p send queue full", arbel ); return -ENOBUFS; } - wq->iobufs[wqe_idx & wqe_idx_mask] = iobuf; - prev_wqe = &arbel_wq->wqe_u[prev_wqe_idx & wqe_idx_mask].wqe_cont.wqe; - wqe = &arbel_wq->wqe_u[wqe_idx & wqe_idx_mask].wqe_cont.wqe; + wq->iobufs[wq->next_idx & wqe_idx_mask] = iobuf; + prev_wqe = &arbel_wq->wqe_u[(wq->next_idx - 1) & wqe_idx_mask].wqe_cont.wqe; + wqe = &arbel_wq->wqe_u[wq->next_idx & wqe_idx_mask].wqe_cont.wqe; /* Construct work queue entry */ + MLX_POPULATE_1 ( &wqe->next.next, arbelprm_wqe_segment_next_st, 1, + always1, 1 ); memset ( &wqe->next.control, 0, sizeof ( wqe->next.control ) ); MLX_POPULATE_1 ( &wqe->next.control, @@ -359,13 +365,22 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, destination_qp, av->dest_qp ); MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 9, q_key, av->qkey ); - wqe->mpointer[0].local_addr_l = - cpu_to_be32 ( virt_to_bus ( iobuf->data ) ); + + // wqe->mpointer[0].local_addr_l = + // cpu_to_be32 ( virt_to_bus ( iobuf->data ) ); + + 
memcpy ( bus_to_virt ( be32_to_cpu ( wqe->mpointer[0].local_addr_l ) ), + iobuf->data, iob_len ( iobuf ) ); + + wqe->mpointer[0].byte_count = cpu_to_be32 ( iob_len ( iobuf ) ); + DBG ( "Work queue entry:\n" ); + DBG_HD ( wqe, sizeof ( *wqe ) ); + /* Update previous work queue entry's "next" field */ - nds = ( offsetof ( typeof ( *wqe ), mpointer ) + - sizeof ( wqe->mpointer[0] ) ); + nds = ( ( offsetof ( typeof ( *wqe ), mpointer ) + + sizeof ( wqe->mpointer[0] ) ) >> 4 ); MLX_MODIFY ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 0, nopcode, XDEV_NOPCODE_SEND ); MLX_POPULATE_3 ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 1, @@ -373,25 +388,30 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, f, 1, always1, 1 ); + DBG ( "Previous work queue entry's next field:\n" ); + DBG_HD ( &prev_wqe->next.next, sizeof ( prev_wqe->next.next ) ); + /* Update doorbell record */ db_rec = &arbel->db_rec[arbel_wq->doorbell_idx]; MLX_POPULATE_1 ( db_rec, arbelprm_qp_db_record_st, 0, - counter, ( wqe_idx & 0xffff ) ); + counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); barrier(); + DBG ( "Doorbell record:\n" ); + DBG_HD ( db_rec, 8 ); /* Ring doorbell register */ MLX_POPULATE_4 ( &db_reg, arbelprm_send_doorbell_st, 0, nopcode, XDEV_NOPCODE_SEND, f, 1, - wqe_counter, ( prev_wqe_idx & 0xffff ), + wqe_counter, ( wq->next_idx & 0xffff ), wqe_cnt, 1 ); MLX_POPULATE_2 ( &db_reg, arbelprm_send_doorbell_st, 1, nds, nds, qpn, qp->qpn ); arbel_ring_doorbell ( arbel, &db_reg, POST_SND_OFFSET ); - /* Update work queue's posted index */ - wq->posted = wqe_idx; + /* Update work queue's index */ + wq->next_idx++; return 0; } diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 8b3a2f7c0..9337af353 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -68,11 +68,12 @@ struct ibhdr { struct ib_work_queue { /** Number of work queue entries */ unsigned int num_wqes; - /** Posted index + /** Next work queue 
entry index * - * This is the index of the most recently posted entry. + * This is the index of the next entry to be filled (i.e. the + * first empty entry). */ - unsigned int posted; + unsigned int next_idx; /** I/O buffers assigned to work queue */ struct io_buffer **iobufs; /** Driver private data */ -- cgit v1.2.3-55-g7522 From a3a91fedc11ea3f8de4c0ad3378ea610f1ccb960 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 15:40:35 +0100 Subject: Started adding poll_cq() verb. Started reworking MLX_EXTRACT(), MLX_POPULATE() etc. to automatically determine type information. --- src/drivers/net/mlx_ipoib/bit_ops.h | 165 +++++++++++++++++++++--------------- src/drivers/net/mlx_ipoib/mt25218.c | 103 +++++++++++++++++++--- src/include/gpxe/infiniband.h | 56 ++++++++++-- 3 files changed, 240 insertions(+), 84 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/bit_ops.h b/src/drivers/net/mlx_ipoib/bit_ops.h index 969de642c..2bc7684d7 100644 --- a/src/drivers/net/mlx_ipoib/bit_ops.h +++ b/src/drivers/net/mlx_ipoib/bit_ops.h @@ -137,112 +137,139 @@ struct addr_64_st { /* Remaining code Copyright Fen Systems Ltd. 2007 */ +/** + * Wrapper structure for pseudo_bit_t structures + * + * This structure provides a wrapper around the autogenerated + * pseudo_bit_t structures. It has the correct size, and also + * encapsulates type information about the underlying pseudo_bit_t + * structure, which allows the MLX_POPULATE etc. macros to work + * without requiring explicit type information. 
+ */ +#define MLX_DECLARE_STRUCT( _structure ) \ + _structure { \ + union { \ + uint8_t bytes[ sizeof ( struct _structure ## _st ) / 8 ]; \ + uint32_t dwords[ sizeof ( struct _structure ## _st ) / 32 ]; \ + struct _structure ## _st *dummy[0]; \ + } u; \ + } + +/** Get pseudo_bit_t structure type from wrapper structure pointer */ +#define MLX_PSEUDO_STRUCT( _ptr ) \ + typeof ( *((_ptr)->u.dummy[0]) ) + /** Bit offset of a field within a pseudo_bit_t structure */ -#define MLX_BIT_OFFSET( _structure, _field ) \ - offsetof ( struct _structure, _field ) +#define MLX_BIT_OFFSET( _structure_st, _field ) \ + offsetof ( _structure_st, _field ) /** Dword offset of a field within a pseudo_bit_t structure */ -#define MLX_DWORD_OFFSET( _structure, _field ) \ - ( MLX_BIT_OFFSET ( _structure, _field ) / 32 ) +#define MLX_DWORD_OFFSET( _structure_st, _field ) \ + ( MLX_BIT_OFFSET ( _structure_st, _field ) / 32 ) /** Dword bit offset of a field within a pseudo_bit_t structure * * Yes, using mod-32 would work, but would lose the check for the * error of specifying a mismatched field name and dword index. 
*/ -#define MLX_DWORD_BIT_OFFSET( _structure, _index, _field ) \ - ( MLX_BIT_OFFSET ( _structure, _field ) - ( 32 * (_index) ) ) +#define MLX_DWORD_BIT_OFFSET( _structure_st, _index, _field ) \ + ( MLX_BIT_OFFSET ( _structure_st, _field ) - ( 32 * (_index) ) ) /** Bit width of a field within a pseudo_bit_t structure */ -#define MLX_BIT_WIDTH( _structure, _field ) \ - sizeof ( ( ( struct _structure * ) NULL )->_field ) +#define MLX_BIT_WIDTH( _structure_st, _field ) \ + sizeof ( ( ( _structure_st * ) NULL )->_field ) /** Bit mask for a field within a pseudo_bit_t structure */ -#define MLX_BIT_MASK( _structure, _field ) \ - ( ( 1 << MLX_BIT_WIDTH ( _structure, _field ) ) - 1 ) +#define MLX_BIT_MASK( _structure_st, _field ) \ + ( ( 1 << MLX_BIT_WIDTH ( _structure_st, _field ) ) - 1 ) /* * Assemble native-endian dword from named fields and values * */ -#define MLX_ASSEMBLE_1( _structure, _index, _field, _value ) \ - ( (_value) << MLX_DWORD_BIT_OFFSET ( _structure, _index, _field ) ) +#define MLX_ASSEMBLE_1( _structure_st, _index, _field, _value ) \ + ( (_value) << MLX_DWORD_BIT_OFFSET ( _structure_st, _index, _field ) ) -#define MLX_ASSEMBLE_2( _structure, _index, _field, _value, ... ) \ - ( MLX_ASSEMBLE_1 ( _structure, _index, _field, _value ) | \ - MLX_ASSEMBLE_1 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_ASSEMBLE_2( _structure_st, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_1 ( _structure_st, _index, __VA_ARGS__ ) ) -#define MLX_ASSEMBLE_3( _structure, _index, _field, _value, ... ) \ - ( MLX_ASSEMBLE_1 ( _structure, _index, _field, _value ) | \ - MLX_ASSEMBLE_2 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_ASSEMBLE_3( _structure_st, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_2 ( _structure_st, _index, __VA_ARGS__ ) ) -#define MLX_ASSEMBLE_4( _structure, _index, _field, _value, ... 
) \ - ( MLX_ASSEMBLE_1 ( _structure, _index, _field, _value ) | \ - MLX_ASSEMBLE_3 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_ASSEMBLE_4( _structure_st, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_3 ( _structure_st, _index, __VA_ARGS__ ) ) /* * Build native-endian (positive) dword bitmasks from named fields * */ -#define MLX_MASK_1( _structure, _index, _field ) \ - ( MLX_BIT_MASK ( _structure, _field ) << \ - MLX_DWORD_BIT_OFFSET ( _structure, _index, _field ) ) +#define MLX_MASK_1( _structure_st, _index, _field ) \ + ( MLX_BIT_MASK ( _structure_st, _field ) << \ + MLX_DWORD_BIT_OFFSET ( _structure_st, _index, _field ) ) -#define MLX_MASK_2( _structure, _index, _field, ... ) \ - ( MLX_MASK_1 ( _structure, _index, _field ) | \ - MLX_MASK_1 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_MASK_2( _structure_st, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_1 ( _structure_st, _index, __VA_ARGS__ ) ) -#define MLX_MASK_3( _structure, _index, _field, ... ) \ - ( MLX_MASK_1 ( _structure, _index, _field ) | \ - MLX_MASK_2 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_MASK_3( _structure_st, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_2 ( _structure_st, _index, __VA_ARGS__ ) ) -#define MLX_MASK_4( _structure, _index, _field, ... ) \ - ( MLX_MASK_1 ( _structure, _index, _field ) | \ - MLX_MASK_3 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_MASK_4( _structure_st, _index, _field, ... 
) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_3 ( _structure_st, _index, __VA_ARGS__ ) ) /* * Populate big-endian dwords from named fields and values * */ -#define MLX_POPULATE( _base, _index, _assembled ) \ - do { \ - uint32_t *__ptr = ( ( (uint32_t *) (_base) ) + (_index) ); \ - uint32_t __assembled = (_assembled); \ - *__ptr = cpu_to_be32 ( __assembled ); \ +#define MLX_POPULATE( _ptr, _index, _assembled ) \ + do { \ + uint32_t *__ptr = &(_ptr)->u.dwords[(_index)]; \ + uint32_t __assembled = (_assembled); \ + *__ptr = cpu_to_be32 ( __assembled ); \ } while ( 0 ) -#define MLX_POPULATE_1( _base, _structure, _index, ... ) \ - MLX_POPULATE ( _base, _index, \ - MLX_ASSEMBLE_1 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_POPULATE_1( _ptr, _index, ... ) \ + MLX_POPULATE ( _ptr, _index, \ + MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ + _index, __VA_ARGS__ ) ) -#define MLX_POPULATE_2( _base, _structure, _index, ... ) \ - MLX_POPULATE ( _base, _index, \ - MLX_ASSEMBLE_2 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_POPULATE_2( _ptr, _index, ... ) \ + MLX_POPULATE ( _ptr, _index, \ + MLX_ASSEMBLE_2 ( MLX_PSEUDO_STRUCT ( _ptr ), \ + _index, __VA_ARGS__ ) ) -#define MLX_POPULATE_3( _base, _structure, _index, ... ) \ - MLX_POPULATE ( _base, _index, \ - MLX_ASSEMBLE_3 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_POPULATE_3( _ptr, _index, ... ) \ + MLX_POPULATE ( _ptr, _index, \ + MLX_ASSEMBLE_3 ( MLX_PSEUDO_STRUCT ( _ptr ), \ + _index, __VA_ARGS__ ) ) -#define MLX_POPULATE_4( _base, _structure, _index, ... ) \ - MLX_POPULATE ( _base, _index, \ - MLX_ASSEMBLE_4 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_POPULATE_4( _ptr, _index, ... 
) \ + MLX_POPULATE ( _ptr, _index, \ + MLX_ASSEMBLE_4 ( MLX_PSEUDO_STRUCT ( _ptr ), \ + _index, __VA_ARGS__ ) ) /* * Modify big-endian dword using named field and value * */ -#define MLX_MODIFY( _base, _structure, _index, _field, _value ) \ - do { \ - uint32_t *__ptr = ( ( (uint32_t *) (_base) ) + (_index) ); \ - uint32_t __value = be32_to_cpu ( *__ptr ); \ - __value &= ~( MLX_MASK_1 ( _structure, _index, _field ) ); \ - __value |= MLX_ASSEMBLE_1 ( _structure, _index, \ - _field, _value ); \ - *__ptr = cpu_to_be32 ( __value ); \ +#define MLX_MODIFY( _ptr, _index, _field, _value ) \ + do { \ + uint32_t *__ptr = &(_ptr)->u.dwords[(_index)]; \ + uint32_t __value = be32_to_cpu ( *__ptr ); \ + __value &= ~( MLX_MASK_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ + _index, _field ) ); \ + __value |= MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ + _index, _field, _value ); \ + *__ptr = cpu_to_be32 ( __value ); \ } while ( 0 ) /* @@ -250,16 +277,18 @@ struct addr_64_st { * */ -#define MLX_EXTRACT( _base, _structure, _field ) \ - ( { \ - unsigned int __index = \ - MLX_DWORD_OFFSET ( _structure, _field ); \ - uint32_t *__ptr = ( ( (uint32_t *) (_base) ) + __index ); \ - uint32_t __value = be32_to_cpu ( *__ptr ); \ - __value >>= MLX_DWORD_BIT_OFFSET ( _structure, __index, \ - _field ); \ - __value &= MLX_BIT_MASK ( _structure, _field ); \ - __value; \ +#define MLX_EXTRACT( _ptr, _field ) \ + ( { \ + unsigned int __index = \ + MLX_DWORD_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \ + uint32_t *__ptr = &(_ptr)->u.dwords[__index]; \ + uint32_t __value = be32_to_cpu ( *__ptr ); \ + __value >>= \ + MLX_DWORD_BIT_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), \ + __index, _field ); \ + __value &= \ + MLX_BIT_MASK ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \ + __value; \ } ) #endif /* __bit_ops_h__ */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index ecf873bb0..42e5465ce 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c 
@@ -23,14 +23,23 @@ Skeleton NIC driver for Etherboot #include "mt25218_imp.c" +#include "arbel.h" + struct arbel_send_work_queue { - /** Doorbell number */ + /** Doorbell record number */ unsigned int doorbell_idx; /** Work queue entries */ // struct ud_send_wqe_st *wqe; union ud_send_wqe_u *wqe_u; }; +struct arbel_completion_queue { + /** Doorbell record number */ + unsigned int doorbell_idx; + /** Completion queue entries */ + union cqe_st *cqe; +}; + struct arbel { /** User Access Region */ void *uar; @@ -143,13 +152,14 @@ static int mlx_transmit_direct ( struct net_device *netdev, }, }; struct ud_av_st *bcast_av = mlx->bcast_av; - struct address_vector_st *bav = &bcast_av->av; + struct arbelprm_ud_address_vector *bav = + ( struct arbelprm_ud_address_vector * ) &bcast_av->av; struct ib_address_vector av = { .dest_qp = bcast_av->dest_qp, .qkey = bcast_av->qkey, - .dlid = MLX_EXTRACT ( bav, arbelprm_ud_address_vector_st, rlid ), - .rate = ( MLX_EXTRACT ( bav, arbelprm_ud_address_vector_st, max_stat_rate ) ? 1 : 4 ), - .sl = MLX_EXTRACT ( bav, arbelprm_ud_address_vector_st, sl ), + .dlid = MLX_EXTRACT ( bav, rlid ), + .rate = ( MLX_EXTRACT ( bav, max_stat_rate ) ? 
1 : 4 ), + .sl = MLX_EXTRACT ( bav, sl ), .gid_present = 1, }; memcpy ( &av.gid, ( ( void * ) bav ) + 16, 16 ); @@ -301,6 +311,13 @@ static struct ib_gid arbel_no_gid = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } }; +/** + * Ring doorbell register in UAR + * + * @v arbel Arbel device + * @v db_reg Doorbell register structure + * @v offset Address of doorbell + */ static void arbel_ring_doorbell ( struct arbel *arbel, void *db_reg, unsigned int offset ) { uint32_t *db_reg_dword = db_reg; @@ -315,6 +332,15 @@ static void arbel_ring_doorbell ( struct arbel *arbel, void *db_reg, writel ( db_reg_dword[1], ( arbel->uar + offset + 4 ) ); } +/** + * Post send work queue entry + * + * @v ibdev Infiniband device + * @v iobuf I/O buffer + * @v av Address vector + * @v qp Queue pair + * @ret rc Return status code + */ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, struct ib_address_vector *av, struct ib_queue_pair *qp ) { @@ -365,14 +391,8 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, destination_qp, av->dest_qp ); MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 9, q_key, av->qkey ); - wqe->mpointer[0].local_addr_l = cpu_to_be32 ( virt_to_bus ( iobuf->data ) ); - - // memcpy ( bus_to_virt ( be32_to_cpu ( wqe->mpointer[0].local_addr_l ) ), - // iobuf->data, iob_len ( iobuf ) ); - - wqe->mpointer[0].byte_count = cpu_to_be32 ( iob_len ( iobuf ) ); DBG ( "Work queue entry:\n" ); @@ -416,8 +436,69 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, return 0; } +static void arbel_parse_completion ( struct arbel *arbel, + union cqe_st *cqe, + struct ib_completion *completion ) { + memset ( completion, 0, sizeof ( *completion ) ); + is_send = MLX_EXTRACT ( cqe, arbelprm_completion_queue_entry_st, s ); + completion->len = + MLX_EXTRACT ( cqe, arbelprm_completion_queue_entry_st, + byte_cnt );} + +/** + * Poll completion queue + * + * @v ibdev Infiniband device + * @v cq 
Completion queue + * @v complete Completion handler + */ +static void arbel_poll_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq, + ib_completer_t complete_send, + ib_completer_t complete_recv ) { + struct arbel *arbel = ibdev->priv; + struct arbel_completion_queue *arbel_cq = cq->priv; + unsigned int cqe_idx_mask = ( cq->num_cqes - 1 ); + union db_record_st *db_rec = &arbel->db_rec[arbel_cq->doorbell_idx]; + union cqe_st *cqe; + struct ib_completion completion; + struct io_buffer *iobuf; + int is_send; + + while ( 1 ) { + /* Look for completion entry */ + cqe = &arbel_cq->cqe[cq->next_idx & cqe_idx_mask]; + if ( MLX_EXTRACT ( cqe, arbelprm_completion_queue_entry_st, + owner ) != 0 ) { + /* Entry still owned by hardware; end of poll */ + break; + } + + /* Parse completion */ + + + + /* Handle completion */ + ( is_send ? complete_send : complete_recv ) ( ibdev, + &completion, + iobuf ); + + /* Return ownership to hardware */ + MLX_POPULATE_1 ( cqe, arbelprm_completion_queue_entry_st, 7, + owner, 1 ); + barrier(); + /* Update completion queue's index */ + cq->next_idx++; + /* Update doorbell record */ + MLX_POPULATE_1 ( db_rec, arbelprm_cq_ci_db_record_st, 0, + counter, ( cq->next_idx & 0xffffffffUL ) ); + } +} + +/** Arbel Infiniband operations */ static struct ib_device_operations arbel_ib_operations = { .post_send = arbel_post_send, + .poll_cq = arbel_poll_cq, }; /** diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 9337af353..c08191586 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -63,6 +63,7 @@ struct ibhdr { +struct ib_device; /** An Infiniband Work Queue */ struct ib_work_queue { @@ -71,9 +72,11 @@ struct ib_work_queue { /** Next work queue entry index * * This is the index of the next entry to be filled (i.e. the - * first empty entry). + * first empty entry). This value is not bounded by num_wqes; + * users must logical-AND with (num_wqes-1) to generate an + * array index. 
*/ - unsigned int next_idx; + unsigned long next_idx; /** I/O buffers assigned to work queue */ struct io_buffer **iobufs; /** Driver private data */ @@ -92,6 +95,38 @@ struct ib_queue_pair { void *priv; }; +/** An Infiniband Completion Queue */ +struct ib_completion_queue { + /** Number of completion queue entries */ + unsigned int num_cqes; + /** Next completion queue entry index + * + * This is the index of the next entry to be filled (i.e. the + * first empty entry). This value is not bounded by num_cqes; + * users must logical-AND with (num_cqes-1) to generate an + * array index. + */ + unsigned long next_idx; + /** Driver private data */ + void *priv; +}; + +/** An Infiniband completion */ +struct ib_completion { + /** Length */ + size_t len; +}; + +/** An Infiniband completion handler + * + * @v ibdev Infiniband device + * @v completion Completion + * @v iobuf I/O buffer + */ +typedef void ( * ib_completer_t ) ( struct ib_device *ibdev, + struct ib_completion *completion, + struct io_buffer *iobuf ); + /** An Infiniband Address Vector */ struct ib_address_vector { /** Destination Queue Pair */ @@ -110,15 +145,13 @@ struct ib_address_vector { struct ib_gid gid; }; -struct ib_device; - /** * Infiniband device operations * * These represent a subset of the Infiniband Verbs. */ struct ib_device_operations { - /** Post Send work queue entry + /** Post send work queue entry * * @v ibdev Infiniband device * @v iobuf I/O buffer @@ -135,6 +168,19 @@ struct ib_device_operations { struct io_buffer *iobuf, struct ib_address_vector *av, struct ib_queue_pair *qp ); + /** Poll completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + * @v complete_send Send completion handler + * @v complete_recv Receive completion handler + * + * The completion handler takes ownership of the I/O buffer. 
+ */ + void ( * poll_cq ) ( struct ib_device *ibdev, + struct ib_completion_queue *cq, + ib_completer_t complete_send, + ib_completer_t complete_recv ); }; /** An Infiniband device */ -- cgit v1.2.3-55-g7522 From 6a791649f0a907c595b5efef5cbb1b2d8d6a9713 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 18:44:09 +0100 Subject: Updated MLX_* accessor macros to use implicit type information. --- src/drivers/net/mlx_ipoib/arbel.h | 39 ++++++++++ src/drivers/net/mlx_ipoib/bit_ops.h | 50 ++++++------- src/drivers/net/mlx_ipoib/mt25218.c | 141 +++++++++++++++++------------------- src/include/gpxe/infiniband.h | 2 + 4 files changed, 132 insertions(+), 100 deletions(-) create mode 100644 src/drivers/net/mlx_ipoib/arbel.h (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h new file mode 100644 index 000000000..e09930448 --- /dev/null +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -0,0 +1,39 @@ +#ifndef _ARBEL_H +#define _ARBEL_H + +struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_entry ); +struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); +struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_send_doorbell ); +struct MLX_DECLARE_STRUCT ( arbelprm_ud_address_vector ); +struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ctrl_send ); +struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_data_ptr ); +struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_next ); +struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ud ); + +#define ARBELPRM_MAX_GATHER 1 + +struct arbelprm_ud_send_wqe { + struct arbelprm_wqe_segment_next next; + struct arbelprm_wqe_segment_ctrl_send ctrl; + struct arbelprm_wqe_segment_ud ud; + struct arbelprm_wqe_segment_data_ptr data[ARBELPRM_MAX_GATHER]; +} __attribute__ (( packed )); + +union arbelprm_completion_entry { + struct arbelprm_completion_queue_entry normal; + struct 
arbelprm_completion_with_error error; +} __attribute__ (( packed )); + +union arbelprm_doorbell_record { + struct arbelprm_cq_ci_db_record cq_ci; + struct arbelprm_qp_db_record qp; +} __attribute__ (( packed )); + +union arbelprm_doorbell_register { + struct arbelprm_send_doorbell send; + uint32_t dword[2]; +} __attribute__ (( packed )); + +#endif /* _ARBEL_H */ diff --git a/src/drivers/net/mlx_ipoib/bit_ops.h b/src/drivers/net/mlx_ipoib/bit_ops.h index 2bc7684d7..960d06683 100644 --- a/src/drivers/net/mlx_ipoib/bit_ops.h +++ b/src/drivers/net/mlx_ipoib/bit_ops.h @@ -143,8 +143,8 @@ struct addr_64_st { * This structure provides a wrapper around the autogenerated * pseudo_bit_t structures. It has the correct size, and also * encapsulates type information about the underlying pseudo_bit_t - * structure, which allows the MLX_POPULATE etc. macros to work - * without requiring explicit type information. + * structure, which allows the MLX_FILL etc. macros to work without + * requiring explicit type information. */ #define MLX_DECLARE_STRUCT( _structure ) \ _structure { \ @@ -181,7 +181,8 @@ struct addr_64_st { /** Bit mask for a field within a pseudo_bit_t structure */ #define MLX_BIT_MASK( _structure_st, _field ) \ - ( ( 1 << MLX_BIT_WIDTH ( _structure_st, _field ) ) - 1 ) + ( ( ~( ( uint32_t ) 0 ) ) >> \ + ( 32 - MLX_BIT_WIDTH ( _structure_st, _field ) ) ) /* * Assemble native-endian dword from named fields and values @@ -229,46 +230,45 @@ struct addr_64_st { * */ -#define MLX_POPULATE( _ptr, _index, _assembled ) \ +#define MLX_FILL( _ptr, _index, _assembled ) \ do { \ uint32_t *__ptr = &(_ptr)->u.dwords[(_index)]; \ uint32_t __assembled = (_assembled); \ *__ptr = cpu_to_be32 ( __assembled ); \ } while ( 0 ) -#define MLX_POPULATE_1( _ptr, _index, ... ) \ - MLX_POPULATE ( _ptr, _index, \ - MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ - _index, __VA_ARGS__ ) ) +#define MLX_FILL_1( _ptr, _index, ... 
) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) -#define MLX_POPULATE_2( _ptr, _index, ... ) \ - MLX_POPULATE ( _ptr, _index, \ - MLX_ASSEMBLE_2 ( MLX_PSEUDO_STRUCT ( _ptr ), \ - _index, __VA_ARGS__ ) ) +#define MLX_FILL_2( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_2 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) -#define MLX_POPULATE_3( _ptr, _index, ... ) \ - MLX_POPULATE ( _ptr, _index, \ - MLX_ASSEMBLE_3 ( MLX_PSEUDO_STRUCT ( _ptr ), \ - _index, __VA_ARGS__ ) ) +#define MLX_FILL_3( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_3 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) + +#define MLX_FILL_4( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_4 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) -#define MLX_POPULATE_4( _ptr, _index, ... ) \ - MLX_POPULATE ( _ptr, _index, \ - MLX_ASSEMBLE_4 ( MLX_PSEUDO_STRUCT ( _ptr ), \ - _index, __VA_ARGS__ ) ) /* * Modify big-endian dword using named field and value * */ -#define MLX_MODIFY( _ptr, _index, _field, _value ) \ +#define MLX_SET( _ptr, _field, _value ) \ do { \ - uint32_t *__ptr = &(_ptr)->u.dwords[(_index)]; \ + unsigned int __index = \ + MLX_DWORD_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \ + uint32_t *__ptr = &(_ptr)->u.dwords[__index]; \ uint32_t __value = be32_to_cpu ( *__ptr ); \ __value &= ~( MLX_MASK_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ - _index, _field ) ); \ + __index, _field ) ); \ __value |= MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ - _index, _field, _value ); \ + __index, _field, _value ); \ *__ptr = cpu_to_be32 ( __value ); \ } while ( 0 ) @@ -277,7 +277,7 @@ struct addr_64_st { * */ -#define MLX_EXTRACT( _ptr, _field ) \ +#define MLX_GET( _ptr, _field ) \ ( { \ unsigned int __index = \ MLX_DWORD_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 42e5465ce..0453ba79f 
100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -37,14 +37,14 @@ struct arbel_completion_queue { /** Doorbell record number */ unsigned int doorbell_idx; /** Completion queue entries */ - union cqe_st *cqe; + union arbelprm_completion_entry *cqe; }; struct arbel { /** User Access Region */ void *uar; /** Doorbell records */ - union db_record_st *db_rec; + union arbelprm_doorbell_record *db_rec; }; @@ -157,9 +157,9 @@ static int mlx_transmit_direct ( struct net_device *netdev, struct ib_address_vector av = { .dest_qp = bcast_av->dest_qp, .qkey = bcast_av->qkey, - .dlid = MLX_EXTRACT ( bav, rlid ), - .rate = ( MLX_EXTRACT ( bav, max_stat_rate ) ? 1 : 4 ), - .sl = MLX_EXTRACT ( bav, sl ), + .dlid = MLX_GET ( bav, rlid ), + .rate = ( MLX_GET ( bav, max_stat_rate ) ? 1 : 4 ), + .sl = MLX_GET ( bav, sl ), .gid_present = 1, }; memcpy ( &av.gid, ( ( void * ) bav ) + 16, 16 ); @@ -318,18 +318,18 @@ static struct ib_gid arbel_no_gid = { * @v db_reg Doorbell register structure * @v offset Address of doorbell */ -static void arbel_ring_doorbell ( struct arbel *arbel, void *db_reg, +static void arbel_ring_doorbell ( struct arbel *arbel, + union arbelprm_doorbell_register *db_reg, unsigned int offset ) { - uint32_t *db_reg_dword = db_reg; DBG ( "arbel_ring_doorbell %08lx:%08lx to %lx\n", - db_reg_dword[0], db_reg_dword[1], + db_reg->dword[0], db_reg->dword[1], virt_to_phys ( arbel->uar + offset ) ); barrier(); - writel ( db_reg_dword[0], ( arbel->uar + offset + 0 ) ); + writel ( db_reg->dword[0], ( arbel->uar + offset + 0 ) ); barrier(); - writel ( db_reg_dword[1], ( arbel->uar + offset + 4 ) ); + writel ( db_reg->dword[1], ( arbel->uar + offset + 4 ) ); } /** @@ -347,15 +347,16 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, struct arbel *arbel = ibdev->priv; struct ib_work_queue *wq = &qp->send; struct arbel_send_work_queue *arbel_wq = wq->priv; - unsigned int wqe_idx_mask = ( wq->num_wqes - 1 ); - 
struct ud_send_wqe_st *prev_wqe; - struct ud_send_wqe_st *wqe; + struct arbelprm_ud_send_wqe *prev_wqe; + struct arbelprm_ud_send_wqe *wqe; + union arbelprm_doorbell_record *db_rec; + union arbelprm_doorbell_register db_reg; struct ib_gid *gid; + unsigned int wqe_idx_mask; size_t nds; - union db_record_st *db_rec; - struct send_doorbell_st db_reg; /* Allocate work queue entry */ + wqe_idx_mask = ( wq->num_wqes - 1 ); if ( wq->iobufs[wq->next_idx & wqe_idx_mask] ) { DBGC ( arbel, "ARBEL %p send queue full", arbel ); return -ENOBUFS; @@ -365,69 +366,61 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, wqe = &arbel_wq->wqe_u[wq->next_idx & wqe_idx_mask].wqe_cont.wqe; /* Construct work queue entry */ - MLX_POPULATE_1 ( &wqe->next.next, arbelprm_wqe_segment_next_st, 1, - always1, 1 ); - memset ( &wqe->next.control, 0, - sizeof ( wqe->next.control ) ); - MLX_POPULATE_1 ( &wqe->next.control, - arbelprm_wqe_segment_ctrl_send_st, 0, - always1, 1 ); - memset ( &wqe->udseg, 0, sizeof ( wqe->udseg ) ); - MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 0, - pd, GLOBAL_PD, - port_number, PXE_IB_PORT ); - MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 1, - rlid, av->dlid, - g, av->gid_present ); - MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 2, - max_stat_rate, ( ( av->rate >= 3 ) ? 0 : 1 ), - msg, 3 ); - MLX_POPULATE_1 ( &wqe->udseg, arbelprm_ud_address_vector_st, 3, - sl, av->sl ); + MLX_FILL_1 ( &wqe->next, 1, always1, 1 ); + memset ( &wqe->ctrl, 0, sizeof ( wqe->ctrl ) ); + MLX_FILL_1 ( &wqe->ctrl, 0, always1, 1 ); + memset ( &wqe->ud, 0, sizeof ( wqe->ud ) ); + MLX_FILL_2 ( &wqe->ud, 0, + ud_address_vector.pd, GLOBAL_PD, + ud_address_vector.port_number, PXE_IB_PORT ); + MLX_FILL_2 ( &wqe->ud, 1, + ud_address_vector.rlid, av->dlid, + ud_address_vector.g, av->gid_present ); + MLX_FILL_2 ( &wqe->ud, 2, + ud_address_vector.max_stat_rate, + ( ( av->rate >= 3 ) ? 
0 : 1 ), + ud_address_vector.msg, 3 ); + MLX_FILL_1 ( &wqe->ud, 3, ud_address_vector.sl, av->sl ); gid = ( av->gid_present ? &av->gid : &arbel_no_gid ); - memcpy ( ( ( ( void * ) &wqe->udseg ) + 16 ), - gid, sizeof ( *gid ) ); - MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 8, - destination_qp, av->dest_qp ); - MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 9, - q_key, av->qkey ); - wqe->mpointer[0].local_addr_l = - cpu_to_be32 ( virt_to_bus ( iobuf->data ) ); - wqe->mpointer[0].byte_count = cpu_to_be32 ( iob_len ( iobuf ) ); + memcpy ( &wqe->ud.u.dwords[4], gid, sizeof ( *gid ) ); + MLX_FILL_1 ( &wqe->ud, 8, destination_qp, av->dest_qp ); + MLX_FILL_1 ( &wqe->ud, 9, q_key, av->qkey ); + MLX_FILL_1 ( &wqe->data[0], 3, + local_address_l, virt_to_bus ( iobuf->data ) ); + MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_len ( iobuf ) ); DBG ( "Work queue entry:\n" ); DBG_HD ( wqe, sizeof ( *wqe ) ); /* Update previous work queue entry's "next" field */ - nds = ( ( offsetof ( typeof ( *wqe ), mpointer ) + - sizeof ( wqe->mpointer[0] ) ) >> 4 ); - MLX_MODIFY ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 0, - nopcode, XDEV_NOPCODE_SEND ); - MLX_POPULATE_3 ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 1, - nds, nds, - f, 1, - always1, 1 ); + nds = ( ( offsetof ( typeof ( *wqe ), data ) + + sizeof ( wqe->data[0] ) ) >> 4 ); + MLX_SET ( &prev_wqe->next, nopcode, XDEV_NOPCODE_SEND ); + MLX_FILL_3 ( &prev_wqe->next, 1, + nds, nds, + f, 1, + always1, 1 ); DBG ( "Previous work queue entry's next field:\n" ); - DBG_HD ( &prev_wqe->next.next, sizeof ( prev_wqe->next.next ) ); + DBG_HD ( &prev_wqe->next, sizeof ( prev_wqe->next ) ); /* Update doorbell record */ db_rec = &arbel->db_rec[arbel_wq->doorbell_idx]; - MLX_POPULATE_1 ( db_rec, arbelprm_qp_db_record_st, 0, - counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); + MLX_FILL_1 ( &db_rec->qp, 0, + counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); barrier(); DBG ( "Doorbell record:\n" ); DBG_HD ( 
db_rec, 8 ); /* Ring doorbell register */ - MLX_POPULATE_4 ( &db_reg, arbelprm_send_doorbell_st, 0, - nopcode, XDEV_NOPCODE_SEND, - f, 1, - wqe_counter, ( wq->next_idx & 0xffff ), - wqe_cnt, 1 ); - MLX_POPULATE_2 ( &db_reg, arbelprm_send_doorbell_st, 1, - nds, nds, - qpn, qp->qpn ); + MLX_FILL_4 ( &db_reg.send, 0, + nopcode, XDEV_NOPCODE_SEND, + f, 1, + wqe_counter, ( wq->next_idx & 0xffff ), + wqe_cnt, 1 ); + MLX_FILL_2 ( &db_reg.send, 1, + nds, nds, + qpn, qp->qpn ); arbel_ring_doorbell ( arbel, &db_reg, POST_SND_OFFSET ); /* Update work queue's index */ @@ -437,13 +430,12 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, } static void arbel_parse_completion ( struct arbel *arbel, - union cqe_st *cqe, + union arbelprm_completion_entry *cqe, struct ib_completion *completion ) { memset ( completion, 0, sizeof ( *completion ) ); - is_send = MLX_EXTRACT ( cqe, arbelprm_completion_queue_entry_st, s ); - completion->len = - MLX_EXTRACT ( cqe, arbelprm_completion_queue_entry_st, - byte_cnt );} + completion->is_send = MLX_GET ( &cqe->normal, s ); + completion->len = MLX_GET ( &cqe->normal, byte_cnt ); +} /** * Poll completion queue @@ -459,8 +451,8 @@ static void arbel_poll_cq ( struct ib_device *ibdev, struct arbel *arbel = ibdev->priv; struct arbel_completion_queue *arbel_cq = cq->priv; unsigned int cqe_idx_mask = ( cq->num_cqes - 1 ); - union db_record_st *db_rec = &arbel->db_rec[arbel_cq->doorbell_idx]; - union cqe_st *cqe; + union arbelprm_doorbell_record *db_rec; + union arbelprm_completion_entry *cqe; struct ib_completion completion; struct io_buffer *iobuf; int is_send; @@ -468,8 +460,7 @@ static void arbel_poll_cq ( struct ib_device *ibdev, while ( 1 ) { /* Look for completion entry */ cqe = &arbel_cq->cqe[cq->next_idx & cqe_idx_mask]; - if ( MLX_EXTRACT ( cqe, arbelprm_completion_queue_entry_st, - owner ) != 0 ) { + if ( MLX_GET ( &cqe->normal, owner ) != 0 ) { /* Entry still owned by hardware; end of poll */ break; } @@ -484,14 
+475,14 @@ static void arbel_poll_cq ( struct ib_device *ibdev, iobuf ); /* Return ownership to hardware */ - MLX_POPULATE_1 ( cqe, arbelprm_completion_queue_entry_st, 7, - owner, 1 ); + MLX_FILL_1 ( &cqe->normal, 7, owner, 1 ); barrier(); /* Update completion queue's index */ cq->next_idx++; /* Update doorbell record */ - MLX_POPULATE_1 ( db_rec, arbelprm_cq_ci_db_record_st, 0, - counter, ( cq->next_idx & 0xffffffffUL ) ); + db_rec = &arbel->db_rec[arbel_cq->doorbell_idx]; + MLX_FILL_1 ( &db_rec->cq_ci, 0, + counter, ( cq->next_idx & 0xffffffffUL ) ); } } diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index c08191586..72a85d42a 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -113,6 +113,8 @@ struct ib_completion_queue { /** An Infiniband completion */ struct ib_completion { + /** Completion is for send queue */ + int is_send; /** Length */ size_t len; }; -- cgit v1.2.3-55-g7522 From 8deef093d90d8558925cc4c6159b8e2bcf8b02e3 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 20:58:29 +0100 Subject: Direct polling of TX completion queue now works. 
--- src/drivers/net/mlx_ipoib/arbel.h | 79 ++++++++++++ src/drivers/net/mlx_ipoib/mt25218.c | 251 +++++++++++++++++++++++++++--------- src/include/gpxe/infiniband.h | 44 +++++-- src/net/infiniband.c | 20 +++ 4 files changed, 319 insertions(+), 75 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index e09930448..f35ef26b9 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -1,6 +1,26 @@ #ifndef _ARBEL_H #define _ARBEL_H +/** @file + * + * Mellanox Arbel Infiniband HCA driver + * + */ + +/* + * Hardware constants + * + */ + +#define ARBEL_OPCODE_SEND 0x0a +#define ARBEL_OPCODE_RECV_ERROR 0xfe +#define ARBEL_OPCODE_SEND_ERROR 0xff + +/* + * Wrapper structures for hardware datatypes + * + */ + struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); @@ -12,6 +32,11 @@ struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_data_ptr ); struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_next ); struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ud ); +/* + * Composite hardware datatypes + * + */ + #define ARBELPRM_MAX_GATHER 1 struct arbelprm_ud_send_wqe { @@ -36,4 +61,58 @@ union arbelprm_doorbell_register { uint32_t dword[2]; } __attribute__ (( packed )); +/* + * gPXE-specific definitions + * + */ + +/** Alignment of Arbel send work queue entries */ +#define ARBEL_SEND_WQE_ALIGN 128 + +/** An Arbel send work queue entry */ +union arbel_send_wqe { + struct arbelprm_ud_send_wqe ud; + uint8_t force_align[ARBEL_SEND_WQE_ALIGN]; +} __attribute__ (( packed )); + +/** An Arbel send work queue */ +struct arbel_send_work_queue { + /** Doorbell record number */ + unsigned int doorbell_idx; + /** Work queue entries */ + union arbel_send_wqe *wqe; +}; + +/** Alignment of Arbel receive work queue entries */ +#define ARBEL_RECV_WQE_ALIGN 64 + +/** An Arbel 
receive work queue entry */ +union arbel_recv_wqe { + uint8_t force_align[ARBEL_RECV_WQE_ALIGN]; +} __attribute__ (( packed )); + +/** An Arbel receive work queue */ +struct arbel_recv_work_queue { + /** Doorbell record number */ + unsigned int doorbell_idx; + /** Work queue entries */ + union arbel_recv_wqe *wqe; +}; + +/** An Arbel completion queue */ +struct arbel_completion_queue { + /** Doorbell record number */ + unsigned int doorbell_idx; + /** Completion queue entries */ + union arbelprm_completion_entry *cqe; +}; + +/** An Arbel device */ +struct arbel { + /** User Access Region */ + void *uar; + /** Doorbell records */ + union arbelprm_doorbell_record *db_rec; +}; + #endif /* _ARBEL_H */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 0453ba79f..be114b947 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -25,28 +25,6 @@ Skeleton NIC driver for Etherboot #include "arbel.h" -struct arbel_send_work_queue { - /** Doorbell record number */ - unsigned int doorbell_idx; - /** Work queue entries */ - // struct ud_send_wqe_st *wqe; - union ud_send_wqe_u *wqe_u; -}; - -struct arbel_completion_queue { - /** Doorbell record number */ - unsigned int doorbell_idx; - /** Completion queue entries */ - union arbelprm_completion_entry *cqe; -}; - -struct arbel { - /** User Access Region */ - void *uar; - /** Doorbell records */ - union arbelprm_doorbell_record *db_rec; -}; - struct mlx_nic { @@ -119,9 +97,10 @@ static int mlx_transmit ( struct net_device *netdev, return 0; } -static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, +static int arbel_post_send ( struct ib_device *ibdev, + struct ib_queue_pair *qp, struct ib_address_vector *av, - struct ib_queue_pair *qp ); + struct io_buffer *iobuf ); static struct io_buffer *tx_ring[NUM_IPOIB_SND_WQES]; static int next_tx_idx = 0; @@ -137,10 +116,10 @@ static int mlx_transmit_direct ( struct net_device *netdev, }; 
struct arbel_send_work_queue arbel_send_queue = { .doorbell_idx = IPOIB_SND_QP_DB_IDX, - .wqe_u = ( (struct udqp_st *) mlx->ipoib_qph )->snd_wq, + .wqe = ( (struct udqp_st *) mlx->ipoib_qph )->snd_wq, }; struct ib_device ibdev = { - .priv = &arbel, + .dev_priv = &arbel, }; struct ib_queue_pair qp = { .qpn = ib_get_qpn ( mlx->ipoib_qph ), @@ -148,7 +127,7 @@ static int mlx_transmit_direct ( struct net_device *netdev, .num_wqes = NUM_IPOIB_SND_WQES, .next_idx = next_tx_idx, .iobufs = tx_ring, - .priv = &arbel_send_queue, + .dev_priv = &arbel_send_queue, }, }; struct ud_av_st *bcast_av = mlx->bcast_av; @@ -164,7 +143,7 @@ static int mlx_transmit_direct ( struct net_device *netdev, }; memcpy ( &av.gid, ( ( void * ) bav ) + 16, 16 ); - rc = arbel_post_send ( &ibdev, iobuf, &av, &qp ); + rc = arbel_post_send ( &ibdev, &qp, &av, iobuf ); next_tx_idx = qp.send.next_idx; @@ -172,6 +151,75 @@ static int mlx_transmit_direct ( struct net_device *netdev, } +static void arbel_poll_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq, + ib_completer_t complete_send, + ib_completer_t complete_recv ); + +static void temp_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->priv; + + DBG ( "Wahey! TX completion\n" ); + netdev_tx_complete_err ( netdev, iobuf, + ( completion->syndrome ? -EIO : 0 ) ); +} + +static void temp_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp __unused, + struct ib_completion *completion __unused, + struct io_buffer *iobuf __unused ) { + DBG ( "AARGH! 
recv completion\n" ); +} + +static int next_cq_idx = 0; + +static void mlx_poll_cq_direct ( struct net_device *netdev ) { + struct mlx_nic *mlx = netdev->priv; + + struct arbel arbel = { + .uar = memfree_pci_dev.uar, + .db_rec = dev_ib_data.uar_context_base, + }; + struct arbel_send_work_queue arbel_send_queue = { + .doorbell_idx = IPOIB_SND_QP_DB_IDX, + .wqe = ( ( struct udqp_st * ) mlx->ipoib_qph )->snd_wq, + }; + struct ib_device ibdev = { + .dev_priv = &arbel, + }; + struct ib_queue_pair qp = { + .qpn = ib_get_qpn ( mlx->ipoib_qph ), + .send = { + .num_wqes = NUM_IPOIB_SND_WQES, + .next_idx = next_tx_idx, + .iobufs = tx_ring, + .dev_priv = &arbel_send_queue, + }, + .priv = netdev, + }; + struct arbel_completion_queue arbel_cq = { + .doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, + .cqe = ( ( struct cq_st * ) mlx->snd_cqh )->cq_buf, + }; + struct ib_completion_queue cq = { + .cqn = 1234, + .num_cqes = NUM_IPOIB_SND_CQES, + .next_idx = next_cq_idx, + .dev_priv = &arbel_cq, + }; + + INIT_LIST_HEAD ( &cq.queue_pairs ); + INIT_LIST_HEAD ( &qp.list ); + list_add ( &qp.list, &cq.queue_pairs ); + + arbel_poll_cq ( &ibdev, &cq, temp_complete_send, temp_complete_recv ); + + next_cq_idx = cq.next_idx; +} + /** * Handle TX completion * @@ -276,7 +324,11 @@ static void mlx_poll ( struct net_device *netdev ) { } /* Poll completion queues */ +#if 0 mlx_poll_cq ( netdev, mlx->snd_cqh, mlx_tx_complete ); +#else + mlx_poll_cq_direct ( netdev ); +#endif mlx_poll_cq ( netdev, mlx->rcv_cqh, mlx_rx_complete ); } @@ -336,17 +388,18 @@ static void arbel_ring_doorbell ( struct arbel *arbel, * Post send work queue entry * * @v ibdev Infiniband device - * @v iobuf I/O buffer - * @v av Address vector * @v qp Queue pair + * @v av Address vector + * @v iobuf I/O buffer * @ret rc Return status code */ -static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, +static int arbel_post_send ( struct ib_device *ibdev, + struct ib_queue_pair *qp, struct ib_address_vector *av, - struct 
ib_queue_pair *qp ) { - struct arbel *arbel = ibdev->priv; + struct io_buffer *iobuf ) { + struct arbel *arbel = ibdev->dev_priv; struct ib_work_queue *wq = &qp->send; - struct arbel_send_work_queue *arbel_wq = wq->priv; + struct arbel_send_work_queue *arbel_send_wq = wq->dev_priv; struct arbelprm_ud_send_wqe *prev_wqe; struct arbelprm_ud_send_wqe *wqe; union arbelprm_doorbell_record *db_rec; @@ -358,12 +411,12 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, /* Allocate work queue entry */ wqe_idx_mask = ( wq->num_wqes - 1 ); if ( wq->iobufs[wq->next_idx & wqe_idx_mask] ) { - DBGC ( arbel, "ARBEL %p send queue full", arbel ); + DBGC ( arbel, "Arbel %p send queue full", arbel ); return -ENOBUFS; } wq->iobufs[wq->next_idx & wqe_idx_mask] = iobuf; - prev_wqe = &arbel_wq->wqe_u[(wq->next_idx - 1) & wqe_idx_mask].wqe_cont.wqe; - wqe = &arbel_wq->wqe_u[wq->next_idx & wqe_idx_mask].wqe_cont.wqe; + prev_wqe = &arbel_send_wq->wqe[(wq->next_idx - 1) & wqe_idx_mask].ud; + wqe = &arbel_send_wq->wqe[wq->next_idx & wqe_idx_mask].ud; /* Construct work queue entry */ MLX_FILL_1 ( &wqe->next, 1, always1, 1 ); @@ -395,7 +448,7 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, /* Update previous work queue entry's "next" field */ nds = ( ( offsetof ( typeof ( *wqe ), data ) + sizeof ( wqe->data[0] ) ) >> 4 ); - MLX_SET ( &prev_wqe->next, nopcode, XDEV_NOPCODE_SEND ); + MLX_SET ( &prev_wqe->next, nopcode, ARBEL_OPCODE_SEND ); MLX_FILL_3 ( &prev_wqe->next, 1, nds, nds, f, 1, @@ -405,7 +458,7 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, DBG_HD ( &prev_wqe->next, sizeof ( prev_wqe->next ) ); /* Update doorbell record */ - db_rec = &arbel->db_rec[arbel_wq->doorbell_idx]; + db_rec = &arbel->db_rec[arbel_send_wq->doorbell_idx]; MLX_FILL_1 ( &db_rec->qp, 0, counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); barrier(); @@ -414,7 +467,7 @@ static int arbel_post_send ( struct ib_device *ibdev, 
struct io_buffer *iobuf, /* Ring doorbell register */ MLX_FILL_4 ( &db_reg.send, 0, - nopcode, XDEV_NOPCODE_SEND, + nopcode, ARBEL_OPCODE_SEND, f, 1, wqe_counter, ( wq->next_idx & 0xffff ), wqe_cnt, 1 ); @@ -429,50 +482,126 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, return 0; } -static void arbel_parse_completion ( struct arbel *arbel, - union arbelprm_completion_entry *cqe, - struct ib_completion *completion ) { - memset ( completion, 0, sizeof ( *completion ) ); - completion->is_send = MLX_GET ( &cqe->normal, s ); - completion->len = MLX_GET ( &cqe->normal, byte_cnt ); -} +/** + * Handle completion + * + * @v ibdev Infiniband device + * @v cq Completion queue + * @v cqe Hardware completion queue entry + * @v complete_send Send completion handler + * @v complete_recv Receive completion handler + * @ret rc Return status code + */ +static int arbel_complete ( struct ib_device *ibdev, + struct ib_completion_queue *cq, + union arbelprm_completion_entry *cqe, + ib_completer_t complete_send, + ib_completer_t complete_recv ) { + struct arbel *arbel = ibdev->dev_priv; + struct ib_completion completion; + struct ib_queue_pair *qp; + struct ib_work_queue *wq; + struct io_buffer *iobuf; + struct arbel_send_work_queue *arbel_send_wq; + struct arbel_recv_work_queue *arbel_recv_wq; + ib_completer_t complete; + unsigned int opcode; + unsigned long qpn; + unsigned int is_send; + unsigned long wqe_adr; + unsigned int wqe_idx; + int rc = 0; + + /* Parse completion */ + memset ( &completion, 0, sizeof ( completion ) ); + completion.len = MLX_GET ( &cqe->normal, byte_cnt ); + qpn = MLX_GET ( &cqe->normal, my_qpn ); + is_send = MLX_GET ( &cqe->normal, s ); + wqe_adr = ( MLX_GET ( &cqe->normal, wqe_adr ) << 6 ); + opcode = MLX_GET ( &cqe->normal, opcode ); + if ( opcode >= ARBEL_OPCODE_RECV_ERROR ) { + /* "s" field is not valid for error opcodes */ + is_send = ( opcode == ARBEL_OPCODE_SEND_ERROR ); + completion.syndrome = MLX_GET ( &cqe->error, 
syndrome ); + DBGC ( arbel, "Arbel %p CPN %lx syndrome %x vendor %lx\n", + arbel, cq->cqn, completion.syndrome, + MLX_GET ( &cqe->error, vendor_code ) ); + rc = -EIO; + /* Don't return immediately; propagate error to completer */ + } + + /* Identify queue pair */ + qp = ib_find_qp ( &cq->queue_pairs, qpn ); + if ( ! qp ) { + DBGC ( arbel, "Arbel %p CQN %lx unknown QPN %lx\n", + arbel, cq->cqn, qpn ); + return -EIO; + } + + /* Identify work queue entry index */ + if ( is_send ) { + wq = &qp->send; + arbel_send_wq = wq->dev_priv; + wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_send_wq->wqe ) ) / + sizeof ( arbel_send_wq->wqe[0] ) ); + } else { + wq = &qp->recv; + arbel_recv_wq = wq->dev_priv; + wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_recv_wq->wqe ) ) / + sizeof ( arbel_recv_wq->wqe[0] ) ); + } + + /* Identify I/O buffer */ + iobuf = wq->iobufs[wqe_idx]; + if ( ! iobuf ) { + DBGC ( arbel, "Arbel %p CQN %lx QPN %lx empty WQE %x\n", + arbel, cq->cqn, qpn, wqe_idx ); + return -EIO; + } + wq->iobufs[wqe_idx] = NULL; + + /* Pass off to caller's completion handler */ + complete = ( is_send ? 
complete_send : complete_recv ); + complete ( ibdev, qp, &completion, iobuf ); + + return rc; +} /** * Poll completion queue * * @v ibdev Infiniband device * @v cq Completion queue - * @v complete Completion handler + * @v complete_send Send completion handler + * @v complete_recv Receive completion handler */ static void arbel_poll_cq ( struct ib_device *ibdev, struct ib_completion_queue *cq, ib_completer_t complete_send, ib_completer_t complete_recv ) { - struct arbel *arbel = ibdev->priv; - struct arbel_completion_queue *arbel_cq = cq->priv; - unsigned int cqe_idx_mask = ( cq->num_cqes - 1 ); + struct arbel *arbel = ibdev->dev_priv; + struct arbel_completion_queue *arbel_cq = cq->dev_priv; union arbelprm_doorbell_record *db_rec; union arbelprm_completion_entry *cqe; - struct ib_completion completion; - struct io_buffer *iobuf; - int is_send; + unsigned int cqe_idx_mask; + int rc; while ( 1 ) { /* Look for completion entry */ + cqe_idx_mask = ( cq->num_cqes - 1 ); cqe = &arbel_cq->cqe[cq->next_idx & cqe_idx_mask]; if ( MLX_GET ( &cqe->normal, owner ) != 0 ) { /* Entry still owned by hardware; end of poll */ break; } - /* Parse completion */ - - - /* Handle completion */ - ( is_send ? 
complete_send : complete_recv ) ( ibdev, - &completion, - iobuf ); + if ( ( rc = arbel_complete ( ibdev, cq, cqe, complete_send, + complete_recv ) ) != 0 ) { + DBGC ( arbel, "Arbel %p failed to complete: %s\n", + arbel, strerror ( rc ) ); + DBGC_HD ( arbel, cqe, sizeof ( *cqe ) ); + } /* Return ownership to hardware */ MLX_FILL_1 ( &cqe->normal, 7, owner, 1 ); diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 72a85d42a..3679a1103 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -79,24 +79,30 @@ struct ib_work_queue { unsigned long next_idx; /** I/O buffers assigned to work queue */ struct io_buffer **iobufs; - /** Driver private data */ - void *priv; + /** Device private data */ + void *dev_priv; }; /** An Infiniband Queue Pair */ struct ib_queue_pair { + /** List of queue pairs sharing a completion queue */ + struct list_head list; /** Queue Pair Number */ - uint32_t qpn; + unsigned long qpn; /** Send queue */ struct ib_work_queue send; /** Receive queue */ struct ib_work_queue recv; - /** Driver private data */ + /** Queue owner private data */ void *priv; + /** Device private data */ + void *dev_priv; }; /** An Infiniband Completion Queue */ struct ib_completion_queue { + /** Completion queue number */ + unsigned long cqn; /** Number of completion queue entries */ unsigned int num_cqes; /** Next completion queue entry index @@ -107,14 +113,19 @@ struct ib_completion_queue { * array index. */ unsigned long next_idx; - /** Driver private data */ - void *priv; + /** List of associated queue pairs */ + struct list_head queue_pairs; + /** Device private data */ + void *dev_priv; }; /** An Infiniband completion */ struct ib_completion { - /** Completion is for send queue */ - int is_send; + /** Syndrome + * + * If non-zero, then the completion is in error. 
+ */ + unsigned int syndrome; /** Length */ size_t len; }; @@ -122,10 +133,12 @@ struct ib_completion { /** An Infiniband completion handler * * @v ibdev Infiniband device + * @v qp Queue pair * @v completion Completion * @v iobuf I/O buffer */ typedef void ( * ib_completer_t ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, struct ib_completion *completion, struct io_buffer *iobuf ); @@ -156,9 +169,9 @@ struct ib_device_operations { /** Post send work queue entry * * @v ibdev Infiniband device - * @v iobuf I/O buffer - * @v av Address vector * @v qp Queue pair + * @v av Address vector + * @v iobuf I/O buffer * @ret rc Return status code * * If this method returns success, the I/O buffer remains @@ -167,9 +180,9 @@ struct ib_device_operations { * interpreted as "failure to enqueue buffer". */ int ( * post_send ) ( struct ib_device *ibdev, - struct io_buffer *iobuf, + struct ib_queue_pair *qp, struct ib_address_vector *av, - struct ib_queue_pair *qp ); + struct io_buffer *iobuf ); /** Poll completion queue * * @v ibdev Infiniband device @@ -187,11 +200,14 @@ struct ib_device_operations { /** An Infiniband device */ struct ib_device { - /** Driver private data */ - void *priv; + /** Device private data */ + void *dev_priv; }; +extern struct ib_queue_pair * ib_find_qp ( struct list_head *list, + unsigned long qpn ); + extern struct ll_protocol infiniband_protocol; diff --git a/src/net/infiniband.c b/src/net/infiniband.c index 52811b921..edc93b6e3 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -33,6 +33,26 @@ * */ +/** + * Find queue pair from a list + * + * @v list List of queue pairs + * @v qpn Queue pair number + * @ret qp Queue pair, or NULL if not found + */ +struct ib_queue_pair * ib_find_qp ( struct list_head *list, + unsigned long qpn ) { + struct ib_queue_pair *qp; + + list_for_each_entry ( qp, list, list ) { + if ( qp->qpn == qpn ) + return qp; + } + return NULL; +} + + + /** Infiniband broadcast MAC address */ static uint8_t 
ib_broadcast[IB_ALEN] = { 0xff, }; -- cgit v1.2.3-55-g7522 From 37fc40bc8cd857a5e922b21b9e41580b39091c76 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 23:33:25 +0100 Subject: post_recv() now works, and we can pass data on the IPoIB queue pair using entirely our own code. --- src/drivers/net/mlx_ipoib/arbel.h | 27 +++- src/drivers/net/mlx_ipoib/ib_mt25218.c | 2 +- src/drivers/net/mlx_ipoib/mt25218.c | 224 ++++++++++++++++++++++++--------- src/include/gpxe/infiniband.h | 36 +++++- src/net/infiniband.c | 19 +-- 5 files changed, 234 insertions(+), 74 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index f35ef26b9..9da6bef99 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -25,6 +25,7 @@ struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_recv_wqe_segment_next ); struct MLX_DECLARE_STRUCT ( arbelprm_send_doorbell ); struct MLX_DECLARE_STRUCT ( arbelprm_ud_address_vector ); struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ctrl_send ); @@ -37,13 +38,28 @@ struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ud ); * */ -#define ARBELPRM_MAX_GATHER 1 +#define ARBEL_MAX_GATHER 1 struct arbelprm_ud_send_wqe { struct arbelprm_wqe_segment_next next; struct arbelprm_wqe_segment_ctrl_send ctrl; struct arbelprm_wqe_segment_ud ud; - struct arbelprm_wqe_segment_data_ptr data[ARBELPRM_MAX_GATHER]; + struct arbelprm_wqe_segment_data_ptr data[ARBEL_MAX_GATHER]; +} __attribute__ (( packed )); + +#define ARBEL_MAX_SCATTER 1 + +struct arbelprm_recv_wqe { + /* The autogenerated header is inconsistent between send and + * receive WQEs. The "ctrl" structure for receive WQEs is + * defined to include the "next" structure. 
Since the "ctrl" + * part of the "ctrl" structure contains only "reserved, must + * be zero" bits, we ignore its definition and provide + * something more usable. + */ + struct arbelprm_recv_wqe_segment_next next; + uint32_t ctrl[2]; /* All "reserved, must be zero" */ + struct arbelprm_wqe_segment_data_ptr data[ARBEL_MAX_SCATTER]; } __attribute__ (( packed )); union arbelprm_completion_entry { @@ -88,6 +104,7 @@ struct arbel_send_work_queue { /** An Arbel receive work queue entry */ union arbel_recv_wqe { + struct arbelprm_recv_wqe recv; uint8_t force_align[ARBEL_RECV_WQE_ALIGN]; } __attribute__ (( packed )); @@ -113,6 +130,12 @@ struct arbel { void *uar; /** Doorbell records */ union arbelprm_doorbell_record *db_rec; + /** Reserved LKey + * + * Used to get unrestricted memory access. + */ + unsigned long reserved_lkey; + }; #endif /* _ARBEL_H */ diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index b6552f9f6..45d7f46ff 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -1311,7 +1311,7 @@ static int create_ipoib_qp(void **qp_pp, qp->rcv_buf_sz = IPOIB_RCV_BUF_SZ; qp->max_recv_wqes = NUM_IPOIB_RCV_WQES; - qp->recv_wqe_cur_free = NUM_IPOIB_RCV_WQES; + qp->recv_wqe_cur_free = 0; //NUM_IPOIB_RCV_WQES; qp->rcv_uar_context = dev_ib_data.uar_context_base + 8 * IPOIB_RCV_QP_DB_IDX; diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 385427fbb..8fdc59090 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -26,6 +26,7 @@ Skeleton NIC driver for Etherboot #include "arbel.h" +#define MLX_RX_MAX_FILL NUM_IPOIB_RCV_WQES struct mlx_nic { /** Queue pair handle */ @@ -36,35 +37,65 @@ struct mlx_nic { cq_t snd_cqh; /** Receive completion queue */ cq_t rcv_cqh; + + /** RX fill level */ + unsigned int rx_fill; }; static struct io_buffer *static_ipoib_tx_ring[NUM_IPOIB_SND_WQES]; +static struct io_buffer 
*static_ipoib_rx_ring[NUM_IPOIB_RCV_WQES]; static struct arbel static_arbel; static struct arbel_send_work_queue static_arbel_ipoib_send_wq = { .doorbell_idx = IPOIB_SND_QP_DB_IDX, }; +static struct arbel_send_work_queue static_arbel_ipoib_recv_wq = { + .doorbell_idx = IPOIB_RCV_QP_DB_IDX, +}; static struct arbel_completion_queue static_arbel_ipoib_send_cq = { .doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, }; +static struct arbel_completion_queue static_arbel_ipoib_recv_cq = { + .doorbell_idx = IPOIB_RCV_CQ_CI_DB_IDX, +}; +static struct ib_completion_queue static_ipoib_send_cq; +static struct ib_completion_queue static_ipoib_recv_cq; static struct ib_device static_ibdev = { .dev_priv = &static_arbel, }; static struct ib_queue_pair static_ipoib_qp = { .send = { + .qp = &static_ipoib_qp, + .is_send = 1, + .cq = &static_ipoib_send_cq, .num_wqes = NUM_IPOIB_SND_WQES, .iobufs = static_ipoib_tx_ring, .dev_priv = &static_arbel_ipoib_send_wq, + .list = LIST_HEAD_INIT ( static_ipoib_qp.send.list ), + }, + .recv = { + .qp = &static_ipoib_qp, + .is_send = 0, + .cq = &static_ipoib_recv_cq, + .num_wqes = NUM_IPOIB_RCV_WQES, + .iobufs = static_ipoib_rx_ring, + .dev_priv = &static_arbel_ipoib_recv_wq, + .list = LIST_HEAD_INIT ( static_ipoib_qp.recv.list ), }, - .list = LIST_HEAD_INIT ( static_ipoib_qp.list ), }; static struct ib_completion_queue static_ipoib_send_cq = { .cqn = 1234, /* Only used for debug messages */ .num_cqes = NUM_IPOIB_SND_CQES, .dev_priv = &static_arbel_ipoib_send_cq, - .queue_pairs = LIST_HEAD_INIT ( static_ipoib_send_cq.queue_pairs ), + .work_queues = LIST_HEAD_INIT ( static_ipoib_send_cq.work_queues ), +}; +static struct ib_completion_queue static_ipoib_recv_cq = { + .cqn = 2345, /* Only used for debug messages */ + .num_cqes = NUM_IPOIB_RCV_CQES, + .dev_priv = &static_arbel_ipoib_recv_cq, + .work_queues = LIST_HEAD_INIT ( static_ipoib_recv_cq.work_queues ), }; @@ -157,36 +188,6 @@ static int mlx_transmit_direct ( struct net_device *netdev, } -static void 
arbel_poll_cq ( struct ib_device *ibdev, - struct ib_completion_queue *cq, - ib_completer_t complete_send, - ib_completer_t complete_recv ); - -static void temp_complete_send ( struct ib_device *ibdev __unused, - struct ib_queue_pair *qp, - struct ib_completion *completion, - struct io_buffer *iobuf ) { - struct net_device *netdev = qp->priv; - - DBG ( "Wahey! TX completion\n" ); - netdev_tx_complete_err ( netdev, iobuf, - ( completion->syndrome ? -EIO : 0 ) ); -} - -static void temp_complete_recv ( struct ib_device *ibdev __unused, - struct ib_queue_pair *qp __unused, - struct ib_completion *completion __unused, - struct io_buffer *iobuf __unused ) { - DBG ( "AARGH! recv completion\n" ); -} - -static void mlx_poll_cq_direct ( struct net_device *netdev ) { - struct mlx_nic *mlx = netdev->priv; - - arbel_poll_cq ( &static_ibdev, &static_ipoib_send_cq, - temp_complete_send, temp_complete_recv ); -} - /** * Handle TX completion * @@ -233,6 +234,44 @@ static void mlx_rx_complete ( struct net_device *netdev, netdev_rx ( netdev, iobuf ); } +static void arbel_poll_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq, + ib_completer_t complete_send, + ib_completer_t complete_recv ); + +static void temp_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->priv; + + DBG ( "Wahey! TX completion\n" ); + netdev_tx_complete_err ( netdev, iobuf, + ( completion->syndrome ? -EIO : 0 ) ); +} + +static void temp_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->priv; + struct mlx_nic *mlx = netdev->priv; + + DBG ( "Yay! 
RX completion on %p len %zx:\n", iobuf, completion->len ); + // DBG_HD ( iobuf, sizeof ( *iobuf ) ); + // DBG_HD ( iobuf->data, 256 ); + if ( completion->syndrome ) { + netdev_rx_err ( netdev, iobuf, -EIO ); + } else { + iob_put ( iobuf, completion->len ); + iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); + netdev_rx ( netdev, iobuf ); + } + + mlx->rx_fill--; +} + +#if 0 /** * Poll completion queue * @@ -267,6 +306,32 @@ static void mlx_poll_cq ( struct net_device *netdev, cq_t cq, free_wqe ( ib_cqe.wqe ); } } +#endif + +static int arbel_post_recv ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct io_buffer *iobuf ); + +static void mlx_refill_rx ( struct net_device *netdev ) { + struct mlx_nic *mlx = netdev->priv; + struct io_buffer *iobuf; + int rc; + + while ( mlx->rx_fill < MLX_RX_MAX_FILL ) { + iobuf = alloc_iob ( 2048 ); + if ( ! iobuf ) + break; + DBG ( "Posting RX buffer %p:\n", iobuf ); + // memset ( iobuf->data, 0xaa, 256 ); + // DBG_HD ( iobuf, sizeof ( *iobuf ) ); + if ( ( rc = arbel_post_recv ( &static_ibdev, &static_ipoib_qp, + iobuf ) ) != 0 ) { + free_iob ( iobuf ); + break; + } + mlx->rx_fill++; + } +} /** * Poll for completed and received packets @@ -291,8 +356,13 @@ static void mlx_poll ( struct net_device *netdev ) { } /* Poll completion queues */ - mlx_poll_cq_direct ( netdev ); - mlx_poll_cq ( netdev, mlx->rcv_cqh, mlx_rx_complete ); + arbel_poll_cq ( &static_ibdev, &static_ipoib_send_cq, + temp_complete_send, temp_complete_recv ); + arbel_poll_cq ( &static_ibdev, &static_ipoib_recv_cq, + temp_complete_send, temp_complete_recv ); + // mlx_poll_cq ( netdev, mlx->rcv_cqh, mlx_rx_complete ); + + mlx_refill_rx ( netdev ); } /** @@ -397,12 +467,9 @@ static int arbel_post_send ( struct ib_device *ibdev, memcpy ( &wqe->ud.u.dwords[4], gid, sizeof ( *gid ) ); MLX_FILL_1 ( &wqe->ud, 8, destination_qp, av->dest_qp ); MLX_FILL_1 ( &wqe->ud, 9, q_key, av->qkey ); + MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_len ( iobuf ) ); 
MLX_FILL_1 ( &wqe->data[0], 3, local_address_l, virt_to_bus ( iobuf->data ) ); - MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_len ( iobuf ) ); - - DBG ( "Work queue entry:\n" ); - DBG_HD ( wqe, sizeof ( *wqe ) ); /* Update previous work queue entry's "next" field */ nds = ( ( offsetof ( typeof ( *wqe ), data ) + @@ -413,16 +480,11 @@ static int arbel_post_send ( struct ib_device *ibdev, f, 1, always1, 1 ); - DBG ( "Previous work queue entry's next field:\n" ); - DBG_HD ( &prev_wqe->next, sizeof ( prev_wqe->next ) ); - /* Update doorbell record */ + barrier(); db_rec = &arbel->db_rec[arbel_send_wq->doorbell_idx]; MLX_FILL_1 ( &db_rec->qp, 0, counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); - barrier(); - DBG ( "Doorbell record:\n" ); - DBG_HD ( db_rec, 8 ); /* Ring doorbell register */ MLX_FILL_4 ( &db_reg.send, 0, @@ -441,6 +503,51 @@ static int arbel_post_send ( struct ib_device *ibdev, return 0; } +/** + * Post receive work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int arbel_post_recv ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct io_buffer *iobuf ) { + struct arbel *arbel = ibdev->dev_priv; + struct ib_work_queue *wq = &qp->recv; + struct arbel_recv_work_queue *arbel_recv_wq = wq->dev_priv; + struct arbelprm_recv_wqe *wqe; + union arbelprm_doorbell_record *db_rec; + unsigned int wqe_idx_mask; + + /* Allocate work queue entry */ + wqe_idx_mask = ( wq->num_wqes - 1 ); + if ( wq->iobufs[wq->next_idx & wqe_idx_mask] ) { + DBGC ( arbel, "Arbel %p receive queue full", arbel ); + return -ENOBUFS; + } + wq->iobufs[wq->next_idx & wqe_idx_mask] = iobuf; + wqe = &arbel_recv_wq->wqe[wq->next_idx & wqe_idx_mask].recv; + + /* Construct work queue entry */ + MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_tailroom ( iobuf ) ); + MLX_FILL_1 ( &wqe->data[0], 1, l_key, arbel->reserved_lkey ); + MLX_FILL_1 ( &wqe->data[0], 3, + local_address_l, virt_to_bus ( iobuf->data ) 
); + + /* Update doorbell record */ + barrier(); + db_rec = &arbel->db_rec[arbel_recv_wq->doorbell_idx]; + MLX_FILL_1 ( &db_rec->qp, 0, + counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); + + /* Update work queue's index */ + wq->next_idx++; + + return 0; +} + /** * Handle completion * @@ -458,7 +565,6 @@ static int arbel_complete ( struct ib_device *ibdev, ib_completer_t complete_recv ) { struct arbel *arbel = ibdev->dev_priv; struct ib_completion completion; - struct ib_queue_pair *qp; struct ib_work_queue *wq; struct io_buffer *iobuf; struct arbel_send_work_queue *arbel_send_wq; @@ -466,7 +572,7 @@ static int arbel_complete ( struct ib_device *ibdev, ib_completer_t complete; unsigned int opcode; unsigned long qpn; - unsigned int is_send; + int is_send; unsigned long wqe_adr; unsigned int wqe_idx; int rc = 0; @@ -489,22 +595,20 @@ static int arbel_complete ( struct ib_device *ibdev, /* Don't return immediately; propagate error to completer */ } - /* Identify queue pair */ - qp = ib_find_qp ( &cq->queue_pairs, qpn ); - if ( ! qp ) { - DBGC ( arbel, "Arbel %p CQN %lx unknown QPN %lx\n", - arbel, cq->cqn, qpn ); + /* Identify work queue */ + wq = ib_find_wq ( cq, qpn, is_send ); + if ( ! wq ) { + DBGC ( arbel, "Arbel %p CQN %lx unknown %s QPN %lx\n", + arbel, cq->cqn, ( is_send ? "send" : "recv" ), qpn ); return -EIO; } /* Identify work queue entry index */ if ( is_send ) { - wq = &qp->send; arbel_send_wq = wq->dev_priv; wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_send_wq->wqe ) ) / sizeof ( arbel_send_wq->wqe[0] ) ); } else { - wq = &qp->recv; arbel_recv_wq = wq->dev_priv; wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_recv_wq->wqe ) ) / sizeof ( arbel_recv_wq->wqe[0] ) ); @@ -521,7 +625,7 @@ static int arbel_complete ( struct ib_device *ibdev, /* Pass off to caller's completion handler */ complete = ( is_send ? 
complete_send : complete_recv ); - complete ( ibdev, qp, &completion, iobuf ); + complete ( ibdev, wq->qp, &completion, iobuf ); return rc; } @@ -577,6 +681,7 @@ static void arbel_poll_cq ( struct ib_device *ibdev, /** Arbel Infiniband operations */ static struct ib_device_operations arbel_ib_operations = { .post_send = arbel_post_send, + .post_recv = arbel_post_recv, .poll_cq = arbel_poll_cq, }; @@ -636,14 +741,21 @@ static int arbel_probe ( struct pci_device *pci, /* Hack up IB structures */ static_arbel.uar = memfree_pci_dev.uar; static_arbel.db_rec = dev_ib_data.uar_context_base; + static_arbel.reserved_lkey = dev_ib_data.mkey; static_arbel_ipoib_send_wq.wqe = ( ( struct udqp_st * ) qph )->snd_wq; + static_arbel_ipoib_recv_wq.wqe = + ( ( struct udqp_st * ) qph )->rcv_wq; static_arbel_ipoib_send_cq.cqe = ( ( struct cq_st * ) ib_data.ipoib_snd_cq )->cq_buf; + static_arbel_ipoib_recv_cq.cqe = + ( ( struct cq_st * ) ib_data.ipoib_rcv_cq )->cq_buf; static_ipoib_qp.qpn = ib_get_qpn ( qph ); static_ipoib_qp.priv = netdev; - list_add ( &static_ipoib_qp.list, - &static_ipoib_send_cq.queue_pairs ); + list_add ( &static_ipoib_qp.send.list, + &static_ipoib_send_cq.work_queues ); + list_add ( &static_ipoib_qp.recv.list, + &static_ipoib_recv_cq.work_queues ); /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 3679a1103..85684b635 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -64,9 +64,19 @@ struct ibhdr { struct ib_device; +struct ib_queue_pair; +struct ib_completion_queue; /** An Infiniband Work Queue */ struct ib_work_queue { + /** Containing queue pair */ + struct ib_queue_pair *qp; + /** "Is a send queue" flag */ + int is_send; + /** Associated completion queue */ + struct ib_completion_queue *cq; + /** List of work queues on this completion queue */ + struct list_head list; /** Number of work queue entries */ unsigned int 
num_wqes; /** Next work queue entry index @@ -85,8 +95,6 @@ struct ib_work_queue { /** An Infiniband Queue Pair */ struct ib_queue_pair { - /** List of queue pairs sharing a completion queue */ - struct list_head list; /** Queue Pair Number */ unsigned long qpn; /** Send queue */ @@ -113,8 +121,8 @@ struct ib_completion_queue { * array index. */ unsigned long next_idx; - /** List of associated queue pairs */ - struct list_head queue_pairs; + /** List of work queues completing to this queue */ + struct list_head work_queues; /** Device private data */ void *dev_priv; }; @@ -183,6 +191,22 @@ struct ib_device_operations { struct ib_queue_pair *qp, struct ib_address_vector *av, struct io_buffer *iobuf ); + /** + * Post receive work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @ret rc Return status code + * + * If this method returns success, the I/O buffer remains + * owned by the queue pair. If this method returns failure, + * the I/O buffer is immediately released; the failure is + * interpreted as "failure to enqueue buffer". 
+ */ + int ( * post_recv ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct io_buffer *iobuf ); /** Poll completion queue * * @v ibdev Infiniband device @@ -205,8 +229,8 @@ struct ib_device { }; -extern struct ib_queue_pair * ib_find_qp ( struct list_head *list, - unsigned long qpn ); +extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, + unsigned long qpn, int is_send ); diff --git a/src/net/infiniband.c b/src/net/infiniband.c index edc93b6e3..694c88b11 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -34,19 +34,20 @@ */ /** - * Find queue pair from a list + * Find work queue belonging to completion queue * - * @v list List of queue pairs + * @v cq Completion queue * @v qpn Queue pair number - * @ret qp Queue pair, or NULL if not found + * @v is_send Find send work queue (rather than receive) + * @ret wq Work queue, or NULL if not found */ -struct ib_queue_pair * ib_find_qp ( struct list_head *list, - unsigned long qpn ) { - struct ib_queue_pair *qp; +struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, + unsigned long qpn, int is_send ) { + struct ib_work_queue *wq; - list_for_each_entry ( qp, list, list ) { - if ( qp->qpn == qpn ) - return qp; + list_for_each_entry ( wq, &cq->work_queues, list ) { + if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) ) + return wq; } return NULL; } -- cgit v1.2.3-55-g7522 From 156b409ccc2e1e9c08784b49471c60c4950d7603 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 04:02:20 +0100 Subject: Rearrange data structures to maximise embedding (and hence minimise the number of separate allocations that need to be done). 
--- src/drivers/net/mlx_ipoib/arbel.h | 13 +++ src/drivers/net/mlx_ipoib/mt25218.c | 193 ++++++++++++++++++++++-------------- src/include/gpxe/infiniband.h | 10 +- 3 files changed, 133 insertions(+), 83 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index 1cf92bde8..a41b6330e 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -119,8 +119,20 @@ struct arbel_recv_work_queue { union arbel_recv_wqe *wqe; }; +/** An Arbel queue pair */ +struct arbel_queue_pair { + /** Infiniband queue pair */ + struct ib_queue_pair qp; + /** Send work queue */ + struct arbel_send_work_queue send; + /** Receive work queue */ + struct arbel_recv_work_queue recv; +}; + /** An Arbel completion queue */ struct arbel_completion_queue { + /** Infiniband completion queue */ + struct ib_completion_queue cq; /** Doorbell record number */ unsigned int doorbell_idx; /** Completion queue entries */ @@ -154,6 +166,7 @@ struct arbel { */ #define ARBEL_HCR_QUERY_DEV_LIM 0x0003 +#define ARBEL_HCR_SW2HW_CQ 0x0016 #define ARBEL_HCR_BASE 0x80680 #define ARBEL_HCR_REG(x) ( ARBEL_HCR_BASE + 4 * (x) ) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 180ec6da2..4fcc6a3c2 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -52,57 +52,54 @@ static struct io_buffer *static_ipoib_tx_ring[NUM_IPOIB_SND_WQES]; static struct io_buffer *static_ipoib_rx_ring[NUM_IPOIB_RCV_WQES]; static struct arbel static_arbel; -static struct arbel_send_work_queue static_arbel_ipoib_send_wq = { - .doorbell_idx = IPOIB_SND_QP_DB_IDX, -}; -static struct arbel_send_work_queue static_arbel_ipoib_recv_wq = { - .doorbell_idx = IPOIB_RCV_QP_DB_IDX, -}; -static struct arbel_completion_queue static_arbel_ipoib_send_cq = { - .doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, -}; -static struct arbel_completion_queue static_arbel_ipoib_recv_cq = { - 
.doorbell_idx = IPOIB_RCV_CQ_CI_DB_IDX, -}; - -static struct ib_completion_queue static_ipoib_send_cq; -static struct ib_completion_queue static_ipoib_recv_cq; -static struct ib_device static_ibdev = { - .dev_priv = &static_arbel, -}; -static struct ib_queue_pair static_ipoib_qp = { +static struct arbel_completion_queue static_ipoib_send_cq; +static struct arbel_completion_queue static_ipoib_recv_cq; + +static struct arbel_queue_pair static_ipoib_qp = { + .qp = { + .send = { + .qp = &static_ipoib_qp.qp, + .is_send = 1, + .cq = &static_ipoib_send_cq.cq, + .num_wqes = NUM_IPOIB_SND_WQES, + .iobufs = static_ipoib_tx_ring, + .list = LIST_HEAD_INIT (static_ipoib_qp.qp.send.list), + }, + .recv = { + .qp = &static_ipoib_qp.qp, + .is_send = 0, + .cq = &static_ipoib_recv_cq.cq, + .num_wqes = NUM_IPOIB_RCV_WQES, + .iobufs = static_ipoib_rx_ring, + .list = LIST_HEAD_INIT (static_ipoib_qp.qp.recv.list), + }, + }, .send = { - .qp = &static_ipoib_qp, - .is_send = 1, - .cq = &static_ipoib_send_cq, - .num_wqes = NUM_IPOIB_SND_WQES, - .iobufs = static_ipoib_tx_ring, - .dev_priv = &static_arbel_ipoib_send_wq, - .list = LIST_HEAD_INIT ( static_ipoib_qp.send.list ), + .doorbell_idx = IPOIB_SND_QP_DB_IDX, }, .recv = { - .qp = &static_ipoib_qp, - .is_send = 0, - .cq = &static_ipoib_recv_cq, - .num_wqes = NUM_IPOIB_RCV_WQES, - .iobufs = static_ipoib_rx_ring, - .dev_priv = &static_arbel_ipoib_recv_wq, - .list = LIST_HEAD_INIT ( static_ipoib_qp.recv.list ), + .doorbell_idx = IPOIB_RCV_QP_DB_IDX, }, }; -static struct ib_completion_queue static_ipoib_send_cq = { - .cqn = 1234, /* Only used for debug messages */ - .num_cqes = NUM_IPOIB_SND_CQES, - .dev_priv = &static_arbel_ipoib_send_cq, - .work_queues = LIST_HEAD_INIT ( static_ipoib_send_cq.work_queues ), +static struct arbel_completion_queue static_ipoib_send_cq = { + .cq = { + .cqn = 1234, /* Only used for debug messages */ + .num_cqes = NUM_IPOIB_SND_CQES, + .work_queues = LIST_HEAD_INIT (static_ipoib_send_cq.cq.work_queues), + }, + 
.doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, }; -static struct ib_completion_queue static_ipoib_recv_cq = { - .cqn = 2345, /* Only used for debug messages */ - .num_cqes = NUM_IPOIB_RCV_CQES, - .dev_priv = &static_arbel_ipoib_recv_cq, - .work_queues = LIST_HEAD_INIT ( static_ipoib_recv_cq.work_queues ), +static struct arbel_completion_queue static_ipoib_recv_cq = { + .cq = { + .cqn = 2345, /* Only used for debug messages */ + .num_cqes = NUM_IPOIB_RCV_CQES, + .work_queues = LIST_HEAD_INIT (static_ipoib_recv_cq.cq.work_queues), + }, + .doorbell_idx = IPOIB_RCV_CQ_CI_DB_IDX, +}; +static struct ib_device static_ibdev = { + .priv = &static_arbel, }; - /** @@ -152,7 +149,7 @@ static int mlx_transmit_direct ( struct net_device *netdev, }; memcpy ( &av.gid, ( ( void * ) bav ) + 16, 16 ); - rc = arbel_post_send ( &static_ibdev, &static_ipoib_qp, &av, iobuf ); + rc = arbel_post_send ( &static_ibdev, &static_ipoib_qp.qp, &av, iobuf ); return rc; } @@ -206,7 +203,8 @@ static void mlx_refill_rx ( struct net_device *netdev ) { if ( ! 
iobuf ) break; DBG ( "Posting RX buffer %p:\n", iobuf ); - if ( ( rc = arbel_post_recv ( &static_ibdev, &static_ipoib_qp, + if ( ( rc = arbel_post_recv ( &static_ibdev, + &static_ipoib_qp.qp, iobuf ) ) != 0 ) { free_iob ( iobuf ); break; @@ -238,9 +236,9 @@ static void mlx_poll ( struct net_device *netdev ) { } /* Poll completion queues */ - arbel_poll_cq ( &static_ibdev, &static_ipoib_send_cq, + arbel_poll_cq ( &static_ibdev, &static_ipoib_send_cq.cq, temp_complete_send, temp_complete_recv ); - arbel_poll_cq ( &static_ibdev, &static_ipoib_recv_cq, + arbel_poll_cq ( &static_ibdev, &static_ipoib_recv_cq.cq, temp_complete_send, temp_complete_recv ); // mlx_poll_cq ( netdev, mlx->rcv_cqh, mlx_rx_complete ); @@ -383,27 +381,63 @@ static int arbel_cmd ( struct arbel *arbel, unsigned long command, return 0; } -static int arbel_cmd_query_dev_lim ( struct arbel *arbel, - struct arbelprm_query_dev_lim *out ) { +static inline int +arbel_cmd_query_dev_lim ( struct arbel *arbel, + struct arbelprm_query_dev_lim *dev_lim ) { return arbel_cmd ( arbel, ARBEL_HCR_OUT_CMD ( ARBEL_HCR_QUERY_DEV_LIM, - 1, sizeof ( *out ) ), - 0, NULL, 0, out ); + 1, sizeof ( *dev_lim ) ), + 0, NULL, 0, dev_lim ); +} + +static inline int +arbel_cmd_sw2hw_cq ( struct arbel *arbel, unsigned long cqn, + const struct arbelprm_completion_queue_context *cqctx ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_SW2HW_CQ, + 1, sizeof ( *cqctx ) ), + 0, cqctx, cqn, NULL ); } +/*************************************************************************** + * + * Completion queue operations + * + *************************************************************************** + */ + /** * Create completion queue * * @v ibdev Infiniband device * @v */ -static int arbel_create_cq ( struct ib_device *ibdev ) { - struct arbelprm_completion_queue_context *cqctx; +static int arbel_create_cq ( struct ib_device *ibdev, + struct ib_completion_queue **new_cq ) { + struct arbel *arbel = ibdev->priv; + struct 
arbelprm_completion_queue_context cqctx; + struct ib_completion_queue *cq; + + cq = zalloc ( sizeof ( *cq ) ); + if ( ! cq ) + return -ENOMEM; + + memset ( &cqctx, 0, sizeof ( cqctx ) ); + + + return arbel_cmd_sw2hw_cq ( arbel, 0, &cqctx ); } +/*************************************************************************** + * + * Work request operations + * + *************************************************************************** + */ + /** * Ring doorbell register in UAR * @@ -438,9 +472,11 @@ static int arbel_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_address_vector *av, struct io_buffer *iobuf ) { - struct arbel *arbel = ibdev->dev_priv; + struct arbel *arbel = ibdev->priv; + struct arbel_queue_pair *arbel_qp + = container_of ( qp, struct arbel_queue_pair, qp ); struct ib_work_queue *wq = &qp->send; - struct arbel_send_work_queue *arbel_send_wq = wq->dev_priv; + struct arbel_send_work_queue *arbel_send_wq = &arbel_qp->send; struct arbelprm_ud_send_wqe *prev_wqe; struct arbelprm_ud_send_wqe *wqe; union arbelprm_doorbell_record *db_rec; @@ -526,9 +562,11 @@ static int arbel_post_send ( struct ib_device *ibdev, static int arbel_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct io_buffer *iobuf ) { - struct arbel *arbel = ibdev->dev_priv; + struct arbel *arbel = ibdev->priv; + struct arbel_queue_pair *arbel_qp + = container_of ( qp, struct arbel_queue_pair, qp ); struct ib_work_queue *wq = &qp->recv; - struct arbel_recv_work_queue *arbel_recv_wq = wq->dev_priv; + struct arbel_recv_work_queue *arbel_recv_wq = &arbel_qp->recv; struct arbelprm_recv_wqe *wqe; union arbelprm_doorbell_record *db_rec; unsigned int wqe_idx_mask; @@ -575,12 +613,14 @@ static int arbel_complete ( struct ib_device *ibdev, union arbelprm_completion_entry *cqe, ib_completer_t complete_send, ib_completer_t complete_recv ) { - struct arbel *arbel = ibdev->dev_priv; + struct arbel *arbel = ibdev->priv; struct ib_completion completion; struct 
ib_work_queue *wq; - struct io_buffer *iobuf; + struct ib_queue_pair *qp; + struct arbel_queue_pair *arbel_qp; struct arbel_send_work_queue *arbel_send_wq; struct arbel_recv_work_queue *arbel_recv_wq; + struct io_buffer *iobuf; ib_completer_t complete; unsigned int opcode; unsigned long qpn; @@ -614,14 +654,16 @@ static int arbel_complete ( struct ib_device *ibdev, arbel, cq->cqn, ( is_send ? "send" : "recv" ), qpn ); return -EIO; } + qp = wq->qp; + arbel_qp = container_of ( qp, struct arbel_queue_pair, qp ); /* Identify work queue entry index */ if ( is_send ) { - arbel_send_wq = wq->dev_priv; + arbel_send_wq = &arbel_qp->send; wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_send_wq->wqe ) ) / sizeof ( arbel_send_wq->wqe[0] ) ); } else { - arbel_recv_wq = wq->dev_priv; + arbel_recv_wq = &arbel_qp->recv; wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_recv_wq->wqe ) ) / sizeof ( arbel_recv_wq->wqe[0] ) ); } @@ -637,7 +679,7 @@ static int arbel_complete ( struct ib_device *ibdev, /* Pass off to caller's completion handler */ complete = ( is_send ? 
complete_send : complete_recv ); - complete ( ibdev, wq->qp, &completion, iobuf ); + complete ( ibdev, qp, &completion, iobuf ); return rc; } @@ -654,8 +696,9 @@ static void arbel_poll_cq ( struct ib_device *ibdev, struct ib_completion_queue *cq, ib_completer_t complete_send, ib_completer_t complete_recv ) { - struct arbel *arbel = ibdev->dev_priv; - struct arbel_completion_queue *arbel_cq = cq->dev_priv; + struct arbel *arbel = ibdev->priv; + struct arbel_completion_queue *arbel_cq + = container_of ( cq, struct arbel_completion_queue, cq ); union arbelprm_doorbell_record *db_rec; union arbelprm_completion_entry *cqe; unsigned int cqe_idx_mask; @@ -757,20 +800,20 @@ static int arbel_probe ( struct pci_device *pci, static_arbel.uar = memfree_pci_dev.uar; static_arbel.db_rec = dev_ib_data.uar_context_base; static_arbel.reserved_lkey = dev_ib_data.mkey; - static_arbel_ipoib_send_wq.wqe = + static_ipoib_qp.send.wqe = ( ( struct udqp_st * ) qph )->snd_wq; - static_arbel_ipoib_recv_wq.wqe = + static_ipoib_qp.recv.wqe = ( ( struct udqp_st * ) qph )->rcv_wq; - static_arbel_ipoib_send_cq.cqe = + static_ipoib_send_cq.cqe = ( ( struct cq_st * ) ib_data.ipoib_snd_cq )->cq_buf; - static_arbel_ipoib_recv_cq.cqe = + static_ipoib_recv_cq.cqe = ( ( struct cq_st * ) ib_data.ipoib_rcv_cq )->cq_buf; - static_ipoib_qp.qpn = ib_get_qpn ( qph ); - static_ipoib_qp.priv = netdev; - list_add ( &static_ipoib_qp.send.list, - &static_ipoib_send_cq.work_queues ); - list_add ( &static_ipoib_qp.recv.list, - &static_ipoib_recv_cq.work_queues ); + static_ipoib_qp.qp.qpn = ib_get_qpn ( qph ); + static_ipoib_qp.qp.priv = netdev; + list_add ( &static_ipoib_qp.qp.send.list, + &static_ipoib_send_cq.cq.work_queues ); + list_add ( &static_ipoib_qp.qp.recv.list, + &static_ipoib_recv_cq.cq.work_queues ); struct arbelprm_query_dev_lim dev_lim; memset ( &dev_lim, 0xaa, sizeof ( dev_lim ) ); diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 85684b635..dd8022fb6 100644 --- 
a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -89,8 +89,6 @@ struct ib_work_queue { unsigned long next_idx; /** I/O buffers assigned to work queue */ struct io_buffer **iobufs; - /** Device private data */ - void *dev_priv; }; /** An Infiniband Queue Pair */ @@ -103,8 +101,6 @@ struct ib_queue_pair { struct ib_work_queue recv; /** Queue owner private data */ void *priv; - /** Device private data */ - void *dev_priv; }; /** An Infiniband Completion Queue */ @@ -123,8 +119,6 @@ struct ib_completion_queue { unsigned long next_idx; /** List of work queues completing to this queue */ struct list_head work_queues; - /** Device private data */ - void *dev_priv; }; /** An Infiniband completion */ @@ -224,8 +218,8 @@ struct ib_device_operations { /** An Infiniband device */ struct ib_device { - /** Device private data */ - void *dev_priv; + /** Driver private data */ + void *priv; }; -- cgit v1.2.3-55-g7522 From e238bb1e439cb614f2986a70546559efef16239f Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 14:39:51 +0100 Subject: destroy_cq() now implemented (not tested). 
--- src/drivers/net/mlx_ipoib/arbel.h | 4 +++ src/drivers/net/mlx_ipoib/mt25218.c | 65 +++++++++++++++++++++++++++++++++++-- src/include/gpxe/infiniband.h | 19 +++++++++++ 3 files changed, 86 insertions(+), 2 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index 37fe3693b..d38424676 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -28,6 +28,7 @@ /* HCA command register opcodes */ #define ARBEL_HCR_QUERY_DEV_LIM 0x0003 #define ARBEL_HCR_SW2HW_CQ 0x0016 +#define ARBEL_HCR_HW2SW_CQ 0x0017 /* * Wrapper structures for hardware datatypes @@ -247,6 +248,9 @@ struct arbel { #define ARBEL_HCR_OUT_CMD( _opcode, _out_mbox, _out_len ) \ ARBEL_HCR_CMD ( _opcode, 0, 0, _out_mbox, _out_len ) +#define ARBEL_HCR_VOID_CMD( _opcode ) \ + ARBEL_HCR_CMD ( _opcode, 0, 0, 0, 0 ) + /* * Doorbell record allocation * diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index dfdba4b56..64ae992ba 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -270,6 +270,13 @@ static struct net_device_operations mlx_operations = { +/*************************************************************************** + * + * Queue number allocation + * + *************************************************************************** + */ + /** * Allocate queue number * @@ -444,6 +451,13 @@ arbel_cmd_sw2hw_cq ( struct arbel *arbel, unsigned long cqn, 0, cqctx, cqn, NULL ); } +static inline int +arbel_cmd_hw2sw_cq ( struct arbel *arbel, unsigned long cqn ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_HW2SW_CQ ), + 1, NULL, cqn, NULL ); +} + /*************************************************************************** * * Completion queue operations @@ -548,8 +562,9 @@ static int arbel_create_cq ( struct ib_device *ibdev, return 0; err_sw2hw: - memset ( ci_db_rec, 0, sizeof ( *ci_db_rec ) ); - memset ( arm_db_rec, 
0, sizeof ( *arm_db_rec ) ); + MLX_FILL_1 ( ci_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + MLX_FILL_1 ( arm_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + free_dma ( arbel_cq->cqe, cqe_size ); err_cqe: free ( arbel_cq ); err_arbel_cq: @@ -558,6 +573,50 @@ static int arbel_create_cq ( struct ib_device *ibdev, return rc; } +/** + * Destroy completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + */ +static void arbel_destroy_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq ) { + struct arbel *arbel = ibdev->priv; + struct arbel_completion_queue *arbel_cq = + container_of ( cq, struct arbel_completion_queue, cq ); + struct arbelprm_cq_ci_db_record *ci_db_rec; + struct arbelprm_cq_arm_db_record *arm_db_rec; + int cqn_offset; + size_t cqe_size; + unsigned int ci_doorbell_idx; + unsigned int arm_doorbell_idx; + int rc; + + assert ( list_empty ( &cq->work_queues ) ); + + /* Take ownership back from hardware */ + if ( ( rc = arbel_cmd_hw2sw_cq ( arbel, cq->cqn ) ) != 0 ) { + DBGC ( arbel, "Arbel %p FATAL HW2SW_CQ failed: %s\n", + arbel, strerror ( rc ) ); + /* Leak memory and return; at least we avoid corruption */ + return; + } + + /* Clear doorbell records */ + cqn_offset = ( cq->cqn - arbel->limits.reserved_cqs ); + ci_doorbell_idx = arbel_cq_ci_doorbell_idx ( cqn_offset ); + arm_doorbell_idx = arbel_cq_arm_doorbell_idx ( cqn_offset ); + ci_db_rec = &arbel->db_rec[ci_doorbell_idx].cq_ci; + arm_db_rec = &arbel->db_rec[arm_doorbell_idx].cq_arm; + MLX_FILL_1 ( ci_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + MLX_FILL_1 ( arm_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + + /* Free memory */ + cqe_size = ( cq->num_cqes * sizeof ( arbel_cq->cqe[0] ) ); + free_dma ( arbel_cq->cqe, cqe_size ); + free ( arbel_cq ); + arbel_free_qn_offset ( arbel->cq_inuse, cqn_offset ); +} /*************************************************************************** * @@ -863,6 +922,8 @@ static void arbel_poll_cq ( struct ib_device *ibdev, /** Arbel Infiniband operations */ static 
struct ib_device_operations arbel_ib_operations = { + .create_cq = arbel_create_cq, + .destroy_cq = arbel_destroy_cq, .post_send = arbel_post_send, .post_recv = arbel_post_recv, .poll_cq = arbel_poll_cq, diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index dd8022fb6..973c58238 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -168,6 +168,25 @@ struct ib_address_vector { * These represent a subset of the Infiniband Verbs. */ struct ib_device_operations { + /** + * Create completion queue + * + * @v ibdev Infiniband device + * @v log2_num_cqes Log2 of the number of completion queue entries + * @ret new_cq New completion queue + * @ret rc Return status code + */ + int ( * create_cq ) ( struct ib_device *ibdev, + unsigned int log2_num_cqes, + struct ib_completion_queue **new_cq ); + /** + * Destroy completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + */ + void ( * destroy_cq ) ( struct ib_device *ibdev, + struct ib_completion_queue *cq ); /** Post send work queue entry * * @v ibdev Infiniband device -- cgit v1.2.3-55-g7522 From b21d4ca21e65025410df73b34d685b6e78c86f0d Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 17:25:15 +0100 Subject: Revert to dev_priv/owner_priv scheme, rather than container_of; it makes it easier to put the generic allocation code into infiniband.c --- src/drivers/net/mlx_ipoib/arbel.h | 23 +++- src/drivers/net/mlx_ipoib/mt25218.c | 236 +++++++++++++++++++++--------------- src/include/gpxe/infiniband.h | 20 +-- src/net/infiniband.c | 51 ++++++++ 4 files changed, 222 insertions(+), 108 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index d38424676..a1ca21f99 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -107,6 +107,8 @@ struct arbel_dev_limits { unsigned long reserved_uars; /** Number of reserved CQs */ unsigned long 
reserved_cqs; + /** Number of reserved QPs */ + unsigned long reserved_qps; }; /** Alignment of Arbel send work queue entries */ @@ -143,6 +145,15 @@ struct arbel_recv_work_queue { union arbel_recv_wqe *wqe; }; +/** Maximum number of allocatable queue pairs + * + * This is a policy decision, not a device limit. + */ +#define ARBEL_MAX_QPS 8 + +/** Base queue pair number */ +#define ARBEL_QPN_BASE 0x550000 + /** An Arbel queue pair */ struct arbel_queue_pair { /** Infiniband queue pair */ @@ -161,10 +172,10 @@ struct arbel_queue_pair { /** An Arbel completion queue */ struct arbel_completion_queue { - /** Infiniband completion queue */ - struct ib_completion_queue cq; - /** Doorbell record number */ - unsigned int doorbell_idx; + /** Consumer counter doorbell record number */ + unsigned int ci_doorbell_idx; + /** Arm queue doorbell record number */ + unsigned int arm_doorbell_idx; /** Completion queue entries */ union arbelprm_completion_entry *cqe; }; @@ -200,6 +211,8 @@ struct arbel { /** Completion queue in-use bitmask */ arbel_bitmask_t cq_inuse[ ARBEL_BITMASK_SIZE ( ARBEL_MAX_CQS ) ]; + /** Queue pair in-use bitmask */ + arbel_bitmask_t qp_inuse[ ARBEL_BITMASK_SIZE ( ARBEL_MAX_QPS ) ]; /** Device limits */ struct arbel_dev_limits limits; @@ -301,7 +314,7 @@ arbel_recv_doorbell_idx ( unsigned int qpn_offset ) { } /** - * Get commpletion queue consumer counter doorbell index + * Get completion queue consumer counter doorbell index * * @v cqn_offset Completion queue number offset * @ret doorbell_idx Doorbell index diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 64ae992ba..c466adb6b 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -53,28 +53,28 @@ static struct io_buffer *static_ipoib_tx_ring[NUM_IPOIB_SND_WQES]; static struct io_buffer *static_ipoib_rx_ring[NUM_IPOIB_RCV_WQES]; static struct arbel static_arbel; -static struct arbel_completion_queue static_ipoib_send_cq; 
-static struct arbel_completion_queue static_ipoib_recv_cq; - -static struct arbel_queue_pair static_ipoib_qp = { - .qp = { - .send = { - .qp = &static_ipoib_qp.qp, - .is_send = 1, - .cq = &static_ipoib_send_cq.cq, - .num_wqes = NUM_IPOIB_SND_WQES, - .iobufs = static_ipoib_tx_ring, - .list = LIST_HEAD_INIT (static_ipoib_qp.qp.send.list), - }, - .recv = { - .qp = &static_ipoib_qp.qp, - .is_send = 0, - .cq = &static_ipoib_recv_cq.cq, - .num_wqes = NUM_IPOIB_RCV_WQES, - .iobufs = static_ipoib_rx_ring, - .list = LIST_HEAD_INIT (static_ipoib_qp.qp.recv.list), - }, - }, + +static struct arbel_completion_queue static_arbel_ipoib_send_cq = { + .ci_doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, +}; +static struct ib_completion_queue static_ipoib_send_cq = { + .cqn = 1234, /* Only used for debug messages */ + .num_cqes = NUM_IPOIB_SND_CQES, + .work_queues = LIST_HEAD_INIT ( static_ipoib_send_cq.work_queues ), + .dev_priv = &static_arbel_ipoib_send_cq, +}; + +static struct arbel_completion_queue static_arbel_ipoib_recv_cq = { + .ci_doorbell_idx = IPOIB_RCV_CQ_CI_DB_IDX, +}; +static struct ib_completion_queue static_ipoib_recv_cq = { + .cqn = 2345, /* Only used for debug messages */ + .num_cqes = NUM_IPOIB_RCV_CQES, + .work_queues = LIST_HEAD_INIT ( static_ipoib_recv_cq.work_queues ), + .dev_priv = &static_arbel_ipoib_recv_cq, +}; + +static struct arbel_queue_pair static_arbel_ipoib_qp = { .send = { .doorbell_idx = IPOIB_SND_QP_DB_IDX, }, @@ -82,24 +82,31 @@ static struct arbel_queue_pair static_ipoib_qp = { .doorbell_idx = IPOIB_RCV_QP_DB_IDX, }, }; -static struct arbel_completion_queue static_ipoib_send_cq = { - .cq = { - .cqn = 1234, /* Only used for debug messages */ - .num_cqes = NUM_IPOIB_SND_CQES, - .work_queues = LIST_HEAD_INIT (static_ipoib_send_cq.cq.work_queues), +static struct ib_queue_pair static_ipoib_qp = { + .send = { + .qp = &static_ipoib_qp, + .is_send = 1, + .cq = &static_ipoib_send_cq, + .num_wqes = NUM_IPOIB_SND_WQES, + .iobufs = static_ipoib_tx_ring, + .list = 
LIST_HEAD_INIT (static_ipoib_qp.send.list), + .dev_priv = &static_arbel_ipoib_qp.send, }, - .doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, -}; -static struct arbel_completion_queue static_ipoib_recv_cq = { - .cq = { - .cqn = 2345, /* Only used for debug messages */ - .num_cqes = NUM_IPOIB_RCV_CQES, - .work_queues = LIST_HEAD_INIT (static_ipoib_recv_cq.cq.work_queues), + .recv = { + .qp = &static_ipoib_qp, + .is_send = 0, + .cq = &static_ipoib_recv_cq, + .num_wqes = NUM_IPOIB_RCV_WQES, + .iobufs = static_ipoib_rx_ring, + .list = LIST_HEAD_INIT (static_ipoib_qp.recv.list), + .dev_priv = &static_arbel_ipoib_qp.recv, }, - .doorbell_idx = IPOIB_RCV_CQ_CI_DB_IDX, + .dev_priv = &static_arbel_ipoib_qp, }; + + static struct ib_device static_ibdev = { - .priv = &static_arbel, + .dev_priv = &static_arbel, }; @@ -150,7 +157,7 @@ static int mlx_transmit_direct ( struct net_device *netdev, }; memcpy ( &av.gid, ( ( void * ) bav ) + 16, 16 ); - rc = arbel_post_send ( &static_ibdev, &static_ipoib_qp.qp, &av, iobuf ); + rc = arbel_post_send ( &static_ibdev, &static_ipoib_qp, &av, iobuf ); return rc; } @@ -164,7 +171,7 @@ static void temp_complete_send ( struct ib_device *ibdev __unused, struct ib_queue_pair *qp, struct ib_completion *completion, struct io_buffer *iobuf ) { - struct net_device *netdev = qp->priv; + struct net_device *netdev = qp->owner_priv; DBG ( "Wahey! TX completion\n" ); netdev_tx_complete_err ( netdev, iobuf, @@ -175,7 +182,7 @@ static void temp_complete_recv ( struct ib_device *ibdev __unused, struct ib_queue_pair *qp, struct ib_completion *completion, struct io_buffer *iobuf ) { - struct net_device *netdev = qp->priv; + struct net_device *netdev = qp->owner_priv; struct mlx_nic *mlx = netdev->priv; DBG ( "Yay! 
RX completion on %p len %zx:\n", iobuf, completion->len ); @@ -205,7 +212,7 @@ static void mlx_refill_rx ( struct net_device *netdev ) { break; DBG ( "Posting RX buffer %p:\n", iobuf ); if ( ( rc = arbel_post_recv ( &static_ibdev, - &static_ipoib_qp.qp, + &static_ipoib_qp, iobuf ) ) != 0 ) { free_iob ( iobuf ); break; @@ -237,11 +244,10 @@ static void mlx_poll ( struct net_device *netdev ) { } /* Poll completion queues */ - arbel_poll_cq ( &static_ibdev, &static_ipoib_send_cq.cq, + arbel_poll_cq ( &static_ibdev, &static_ipoib_send_cq, temp_complete_send, temp_complete_recv ); - arbel_poll_cq ( &static_ibdev, &static_ipoib_recv_cq.cq, + arbel_poll_cq ( &static_ibdev, &static_ipoib_recv_cq, temp_complete_send, temp_complete_recv ); - // mlx_poll_cq ( netdev, mlx->rcv_cqh, mlx_rx_complete ); mlx_refill_rx ( netdev ); } @@ -469,24 +475,18 @@ arbel_cmd_hw2sw_cq ( struct arbel *arbel, unsigned long cqn ) { * Create completion queue * * @v ibdev Infiniband device - * @v log2_num_cqes Log2 of the number of completion queue entries - * @ret new_cq New completion queue + * @v cq Completion queue * @ret rc Return status code */ static int arbel_create_cq ( struct ib_device *ibdev, - unsigned int log2_num_cqes, - struct ib_completion_queue **new_cq ) { - struct arbel *arbel = ibdev->priv; + struct ib_completion_queue *cq ) { + struct arbel *arbel = ibdev->dev_priv; struct arbel_completion_queue *arbel_cq; struct arbelprm_completion_queue_context cqctx; struct arbelprm_cq_ci_db_record *ci_db_rec; struct arbelprm_cq_arm_db_record *arm_db_rec; int cqn_offset; - unsigned int cqn; - unsigned int num_cqes; size_t cqe_size; - unsigned int ci_doorbell_idx; - unsigned int arm_doorbell_idx; unsigned int i; int rc; @@ -497,9 +497,7 @@ static int arbel_create_cq ( struct ib_device *ibdev, rc = cqn_offset; goto err_cqn_offset; } - cqn = ( arbel->limits.reserved_cqs + cqn_offset ); - ci_doorbell_idx = arbel_cq_ci_doorbell_idx ( cqn_offset ); - arm_doorbell_idx = arbel_cq_arm_doorbell_idx ( 
cqn_offset ); + cq->cqn = ( arbel->limits.reserved_cqs + cqn_offset ); /* Allocate control structures */ arbel_cq = zalloc ( sizeof ( *arbel_cq ) ); @@ -507,58 +505,59 @@ static int arbel_create_cq ( struct ib_device *ibdev, rc = -ENOMEM; goto err_arbel_cq; } - arbel_cq->cq.cqn = cqn; - arbel_cq->cq.num_cqes = num_cqes; - INIT_LIST_HEAD ( &arbel_cq->cq.work_queues ); - arbel_cq->doorbell_idx = ci_doorbell_idx; + arbel_cq->ci_doorbell_idx = arbel_cq_ci_doorbell_idx ( cqn_offset ); + arbel_cq->arm_doorbell_idx = arbel_cq_arm_doorbell_idx ( cqn_offset ); /* Allocate completion queue itself */ - num_cqes = ( 1 << log2_num_cqes ); - cqe_size = ( num_cqes * sizeof ( arbel_cq->cqe[0] ) ); + cqe_size = ( cq->num_cqes * sizeof ( arbel_cq->cqe[0] ) ); arbel_cq->cqe = malloc_dma ( cqe_size, sizeof ( arbel_cq->cqe[0] ) ); if ( ! arbel_cq->cqe ) { rc = -ENOMEM; goto err_cqe; } memset ( arbel_cq->cqe, 0, cqe_size ); - for ( i = 0 ; i < num_cqes ; i++ ) { + for ( i = 0 ; i < cq->num_cqes ; i++ ) { MLX_FILL_1 ( &arbel_cq->cqe[i].normal, 7, owner, 1 ); } barrier(); /* Initialise doorbell records */ - ci_db_rec = &arbel->db_rec[ci_doorbell_idx].cq_ci; + ci_db_rec = &arbel->db_rec[arbel_cq->ci_doorbell_idx].cq_ci; MLX_FILL_1 ( ci_db_rec, 0, counter, 0 ); MLX_FILL_2 ( ci_db_rec, 1, res, ARBEL_UAR_RES_CQ_CI, - cq_number, cqn ); - arm_db_rec = &arbel->db_rec[arm_doorbell_idx].cq_arm; + cq_number, cq->cqn ); + arm_db_rec = &arbel->db_rec[arbel_cq->arm_doorbell_idx].cq_arm; MLX_FILL_1 ( arm_db_rec, 0, counter, 0 ); MLX_FILL_2 ( arm_db_rec, 1, res, ARBEL_UAR_RES_CQ_ARM, - cq_number, cqn ); + cq_number, cq->cqn ); /* Hand queue over to hardware */ memset ( &cqctx, 0, sizeof ( cqctx ) ); MLX_FILL_1 ( &cqctx, 0, st, 0xa /* "Event fired" */ ); MLX_FILL_1 ( &cqctx, 2, start_address_l, virt_to_bus ( arbel_cq->cqe ) ); +#if 0 MLX_FILL_2 ( &cqctx, 3, usr_page, arbel->limits.reserved_uars, log_cq_size, log2_num_cqes ); +#endif MLX_FILL_1 ( &cqctx, 5, c_eqn, arbel->eqn ); MLX_FILL_1 ( &cqctx, 6, pd, 
ARBEL_GLOBAL_PD ); MLX_FILL_1 ( &cqctx, 7, l_key, arbel->reserved_lkey ); - MLX_FILL_1 ( &cqctx, 12, cqn, cqn ); - MLX_FILL_1 ( &cqctx, 13, cq_ci_db_record, ci_doorbell_idx ); - MLX_FILL_1 ( &cqctx, 14, cq_state_db_record, arm_doorbell_idx ); - if ( ( rc = arbel_cmd_sw2hw_cq ( arbel, cqn, &cqctx ) ) != 0 ) { + MLX_FILL_1 ( &cqctx, 12, cqn, cq->cqn ); + MLX_FILL_1 ( &cqctx, 13, + cq_ci_db_record, arbel_cq->ci_doorbell_idx ); + MLX_FILL_1 ( &cqctx, 14, + cq_state_db_record, arbel_cq->arm_doorbell_idx ); + if ( ( rc = arbel_cmd_sw2hw_cq ( arbel, cq->cqn, &cqctx ) ) != 0 ) { DBGC ( arbel, "Arbel %p SW2HW_CQ failed: %s\n", arbel, strerror ( rc ) ); goto err_sw2hw; } - *new_cq = &arbel_cq->cq; + cq->dev_priv = arbel_cq; return 0; err_sw2hw: @@ -581,9 +580,8 @@ static int arbel_create_cq ( struct ib_device *ibdev, */ static void arbel_destroy_cq ( struct ib_device *ibdev, struct ib_completion_queue *cq ) { - struct arbel *arbel = ibdev->priv; - struct arbel_completion_queue *arbel_cq = - container_of ( cq, struct arbel_completion_queue, cq ); + struct arbel *arbel = ibdev->dev_priv; + struct arbel_completion_queue *arbel_cq = cq->dev_priv; struct arbelprm_cq_ci_db_record *ci_db_rec; struct arbelprm_cq_arm_db_record *arm_db_rec; int cqn_offset; @@ -618,6 +616,53 @@ static void arbel_destroy_cq ( struct ib_device *ibdev, arbel_free_qn_offset ( arbel->cq_inuse, cqn_offset ); } +/*************************************************************************** + * + * Queue pair operations + * + *************************************************************************** + */ + +static int arbel_create_qp ( struct ib_device *ibdev, + unsigned int log2_num_send_wqes, + struct ib_completion_queue *send_cq, + unsigned int log2_num_recv_wqes, + struct ib_completion_queue *recv_cq, + struct ib_queue_pair **new_qp ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_queue_pair *arbel_qp; + struct arbelprm_qp_db_record *send_db_rec; + struct arbelprm_qp_db_record *recv_db_rec; + int 
qpn_offset; + unsigned int qpn; + unsigned int num_send_wqes; + unsigned int num_recv_wqes; + unsigned int send_doorbell_idx; + unsigned int recv_doorbell_idx; + int rc; + + /* Find a free queue pair number */ + qpn_offset = arbel_alloc_qn_offset ( arbel->qp_inuse, ARBEL_MAX_QPS ); + if ( qpn_offset < 0 ) { + DBGC ( arbel, "Arbel %p out of queue pairs\n", arbel ); + rc = qpn_offset; + goto err_qpn_offset; + } + qpn = ( ARBEL_QPN_BASE + arbel->limits.reserved_qps + qpn_offset ); + send_doorbell_idx = arbel_send_doorbell_idx ( qpn_offset ); + recv_doorbell_idx = arbel_recv_doorbell_idx ( qpn_offset ); + + /* Allocate control structures */ + num_send_wqes = ( 1 << log2_num_send_wqes ); + num_recv_wqes = ( 1 << log2_num_recv_wqes ); + arbel_qp = zalloc ( sizeof ( *arbel_qp ) ); + + return 0; + + err_qpn_offset: + return rc; +} + /*************************************************************************** * * Work request operations @@ -659,9 +704,8 @@ static int arbel_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_address_vector *av, struct io_buffer *iobuf ) { - struct arbel *arbel = ibdev->priv; - struct arbel_queue_pair *arbel_qp - = container_of ( qp, struct arbel_queue_pair, qp ); + struct arbel *arbel = ibdev->dev_priv; + struct arbel_queue_pair *arbel_qp = qp->dev_priv; struct ib_work_queue *wq = &qp->send; struct arbel_send_work_queue *arbel_send_wq = &arbel_qp->send; struct arbelprm_ud_send_wqe *prev_wqe; @@ -749,9 +793,8 @@ static int arbel_post_send ( struct ib_device *ibdev, static int arbel_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct io_buffer *iobuf ) { - struct arbel *arbel = ibdev->priv; - struct arbel_queue_pair *arbel_qp - = container_of ( qp, struct arbel_queue_pair, qp ); + struct arbel *arbel = ibdev->dev_priv; + struct arbel_queue_pair *arbel_qp = qp->dev_priv; struct ib_work_queue *wq = &qp->recv; struct arbel_recv_work_queue *arbel_recv_wq = &arbel_qp->recv; struct arbelprm_recv_wqe *wqe; @@ 
-800,7 +843,7 @@ static int arbel_complete ( struct ib_device *ibdev, union arbelprm_completion_entry *cqe, ib_completer_t complete_send, ib_completer_t complete_recv ) { - struct arbel *arbel = ibdev->priv; + struct arbel *arbel = ibdev->dev_priv; struct ib_completion completion; struct ib_work_queue *wq; struct ib_queue_pair *qp; @@ -842,7 +885,7 @@ static int arbel_complete ( struct ib_device *ibdev, return -EIO; } qp = wq->qp; - arbel_qp = container_of ( qp, struct arbel_queue_pair, qp ); + arbel_qp = qp->dev_priv; /* Identify work queue entry index */ if ( is_send ) { @@ -883,9 +926,8 @@ static void arbel_poll_cq ( struct ib_device *ibdev, struct ib_completion_queue *cq, ib_completer_t complete_send, ib_completer_t complete_recv ) { - struct arbel *arbel = ibdev->priv; - struct arbel_completion_queue *arbel_cq - = container_of ( cq, struct arbel_completion_queue, cq ); + struct arbel *arbel = ibdev->dev_priv; + struct arbel_completion_queue *arbel_cq = cq->dev_priv; struct arbelprm_cq_ci_db_record *ci_db_rec; union arbelprm_completion_entry *cqe; unsigned int cqe_idx_mask; @@ -914,7 +956,7 @@ static void arbel_poll_cq ( struct ib_device *ibdev, /* Update completion queue's index */ cq->next_idx++; /* Update doorbell record */ - ci_db_rec = &arbel->db_rec[arbel_cq->doorbell_idx].cq_ci; + ci_db_rec = &arbel->db_rec[arbel_cq->ci_doorbell_idx].cq_ci; MLX_FILL_1 ( ci_db_rec, 0, counter, ( cq->next_idx & 0xffffffffUL ) ); } @@ -992,20 +1034,20 @@ static int arbel_probe ( struct pci_device *pci, arbel->db_rec = dev_ib_data.uar_context_base; arbel->reserved_lkey = dev_ib_data.mkey; arbel->eqn = dev_ib_data.eq.eqn; - static_ipoib_qp.send.wqe = + static_arbel_ipoib_qp.send.wqe = ( ( struct udqp_st * ) qph )->snd_wq; - static_ipoib_qp.recv.wqe = + static_arbel_ipoib_qp.recv.wqe = ( ( struct udqp_st * ) qph )->rcv_wq; - static_ipoib_send_cq.cqe = + static_arbel_ipoib_send_cq.cqe = ( ( struct cq_st * ) ib_data.ipoib_snd_cq )->cq_buf; - static_ipoib_recv_cq.cqe = + 
static_arbel_ipoib_recv_cq.cqe = ( ( struct cq_st * ) ib_data.ipoib_rcv_cq )->cq_buf; - static_ipoib_qp.qp.qpn = ib_get_qpn ( qph ); - static_ipoib_qp.qp.priv = netdev; - list_add ( &static_ipoib_qp.qp.send.list, - &static_ipoib_send_cq.cq.work_queues ); - list_add ( &static_ipoib_qp.qp.recv.list, - &static_ipoib_recv_cq.cq.work_queues ); + static_ipoib_qp.qpn = ib_get_qpn ( qph ); + static_ipoib_qp.owner_priv = netdev; + list_add ( &static_ipoib_qp.send.list, + &static_ipoib_send_cq.work_queues ); + list_add ( &static_ipoib_qp.recv.list, + &static_ipoib_recv_cq.work_queues ); /* Get device limits */ if ( ( rc = arbel_cmd_query_dev_lim ( arbel, &dev_lim ) ) != 0 ) { @@ -1016,6 +1058,8 @@ static int arbel_probe ( struct pci_device *pci, arbel->limits.reserved_uars = MLX_GET ( &dev_lim, num_rsvd_uars ); arbel->limits.reserved_cqs = ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) ); + arbel->limits.reserved_qps = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); DBG ( "Device limits:\n "); DBG_HD ( &dev_lim, sizeof ( dev_lim ) ); diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 973c58238..d7f8b4ab2 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -89,6 +89,8 @@ struct ib_work_queue { unsigned long next_idx; /** I/O buffers assigned to work queue */ struct io_buffer **iobufs; + /** Device private data */ + void *dev_priv; }; /** An Infiniband Queue Pair */ @@ -99,8 +101,10 @@ struct ib_queue_pair { struct ib_work_queue send; /** Receive queue */ struct ib_work_queue recv; + /** Device private data */ + void *dev_priv; /** Queue owner private data */ - void *priv; + void *owner_priv; }; /** An Infiniband Completion Queue */ @@ -119,6 +123,8 @@ struct ib_completion_queue { unsigned long next_idx; /** List of work queues completing to this queue */ struct list_head work_queues; + /** Device private data */ + void *dev_priv; }; /** An Infiniband completion */ @@ -172,13 +178,11 @@ struct ib_device_operations { * 
Create completion queue * * @v ibdev Infiniband device - * @v log2_num_cqes Log2 of the number of completion queue entries - * @ret new_cq New completion queue + * @v cq Completion queue * @ret rc Return status code */ int ( * create_cq ) ( struct ib_device *ibdev, - unsigned int log2_num_cqes, - struct ib_completion_queue **new_cq ); + struct ib_completion_queue *cq ); /** * Destroy completion queue * @@ -237,8 +241,10 @@ struct ib_device_operations { /** An Infiniband device */ struct ib_device { - /** Driver private data */ - void *priv; + /** Infiniband operations */ + struct ib_device_operations *op; + /** Device private data */ + void *dev_priv; }; diff --git a/src/net/infiniband.c b/src/net/infiniband.c index 694c88b11..2a29c5b2f 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -17,11 +17,13 @@ */ #include +#include #include #include #include #include #include +#include #include #include #include @@ -33,6 +35,55 @@ * */ +/** + * Create completion queue + * + * @v ibdev Infiniband device + * @v num_cqes Number of completion queue entries + * @ret cq New completion queue + */ +struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev, + unsigned int num_cqes ) { + struct ib_completion_queue *cq; + int rc; + + DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev ); + + /* Allocate and initialise data structure */ + cq = zalloc ( sizeof ( *cq ) ); + if ( ! 
cq ) + return NULL; + cq->num_cqes = num_cqes; + INIT_LIST_HEAD ( &cq->work_queues ); + + /* Perform device-specific initialisation and get CQN */ + if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not initialise CQ: %s\n", + ibdev, strerror ( rc ) ); + free ( cq ); + return NULL; + } + + DBGC ( ibdev, "IBDEV %p created completion queue %#lx\n", + ibdev, cq->cqn ); + return cq; +} + +/** + * Destroy completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + */ +void ib_destroy_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq ) { + DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n", + ibdev, cq->cqn ); + assert ( list_empty ( &cq->work_queues ) ); + ibdev->op->destroy_cq ( ibdev, cq ); + free ( cq ); +} + /** * Find work queue belonging to completion queue * -- cgit v1.2.3-55-g7522 From 6d15a193aa9e3e4129a885e7010d0d480e723bb8 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 17:36:40 +0100 Subject: Add fls() for non-constant values. --- src/core/bitops.c | 10 ++++++++++ src/include/strings.h | 6 +----- 2 files changed, 11 insertions(+), 5 deletions(-) create mode 100644 src/core/bitops.c (limited to 'src/include') diff --git a/src/core/bitops.c b/src/core/bitops.c new file mode 100644 index 000000000..75d57bf99 --- /dev/null +++ b/src/core/bitops.c @@ -0,0 +1,10 @@ +#include + +int __flsl ( long x ) { + int r = 0; + + for ( r = 0 ; x ; r++ ) { + x >>= 1; + } + return r; +} diff --git a/src/include/strings.h b/src/include/strings.h index a087b1d50..968a7c11d 100644 --- a/src/include/strings.h +++ b/src/include/strings.h @@ -40,17 +40,13 @@ __constant_flsl ( unsigned long x ) { return r; } -#define __constant_fls(x) __constant_flsl(x) - /* We don't actually have these functions yet */ -extern int __fls ( int x ); extern int __flsl ( long x ); #define flsl( x ) \ ( __builtin_constant_p ( x ) ? 
__constant_flsl ( x ) : __flsl ( x ) ) -#define fls( x ) \ - ( __builtin_constant_p ( x ) ? __constant_fls ( x ) : __fls ( x ) ) +#define fls( x ) flsl ( x ) extern int strcasecmp ( const char *s1, const char *s2 ); -- cgit v1.2.3-55-g7522 From 251cc84ed6e10c86a0e97831d8817b993dac13db Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 19:03:24 +0100 Subject: Started implementing create_qp() and destroy_qp(). --- src/drivers/net/mlx_ipoib/arbel.h | 13 +- src/drivers/net/mlx_ipoib/mt25218.c | 233 ++++++++++++++++++++++++++++++------ src/include/gpxe/infiniband.h | 32 ++++- src/net/infiniband.c | 72 ++++++++++- 4 files changed, 302 insertions(+), 48 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index a1ca21f99..2ef446fa6 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -29,6 +29,10 @@ #define ARBEL_HCR_QUERY_DEV_LIM 0x0003 #define ARBEL_HCR_SW2HW_CQ 0x0016 #define ARBEL_HCR_HW2SW_CQ 0x0017 +#define ARBEL_HCR_RST2INIT_QPEE 0x0019 +#define ARBEL_HCR_INIT2RTR_QPEE 0x001a +#define ARBEL_HCR_RTR2RTS_QPEE 0x001b +#define ARBEL_HCR_2RST_QPEE 0x0021 /* * Wrapper structures for hardware datatypes @@ -43,6 +47,7 @@ struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_hca_command_register ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_query_dev_lim ); +struct MLX_DECLARE_STRUCT ( arbelprm_queue_pair_ee_context_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_recv_wqe_segment_next ); struct MLX_DECLARE_STRUCT ( arbelprm_send_doorbell ); struct MLX_DECLARE_STRUCT ( arbelprm_ud_address_vector ); @@ -126,6 +131,8 @@ struct arbel_send_work_queue { unsigned int doorbell_idx; /** Work queue entries */ union arbel_send_wqe *wqe; + /** Size of work queue */ + size_t wqe_size; }; /** Alignment of Arbel receive work queue entries */ @@ -143,6 +150,8 @@ struct 
arbel_recv_work_queue { unsigned int doorbell_idx; /** Work queue entries */ union arbel_recv_wqe *wqe; + /** Size of work queue */ + size_t wqe_size; }; /** Maximum number of allocatable queue pairs @@ -156,8 +165,6 @@ struct arbel_recv_work_queue { /** An Arbel queue pair */ struct arbel_queue_pair { - /** Infiniband queue pair */ - struct ib_queue_pair qp; /** Send work queue */ struct arbel_send_work_queue send; /** Receive work queue */ @@ -178,6 +185,8 @@ struct arbel_completion_queue { unsigned int arm_doorbell_idx; /** Completion queue entries */ union arbelprm_completion_entry *cqe; + /** Size of completion queue */ + size_t cqe_size; }; /** An Arbel resource bitmask */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index c466adb6b..383689d2a 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -464,6 +464,40 @@ arbel_cmd_hw2sw_cq ( struct arbel *arbel, unsigned long cqn ) { 1, NULL, cqn, NULL ); } +static inline int +arbel_cmd_rst2init_qpee ( struct arbel *arbel, unsigned long qpn, + struct arbelprm_queue_pair_ee_context_entry *ctx ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_RST2INIT_QPEE, + 1, sizeof ( *ctx ) ), + 0, ctx, qpn, NULL ); +} + +static inline int +arbel_cmd_init2rtr_qpee ( struct arbel *arbel, unsigned long qpn, + struct arbelprm_queue_pair_ee_context_entry *ctx ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_INIT2RTR_QPEE, + 1, sizeof ( *ctx ) ), + 0, ctx, qpn, NULL ); +} + +static inline int +arbel_cmd_rtr2rts_qpee ( struct arbel *arbel, unsigned long qpn, + struct arbelprm_queue_pair_ee_context_entry *ctx ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_RTR2RTS_QPEE, + 1, sizeof ( *ctx ) ), + 0, ctx, qpn, NULL ); +} + +static inline int +arbel_cmd_2rst_qpee ( struct arbel *arbel, unsigned long qpn ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_2RST_QPEE ), + 0x03, NULL, qpn, NULL ); +} + 
/*************************************************************************** * * Completion queue operations @@ -486,7 +520,6 @@ static int arbel_create_cq ( struct ib_device *ibdev, struct arbelprm_cq_ci_db_record *ci_db_rec; struct arbelprm_cq_arm_db_record *arm_db_rec; int cqn_offset; - size_t cqe_size; unsigned int i; int rc; @@ -509,13 +542,14 @@ static int arbel_create_cq ( struct ib_device *ibdev, arbel_cq->arm_doorbell_idx = arbel_cq_arm_doorbell_idx ( cqn_offset ); /* Allocate completion queue itself */ - cqe_size = ( cq->num_cqes * sizeof ( arbel_cq->cqe[0] ) ); - arbel_cq->cqe = malloc_dma ( cqe_size, sizeof ( arbel_cq->cqe[0] ) ); + arbel_cq->cqe_size = ( cq->num_cqes * sizeof ( arbel_cq->cqe[0] ) ); + arbel_cq->cqe = malloc_dma ( arbel_cq->cqe_size, + sizeof ( arbel_cq->cqe[0] ) ); if ( ! arbel_cq->cqe ) { rc = -ENOMEM; goto err_cqe; } - memset ( arbel_cq->cqe, 0, cqe_size ); + memset ( arbel_cq->cqe, 0, arbel_cq->cqe_size ); for ( i = 0 ; i < cq->num_cqes ; i++ ) { MLX_FILL_1 ( &arbel_cq->cqe[i].normal, 7, owner, 1 ); } @@ -538,11 +572,9 @@ static int arbel_create_cq ( struct ib_device *ibdev, MLX_FILL_1 ( &cqctx, 0, st, 0xa /* "Event fired" */ ); MLX_FILL_1 ( &cqctx, 2, start_address_l, virt_to_bus ( arbel_cq->cqe ) ); -#if 0 MLX_FILL_2 ( &cqctx, 3, usr_page, arbel->limits.reserved_uars, - log_cq_size, log2_num_cqes ); -#endif + log_cq_size, ( fls ( cq->num_cqes ) - 1 ) ); MLX_FILL_1 ( &cqctx, 5, c_eqn, arbel->eqn ); MLX_FILL_1 ( &cqctx, 6, pd, ARBEL_GLOBAL_PD ); MLX_FILL_1 ( &cqctx, 7, l_key, arbel->reserved_lkey ); @@ -554,16 +586,16 @@ static int arbel_create_cq ( struct ib_device *ibdev, if ( ( rc = arbel_cmd_sw2hw_cq ( arbel, cq->cqn, &cqctx ) ) != 0 ) { DBGC ( arbel, "Arbel %p SW2HW_CQ failed: %s\n", arbel, strerror ( rc ) ); - goto err_sw2hw; + goto err_sw2hw_cq; } cq->dev_priv = arbel_cq; return 0; - err_sw2hw: + err_sw2hw_cq: MLX_FILL_1 ( ci_db_rec, 1, res, ARBEL_UAR_RES_NONE ); MLX_FILL_1 ( arm_db_rec, 1, res, ARBEL_UAR_RES_NONE ); - 
free_dma ( arbel_cq->cqe, cqe_size ); + free_dma ( arbel_cq->cqe, arbel_cq->cqe_size ); err_cqe: free ( arbel_cq ); err_arbel_cq: @@ -585,35 +617,31 @@ static void arbel_destroy_cq ( struct ib_device *ibdev, struct arbelprm_cq_ci_db_record *ci_db_rec; struct arbelprm_cq_arm_db_record *arm_db_rec; int cqn_offset; - size_t cqe_size; - unsigned int ci_doorbell_idx; - unsigned int arm_doorbell_idx; int rc; - assert ( list_empty ( &cq->work_queues ) ); - /* Take ownership back from hardware */ if ( ( rc = arbel_cmd_hw2sw_cq ( arbel, cq->cqn ) ) != 0 ) { - DBGC ( arbel, "Arbel %p FATAL HW2SW_CQ failed: %s\n", - arbel, strerror ( rc ) ); + DBGC ( arbel, "Arbel %p FATAL HW2SW_CQ failed on CQN %#lx: " + "%s\n", arbel, cq->cqn, strerror ( rc ) ); /* Leak memory and return; at least we avoid corruption */ return; } /* Clear doorbell records */ - cqn_offset = ( cq->cqn - arbel->limits.reserved_cqs ); - ci_doorbell_idx = arbel_cq_ci_doorbell_idx ( cqn_offset ); - arm_doorbell_idx = arbel_cq_arm_doorbell_idx ( cqn_offset ); - ci_db_rec = &arbel->db_rec[ci_doorbell_idx].cq_ci; - arm_db_rec = &arbel->db_rec[arm_doorbell_idx].cq_arm; + ci_db_rec = &arbel->db_rec[arbel_cq->ci_doorbell_idx].cq_ci; + arm_db_rec = &arbel->db_rec[arbel_cq->arm_doorbell_idx].cq_arm; MLX_FILL_1 ( ci_db_rec, 1, res, ARBEL_UAR_RES_NONE ); MLX_FILL_1 ( arm_db_rec, 1, res, ARBEL_UAR_RES_NONE ); /* Free memory */ - cqe_size = ( cq->num_cqes * sizeof ( arbel_cq->cqe[0] ) ); - free_dma ( arbel_cq->cqe, cqe_size ); + free_dma ( arbel_cq->cqe, arbel_cq->cqe_size ); free ( arbel_cq ); + + /* Mark queue number as free */ + cqn_offset = ( cq->cqn - arbel->limits.reserved_cqs ); arbel_free_qn_offset ( arbel->cq_inuse, cqn_offset ); + + cq->dev_priv = NULL; } /*************************************************************************** @@ -623,22 +651,50 @@ static void arbel_destroy_cq ( struct ib_device *ibdev, *************************************************************************** */ +static int 
arbel_create_send_wq ( struct arbel_send_work_queue *arbel_send_wq, + unsigned int num_wqes ) { + + arbel_send_wq->wqe_size = ( num_wqes * + sizeof ( arbel_send_wq->wqe[0] ) ); + arbel_send_wq->wqe = malloc_dma ( arbel_send_wq->wqe_size, + sizeof ( arbel_send_wq->wqe[0] ) ); + if ( ! arbel_send_wq->wqe ) + return -ENOMEM; + + // initialise (prelink?) +} + +static int arbel_create_recv_wq ( struct arbel_recv_work_queue *arbel_recv_wq, + unsigned int num_wqes ) { + + arbel_recv_wq->wqe_size = ( num_wqes * + sizeof ( arbel_recv_wq->wqe[0] ) ); + arbel_recv_wq->wqe = malloc_dma ( arbel_recv_wq->wqe_size, + sizeof ( arbel_recv_wq->wqe[0] ) ); + if ( ! arbel_recv_wq->wqe ) + return -ENOMEM; + + // initialise (prelink?) +} + + + + +/** + * Create queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @ret rc Return status code + */ static int arbel_create_qp ( struct ib_device *ibdev, - unsigned int log2_num_send_wqes, - struct ib_completion_queue *send_cq, - unsigned int log2_num_recv_wqes, - struct ib_completion_queue *recv_cq, - struct ib_queue_pair **new_qp ) { + struct ib_queue_pair *qp ) { struct arbel *arbel = ibdev->dev_priv; struct arbel_queue_pair *arbel_qp; + struct arbelprm_queue_pair_ee_context_entry qpctx; struct arbelprm_qp_db_record *send_db_rec; struct arbelprm_qp_db_record *recv_db_rec; int qpn_offset; - unsigned int qpn; - unsigned int num_send_wqes; - unsigned int num_recv_wqes; - unsigned int send_doorbell_idx; - unsigned int recv_doorbell_idx; int rc; /* Find a free queue pair number */ @@ -648,21 +704,117 @@ static int arbel_create_qp ( struct ib_device *ibdev, rc = qpn_offset; goto err_qpn_offset; } - qpn = ( ARBEL_QPN_BASE + arbel->limits.reserved_qps + qpn_offset ); - send_doorbell_idx = arbel_send_doorbell_idx ( qpn_offset ); - recv_doorbell_idx = arbel_recv_doorbell_idx ( qpn_offset ); + qp->qpn = ( ARBEL_QPN_BASE + arbel->limits.reserved_qps + qpn_offset ); /* Allocate control structures */ - num_send_wqes = ( 1 << 
log2_num_send_wqes ); - num_recv_wqes = ( 1 << log2_num_recv_wqes ); arbel_qp = zalloc ( sizeof ( *arbel_qp ) ); + if ( ! arbel_qp ) { + rc = -ENOMEM; + goto err_arbel_qp; + } + arbel_qp->send.doorbell_idx = arbel_send_doorbell_idx ( qpn_offset ); + arbel_qp->recv.doorbell_idx = arbel_recv_doorbell_idx ( qpn_offset ); + + /* Create send and receive work queues */ + if ( ( rc = arbel_create_send_wq ( &arbel_qp->send, + qp->send.num_wqes ) ) != 0 ) + goto err_create_send_wq; + if ( ( rc = arbel_create_recv_wq ( &arbel_qp->recv, + qp->recv.num_wqes ) ) != 0 ) + goto err_create_recv_wq; + + /* Initialise doorbell records */ + send_db_rec = &arbel->db_rec[arbel_qp->send.doorbell_idx].qp; + MLX_FILL_1 ( send_db_rec, 0, counter, 0 ); + MLX_FILL_2 ( send_db_rec, 1, + res, ARBEL_UAR_RES_SQ, + qp_number, qp->qpn ); + recv_db_rec = &arbel->db_rec[arbel_qp->recv.doorbell_idx].qp; + MLX_FILL_1 ( recv_db_rec, 0, counter, 0 ); + MLX_FILL_2 ( recv_db_rec, 1, + res, ARBEL_UAR_RES_RQ, + qp_number, qp->qpn ); + + /* Hand queue over to hardware */ + memset ( &qpctx, 0, sizeof ( qpctx ) ); + // ... 
fill in context + if ( ( rc = arbel_cmd_rst2init_qpee ( arbel, qp->qpn, &qpctx )) != 0 ){ + DBGC ( arbel, "Arbel %p RST2INIT_QPEE failed: %s\n", + arbel, strerror ( rc ) ); + goto err_rst2init_qpee; + } + if ( ( rc = arbel_cmd_init2rtr_qpee ( arbel, qp->qpn, &qpctx )) != 0 ){ + DBGC ( arbel, "Arbel %p INIT2RTR_QPEE failed: %s\n", + arbel, strerror ( rc ) ); + goto err_init2rtr_qpee; + } + if ( ( rc = arbel_cmd_rtr2rts_qpee ( arbel, qp->qpn, &qpctx ) ) != 0 ){ + DBGC ( arbel, "Arbel %p RTR2RTS_QPEE failed: %s\n", + arbel, strerror ( rc ) ); + goto err_rtr2rts_qpee; + } + qp->dev_priv = arbel_qp; return 0; + err_rtr2rts_qpee: + err_init2rtr_qpee: + arbel_cmd_2rst_qpee ( arbel, qp->qpn ); + err_rst2init_qpee: + MLX_FILL_1 ( send_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + MLX_FILL_1 ( recv_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + free_dma ( arbel_qp->recv.wqe, arbel_qp->recv.wqe_size ); + err_create_recv_wq: + free_dma ( arbel_qp->send.wqe, arbel_qp->send.wqe_size ); + err_create_send_wq: + free ( arbel_qp ); + err_arbel_qp: + arbel_free_qn_offset ( arbel->qp_inuse, qpn_offset ); err_qpn_offset: return rc; } +/** + * Destroy queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + */ +static void arbel_destroy_qp ( struct ib_device *ibdev, + struct ib_queue_pair *qp ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_queue_pair *arbel_qp = qp->dev_priv; + struct arbelprm_qp_db_record *send_db_rec; + struct arbelprm_qp_db_record *recv_db_rec; + int qpn_offset; + int rc; + + /* Take ownership back from hardware */ + if ( ( rc = arbel_cmd_2rst_qpee ( arbel, qp->qpn ) ) != 0 ) { + DBGC ( arbel, "Arbel %p FATAL 2RST_QPEE failed on QPN %#lx: " + "%s\n", arbel, qp->qpn, strerror ( rc ) ); + /* Leak memory and return; at least we avoid corruption */ + return; + } + + /* Clear doorbell records */ + send_db_rec = &arbel->db_rec[arbel_qp->send.doorbell_idx].qp; + recv_db_rec = &arbel->db_rec[arbel_qp->recv.doorbell_idx].qp; + MLX_FILL_1 ( send_db_rec, 1, res, 
ARBEL_UAR_RES_NONE ); + MLX_FILL_1 ( recv_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + + /* Free memory */ + free_dma ( arbel_qp->send.wqe, arbel_qp->send.wqe_size ); + free_dma ( arbel_qp->recv.wqe, arbel_qp->recv.wqe_size ); + free ( arbel_qp ); + + /* Mark queue number as free */ + qpn_offset = ( qp->qpn - ARBEL_QPN_BASE - arbel->limits.reserved_qps ); + arbel_free_qn_offset ( arbel->qp_inuse, qpn_offset ); + + qp->dev_priv = NULL; +} + /*************************************************************************** * * Work request operations @@ -966,6 +1118,8 @@ static void arbel_poll_cq ( struct ib_device *ibdev, static struct ib_device_operations arbel_ib_operations = { .create_cq = arbel_create_cq, .destroy_cq = arbel_destroy_cq, + .create_qp = arbel_create_qp, + .destroy_qp = arbel_destroy_qp, .post_send = arbel_post_send, .post_recv = arbel_post_recv, .poll_cq = arbel_poll_cq, @@ -1048,6 +1202,7 @@ static int arbel_probe ( struct pci_device *pci, &static_ipoib_send_cq.work_queues ); list_add ( &static_ipoib_qp.recv.list, &static_ipoib_recv_cq.work_queues ); + static_ibdev.op = &arbel_ib_operations; /* Get device limits */ if ( ( rc = arbel_cmd_query_dev_lim ( arbel, &dev_lim ) ) != 0 ) { diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index d7f8b4ab2..4868f7174 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -174,8 +174,7 @@ struct ib_address_vector { * These represent a subset of the Infiniband Verbs. 
*/ struct ib_device_operations { - /** - * Create completion queue + /** Create completion queue * * @v ibdev Infiniband device * @v cq Completion queue @@ -183,14 +182,28 @@ struct ib_device_operations { */ int ( * create_cq ) ( struct ib_device *ibdev, struct ib_completion_queue *cq ); - /** - * Destroy completion queue + /** Destroy completion queue * * @v ibdev Infiniband device * @v cq Completion queue */ void ( * destroy_cq ) ( struct ib_device *ibdev, struct ib_completion_queue *cq ); + /** Create queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @ret rc Return status code + */ + int ( * create_qp ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp ); + /** Destroy queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + */ + void ( * destroy_qp ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp ); /** Post send work queue entry * * @v ibdev Infiniband device @@ -247,7 +260,16 @@ struct ib_device { void *dev_priv; }; - +extern struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev, + unsigned int num_cqes ); +extern void ib_destroy_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq ); +extern struct ib_queue_pair * +ib_create_qp ( struct ib_device *ibdev, unsigned int num_send_wqes, + struct ib_completion_queue *send_cq, unsigned int num_recv_wqes, + struct ib_completion_queue *recv_cq ); +extern void ib_destroy_qp ( struct ib_device *ibdev, + struct ib_queue_pair *qp ); extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, unsigned long qpn, int is_send ); diff --git a/src/net/infiniband.c b/src/net/infiniband.c index 2a29c5b2f..9a0692eec 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -58,8 +58,8 @@ struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev, /* Perform device-specific initialisation and get CQN */ if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) { - DBGC ( ibdev, "IBDEV %p could not initialise CQ: %s\n", - ibdev, strerror 
( rc ) ); + DBGC ( ibdev, "IBDEV %p could not initialise completion " + "queue: %s\n", ibdev, strerror ( rc ) ); free ( cq ); return NULL; } @@ -84,6 +84,74 @@ void ib_destroy_cq ( struct ib_device *ibdev, free ( cq ); } +/** + * Create queue pair + * + * @v ibdev Infiniband device + * @v num_send_wqes Number of send work queue entries + * @v send_cq Send completion queue + * @v num_recv_wqes Number of receive work queue entries + * @v recv_cq Receive completion queue + * @ret qp Queue pair + */ +struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, + unsigned int num_send_wqes, + struct ib_completion_queue *send_cq, + unsigned int num_recv_wqes, + struct ib_completion_queue *recv_cq ) { + struct ib_queue_pair *qp; + int rc; + + DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev ); + + /* Allocate and initialise data structure */ + qp = zalloc ( sizeof ( *qp ) + + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) + + ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) ); + if ( ! 
qp ) + return NULL; + qp->send.qp = qp; + qp->send.is_send = 1; + qp->send.cq = send_cq; + list_add ( &qp->send.list, &send_cq->work_queues ); + qp->send.num_wqes = num_send_wqes; + qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) ); + qp->recv.qp = qp; + qp->recv.cq = recv_cq; + list_add ( &qp->recv.list, &recv_cq->work_queues ); + qp->recv.num_wqes = num_recv_wqes; + qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) + + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) )); + + /* Perform device-specific initialisation and get QPN */ + if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not initialise queue pair: " + "%s\n", ibdev, strerror ( rc ) ); + free ( qp ); + return NULL; + } + + DBGC ( ibdev, "IBDEV %p created queue pair %#lx\n", + ibdev, qp->qpn ); + return qp; +} + +/** + * Destroy queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + */ +void ib_destroy_qp ( struct ib_device *ibdev, + struct ib_queue_pair *qp ) { + DBGC ( ibdev, "IBDEV %p destroying queue pair %#lx\n", + ibdev, qp->qpn ); + ibdev->op->destroy_qp ( ibdev, qp ); + free ( qp ); +} + + + /** * Find work queue belonging to completion queue * -- cgit v1.2.3-55-g7522 From 7e85f0d296f1ef908a6eb521f630b396108ffef9 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 20:54:21 +0100 Subject: create_qp() and destroy_qp() now written (but not tested). 
--- src/drivers/net/mlx_ipoib/arbel.h | 7 +++ src/drivers/net/mlx_ipoib/bit_ops.h | 23 +++++++ src/drivers/net/mlx_ipoib/ib_driver.h | 2 +- src/drivers/net/mlx_ipoib/mt25218.c | 112 +++++++++++++++++++++++++++++----- src/include/gpxe/infiniband.h | 4 +- src/net/infiniband.c | 5 +- 6 files changed, 135 insertions(+), 18 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index 2ef446fa6..47380dedf 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -34,6 +34,12 @@ #define ARBEL_HCR_RTR2RTS_QPEE 0x001b #define ARBEL_HCR_2RST_QPEE 0x0021 +/* Service types */ +#define ARBEL_ST_UD 0x01 + +/* MTUs */ +#define ARBEL_MTU_2048 0x04 + /* * Wrapper structures for hardware datatypes * @@ -46,6 +52,7 @@ struct MLX_DECLARE_STRUCT ( arbelprm_cq_arm_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_hca_command_register ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_qp_ee_state_transitions ); struct MLX_DECLARE_STRUCT ( arbelprm_query_dev_lim ); struct MLX_DECLARE_STRUCT ( arbelprm_queue_pair_ee_context_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_recv_wqe_segment_next ); diff --git a/src/drivers/net/mlx_ipoib/bit_ops.h b/src/drivers/net/mlx_ipoib/bit_ops.h index 960d06683..8b81bfcc3 100644 --- a/src/drivers/net/mlx_ipoib/bit_ops.h +++ b/src/drivers/net/mlx_ipoib/bit_ops.h @@ -204,6 +204,14 @@ struct addr_64_st { ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ MLX_ASSEMBLE_3 ( _structure_st, _index, __VA_ARGS__ ) ) +#define MLX_ASSEMBLE_5( _structure_st, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_4 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_ASSEMBLE_6( _structure_st, _index, _field, _value, ... 
) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_5 ( _structure_st, _index, __VA_ARGS__ ) ) + /* * Build native-endian (positive) dword bitmasks from named fields * @@ -225,6 +233,14 @@ struct addr_64_st { ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ MLX_MASK_3 ( _structure_st, _index, __VA_ARGS__ ) ) +#define MLX_MASK_5( _structure_st, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_4 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_MASK_6( _structure_st, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_5 ( _structure_st, _index, __VA_ARGS__ ) ) + /* * Populate big-endian dwords from named fields and values * @@ -253,6 +269,13 @@ struct addr_64_st { MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_4 ( MLX_PSEUDO_STRUCT ( _ptr ),\ _index, __VA_ARGS__ ) ) +#define MLX_FILL_5( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_5 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) + +#define MLX_FILL_6( _ptr, _index, ... 
) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_6 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) /* * Modify big-endian dword using named field and value diff --git a/src/drivers/net/mlx_ipoib/ib_driver.h b/src/drivers/net/mlx_ipoib/ib_driver.h index 57c028201..5ee46534d 100644 --- a/src/drivers/net/mlx_ipoib/ib_driver.h +++ b/src/drivers/net/mlx_ipoib/ib_driver.h @@ -55,7 +55,7 @@ enum { }; enum { - MADS_SND_CQN_SN, + MADS_SND_CQN_SN = 4, MADS_RCV_CQN_SN, IPOIB_SND_CQN_SN, IPOIB_RCV_CQN_SN, diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 383689d2a..8d6020dbb 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -466,7 +466,7 @@ arbel_cmd_hw2sw_cq ( struct arbel *arbel, unsigned long cqn ) { static inline int arbel_cmd_rst2init_qpee ( struct arbel *arbel, unsigned long qpn, - struct arbelprm_queue_pair_ee_context_entry *ctx ) { + const struct arbelprm_qp_ee_state_transitions *ctx ){ return arbel_cmd ( arbel, ARBEL_HCR_IN_CMD ( ARBEL_HCR_RST2INIT_QPEE, 1, sizeof ( *ctx ) ), @@ -475,7 +475,7 @@ arbel_cmd_rst2init_qpee ( struct arbel *arbel, unsigned long qpn, static inline int arbel_cmd_init2rtr_qpee ( struct arbel *arbel, unsigned long qpn, - struct arbelprm_queue_pair_ee_context_entry *ctx ) { + const struct arbelprm_qp_ee_state_transitions *ctx ){ return arbel_cmd ( arbel, ARBEL_HCR_IN_CMD ( ARBEL_HCR_INIT2RTR_QPEE, 1, sizeof ( *ctx ) ), @@ -484,7 +484,7 @@ arbel_cmd_init2rtr_qpee ( struct arbel *arbel, unsigned long qpn, static inline int arbel_cmd_rtr2rts_qpee ( struct arbel *arbel, unsigned long qpn, - struct arbelprm_queue_pair_ee_context_entry *ctx ) { + const struct arbelprm_qp_ee_state_transitions *ctx ) { return arbel_cmd ( arbel, ARBEL_HCR_IN_CMD ( ARBEL_HCR_RTR2RTS_QPEE, 1, sizeof ( *ctx ) ), @@ -574,7 +574,7 @@ static int arbel_create_cq ( struct ib_device *ibdev, virt_to_bus ( arbel_cq->cqe ) ); MLX_FILL_2 ( &cqctx, 3, usr_page, arbel->limits.reserved_uars, - 
log_cq_size, ( fls ( cq->num_cqes ) - 1 ) ); + log_cq_size, fls ( cq->num_cqes - 1 ) ); MLX_FILL_1 ( &cqctx, 5, c_eqn, arbel->eqn ); MLX_FILL_1 ( &cqctx, 6, pd, ARBEL_GLOBAL_PD ); MLX_FILL_1 ( &cqctx, 7, l_key, arbel->reserved_lkey ); @@ -651,35 +651,76 @@ static void arbel_destroy_cq ( struct ib_device *ibdev, *************************************************************************** */ +/** + * Create send work queue + * + * @v arbel_send_wq Send work queue + * @v num_wqes Number of work queue entries + * @ret rc Return status code + */ static int arbel_create_send_wq ( struct arbel_send_work_queue *arbel_send_wq, unsigned int num_wqes ) { + struct arbelprm_ud_send_wqe *wqe; + struct arbelprm_ud_send_wqe *next_wqe; + unsigned int wqe_idx_mask; + unsigned int i; + /* Allocate work queue */ arbel_send_wq->wqe_size = ( num_wqes * sizeof ( arbel_send_wq->wqe[0] ) ); arbel_send_wq->wqe = malloc_dma ( arbel_send_wq->wqe_size, sizeof ( arbel_send_wq->wqe[0] ) ); if ( ! arbel_send_wq->wqe ) return -ENOMEM; - - // initialise (prelink?) 
+ memset ( arbel_send_wq->wqe, 0, arbel_send_wq->wqe_size ); + + /* Link work queue entries */ + wqe_idx_mask = ( num_wqes - 1 ); + for ( i = 0 ; i < num_wqes ; i++ ) { + wqe = &arbel_send_wq->wqe[i].ud; + next_wqe = &arbel_send_wq->wqe[ ( i + 1 ) & wqe_idx_mask ].ud; + MLX_FILL_1 ( &wqe->next, 0, nda_31_6, + ( virt_to_bus ( next_wqe ) >> 6 ) ); + } + + return 0; } +/** + * Create receive work queue + * + * @v arbel_recv_wq Receive work queue + * @v num_wqes Number of work queue entries + * @ret rc Return status code + */ static int arbel_create_recv_wq ( struct arbel_recv_work_queue *arbel_recv_wq, unsigned int num_wqes ) { + struct arbelprm_recv_wqe *wqe; + struct arbelprm_recv_wqe *next_wqe; + unsigned int wqe_idx_mask; + unsigned int i; + /* Allocate work queue */ arbel_recv_wq->wqe_size = ( num_wqes * sizeof ( arbel_recv_wq->wqe[0] ) ); arbel_recv_wq->wqe = malloc_dma ( arbel_recv_wq->wqe_size, sizeof ( arbel_recv_wq->wqe[0] ) ); if ( ! arbel_recv_wq->wqe ) return -ENOMEM; - - // initialise (prelink?) + memset ( arbel_recv_wq->wqe, 0, arbel_recv_wq->wqe_size ); + + /* Link work queue entries */ + wqe_idx_mask = ( num_wqes - 1 ); + for ( i = 0 ; i < num_wqes ; i++ ) { + wqe = &arbel_recv_wq->wqe[i].recv; + next_wqe = &arbel_recv_wq->wqe[( i + 1 ) & wqe_idx_mask].recv; + MLX_FILL_1 ( &wqe->next, 0, nda_31_6, + ( virt_to_bus ( next_wqe ) >> 6 ) ); + } + + return 0; } - - - /** * Create queue pair * @@ -691,7 +732,7 @@ static int arbel_create_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) { struct arbel *arbel = ibdev->dev_priv; struct arbel_queue_pair *arbel_qp; - struct arbelprm_queue_pair_ee_context_entry qpctx; + struct arbelprm_qp_ee_state_transitions qpctx; struct arbelprm_qp_db_record *send_db_rec; struct arbelprm_qp_db_record *recv_db_rec; int qpn_offset; @@ -737,17 +778,53 @@ static int arbel_create_qp ( struct ib_device *ibdev, /* Hand queue over to hardware */ memset ( &qpctx, 0, sizeof ( qpctx ) ); - // ... 
fill in context + MLX_FILL_3 ( &qpctx, 2, + qpc_eec_data.de, 1, + qpc_eec_data.pm_state, 0x03 /* Always 0x03 for UD */, + qpc_eec_data.st, ARBEL_ST_UD ); + MLX_FILL_6 ( &qpctx, 4, + qpc_eec_data.mtu, ARBEL_MTU_2048, + qpc_eec_data.msg_max, 11 /* 2^11 = 2048 */, + qpc_eec_data.log_rq_size, fls ( qp->recv.num_wqes - 1 ), + qpc_eec_data.log_rq_stride, + ( fls ( sizeof ( arbel_qp->send.wqe[0] ) - 1 ) - 4 ), + qpc_eec_data.log_sq_size, fls ( qp->send.num_wqes - 1 ), + qpc_eec_data.log_sq_stride, + ( fls ( sizeof ( arbel_qp->recv.wqe[0] ) - 1 ) - 4 ) ); + MLX_FILL_1 ( &qpctx, 5, + qpc_eec_data.usr_page, arbel->limits.reserved_uars ); + MLX_FILL_1 ( &qpctx, 10, qpc_eec_data.primary_address_path.port_number, + PXE_IB_PORT ); + MLX_FILL_1 ( &qpctx, 27, qpc_eec_data.pd, ARBEL_GLOBAL_PD ); + MLX_FILL_1 ( &qpctx, 29, qpc_eec_data.wqe_lkey, arbel->reserved_lkey ); + MLX_FILL_1 ( &qpctx, 30, qpc_eec_data.ssc, 1 ); + MLX_FILL_1 ( &qpctx, 33, qpc_eec_data.cqn_snd, qp->send.cq->cqn ); + MLX_FILL_1 ( &qpctx, 34, qpc_eec_data.snd_wqe_base_adr_l, + ( virt_to_bus ( arbel_qp->send.wqe ) >> 6 ) ); + MLX_FILL_1 ( &qpctx, 35, qpc_eec_data.snd_db_record_index, + arbel_qp->send.doorbell_idx ); + MLX_FILL_1 ( &qpctx, 38, qpc_eec_data.rsc, 1 ); + MLX_FILL_1 ( &qpctx, 41, qpc_eec_data.cqn_rcv, qp->recv.cq->cqn ); + MLX_FILL_1 ( &qpctx, 42, qpc_eec_data.rcv_wqe_base_adr_l, + ( virt_to_bus ( arbel_qp->recv.wqe ) >> 6 ) ); + MLX_FILL_1 ( &qpctx, 43, qpc_eec_data.rcv_db_record_index, + arbel_qp->recv.doorbell_idx ); + MLX_FILL_1 ( &qpctx, 44, qpc_eec_data.q_key, qp->qkey ); if ( ( rc = arbel_cmd_rst2init_qpee ( arbel, qp->qpn, &qpctx )) != 0 ){ DBGC ( arbel, "Arbel %p RST2INIT_QPEE failed: %s\n", arbel, strerror ( rc ) ); goto err_rst2init_qpee; } + memset ( &qpctx, 0, sizeof ( qpctx ) ); + MLX_FILL_2 ( &qpctx, 4, + qpc_eec_data.mtu, ARBEL_MTU_2048, + qpc_eec_data.msg_max, 11 /* 2^11 = 2048 */ ); if ( ( rc = arbel_cmd_init2rtr_qpee ( arbel, qp->qpn, &qpctx )) != 0 ){ DBGC ( arbel, "Arbel %p 
INIT2RTR_QPEE failed: %s\n", arbel, strerror ( rc ) ); goto err_init2rtr_qpee; } + memset ( &qpctx, 0, sizeof ( qpctx ) ); if ( ( rc = arbel_cmd_rtr2rts_qpee ( arbel, qp->qpn, &qpctx ) ) != 0 ){ DBGC ( arbel, "Arbel %p RTR2RTS_QPEE failed: %s\n", arbel, strerror ( rc ) ); @@ -1215,8 +1292,13 @@ static int arbel_probe ( struct pci_device *pci, ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) ); arbel->limits.reserved_qps = ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); - DBG ( "Device limits:\n "); - DBG_HD ( &dev_lim, sizeof ( dev_lim ) ); + + DBG ( "MADS SND CQN = %#lx\n", dev_ib_data.mads_qp.snd_cq.cqn ); + struct ib_completion_queue *test_cq; + test_cq = ib_create_cq ( &static_ibdev, 32 ); + if ( test_cq ) { + DBG ( "Woot: create_cq() passed!\n" ); + } /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 4868f7174..632a214e7 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -97,6 +97,8 @@ struct ib_work_queue { struct ib_queue_pair { /** Queue Pair Number */ unsigned long qpn; + /** Queue key */ + unsigned long qkey; /** Send queue */ struct ib_work_queue send; /** Receive queue */ @@ -267,7 +269,7 @@ extern void ib_destroy_cq ( struct ib_device *ibdev, extern struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, unsigned int num_send_wqes, struct ib_completion_queue *send_cq, unsigned int num_recv_wqes, - struct ib_completion_queue *recv_cq ); + struct ib_completion_queue *recv_cq, unsigned long qkey ); extern void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ); extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, diff --git a/src/net/infiniband.c b/src/net/infiniband.c index 9a0692eec..a9ca0e31d 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -92,13 +92,15 @@ void ib_destroy_cq ( struct ib_device *ibdev, * @v send_cq Send completion queue * @v num_recv_wqes Number 
of receive work queue entries * @v recv_cq Receive completion queue + * @v qkey Queue key * @ret qp Queue pair */ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, unsigned int num_send_wqes, struct ib_completion_queue *send_cq, unsigned int num_recv_wqes, - struct ib_completion_queue *recv_cq ) { + struct ib_completion_queue *recv_cq, + unsigned long qkey ) { struct ib_queue_pair *qp; int rc; @@ -110,6 +112,7 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) ); if ( ! qp ) return NULL; + qp->qkey = qkey; qp->send.qp = qp; qp->send.is_send = 1; qp->send.cq = send_cq; -- cgit v1.2.3-55-g7522 From 3c6a6bdc5d78ff8e1ee2ff190183bbea33f0579f Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 00:24:44 +0100 Subject: Multicast join now works. --- src/drivers/net/mlx_ipoib/arbel.h | 25 +++++-- src/drivers/net/mlx_ipoib/mt25218.c | 136 ++++++++++++++++++++++++++++++++++++ src/include/gpxe/infiniband.h | 47 +++++++++++++ 3 files changed, 204 insertions(+), 4 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index cd6a48eb9..c4b536a56 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -33,6 +33,9 @@ #define ARBEL_HCR_INIT2RTR_QPEE 0x001a #define ARBEL_HCR_RTR2RTS_QPEE 0x001b #define ARBEL_HCR_2RST_QPEE 0x0021 +#define ARBEL_HCR_READ_MGM 0x0025 +#define ARBEL_HCR_WRITE_MGM 0x0026 +#define ARBEL_HCR_MGID_HASH 0x0027 /* Service types */ #define ARBEL_ST_UD 0x03 @@ -42,6 +45,17 @@ #define ARBEL_INVALID_LKEY 0x00000100UL +/* + * Datatypes that seem to be missing from the autogenerated documentation + * + */ +struct arbelprm_mgm_hash_st { + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ + pseudo_bit_t hash[0x00010]; + pseudo_bit_t reserved1[0x00010]; +}; + /* * Wrapper structures for hardware datatypes * @@ -53,6 +67,8 @@ struct MLX_DECLARE_STRUCT ( 
arbelprm_completion_with_error ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_arm_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_hca_command_register ); +struct MLX_DECLARE_STRUCT ( arbelprm_mgm_entry ); +struct MLX_DECLARE_STRUCT ( arbelprm_mgm_hash ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_ee_state_transitions ); struct MLX_DECLARE_STRUCT ( arbelprm_query_dev_lim ); @@ -266,7 +282,8 @@ struct arbel { #define ARBEL_HCR_OUT_LEN( _command ) ( ( (_command) >> 21 ) & 0x7fc ) /** Build HCR command from component parts */ -#define ARBEL_HCR_CMD( _opcode, _in_mbox, _in_len, _out_mbox, _out_len ) \ +#define ARBEL_HCR_INOUT_CMD( _opcode, _in_mbox, _in_len, \ + _out_mbox, _out_len ) \ ( (_opcode) | \ ( (_in_mbox) ? ARBEL_HCR_IN_MBOX : 0 ) | \ ( ( (_in_len) / 4 ) << 14 ) | \ @@ -274,13 +291,13 @@ struct arbel { ( ( (_out_len) / 4 ) << 23 ) ) #define ARBEL_HCR_IN_CMD( _opcode, _in_mbox, _in_len ) \ - ARBEL_HCR_CMD ( _opcode, _in_mbox, _in_len, 0, 0 ) + ARBEL_HCR_INOUT_CMD ( _opcode, _in_mbox, _in_len, 0, 0 ) #define ARBEL_HCR_OUT_CMD( _opcode, _out_mbox, _out_len ) \ - ARBEL_HCR_CMD ( _opcode, 0, 0, _out_mbox, _out_len ) + ARBEL_HCR_INOUT_CMD ( _opcode, 0, 0, _out_mbox, _out_len ) #define ARBEL_HCR_VOID_CMD( _opcode ) \ - ARBEL_HCR_CMD ( _opcode, 0, 0, 0, 0 ) + ARBEL_HCR_INOUT_CMD ( _opcode, 0, 0, 0, 0 ) /* * Doorbell record allocation diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 6cef59270..13b7d78b7 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -549,6 +549,34 @@ arbel_cmd_2rst_qpee ( struct arbel *arbel, unsigned long qpn ) { 0x03, NULL, qpn, NULL ); } +static inline int +arbel_cmd_read_mgm ( struct arbel *arbel, unsigned int index, + struct arbelprm_mgm_entry *mgm ) { + return arbel_cmd ( arbel, + ARBEL_HCR_OUT_CMD ( ARBEL_HCR_READ_MGM, + 1, sizeof ( *mgm ) ), + 0, NULL, 
index, mgm ); +} + +static inline int +arbel_cmd_write_mgm ( struct arbel *arbel, unsigned int index, + const struct arbelprm_mgm_entry *mgm ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_WRITE_MGM, + 1, sizeof ( *mgm ) ), + 0, mgm, index, NULL ); +} + +static inline int +arbel_cmd_mgid_hash ( struct arbel *arbel, const struct ib_gid *gid, + struct arbelprm_mgm_hash *hash ) { + return arbel_cmd ( arbel, + ARBEL_HCR_INOUT_CMD ( ARBEL_HCR_MGID_HASH, + 1, sizeof ( *gid ), + 0, sizeof ( *hash ) ), + 0, gid, 0, hash ); +} + /*************************************************************************** * * Completion queue operations @@ -1253,6 +1281,104 @@ static void arbel_poll_cq ( struct ib_device *ibdev, } } +/*************************************************************************** + * + * Multicast group operations + * + *************************************************************************** + */ + +/** + * Attach to multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + * @ret rc Return status code + */ +static int arbel_mcast_attach ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_gid *gid ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbelprm_mgm_hash hash; + struct arbelprm_mgm_entry mgm; + unsigned int index; + int rc; + + /* Generate hash table index */ + if ( ( rc = arbel_cmd_mgid_hash ( arbel, gid, &hash ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not hash GID: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + index = MLX_GET ( &hash, hash ); + + /* Check for existing hash table entry */ + if ( ( rc = arbel_cmd_read_mgm ( arbel, index, &mgm ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not read MGM %#x: %s\n", + arbel, index, strerror ( rc ) ); + return rc; + } + if ( MLX_GET ( &mgm, mgmqp_0.qi ) != 0 ) { + /* FIXME: this implementation allows only a single QP + * per multicast group, and doesn't handle hash + * collisions. 
Sufficient for IPoIB but may need to + * be extended in future. + */ + DBGC ( arbel, "Arbel %p MGID index %#x already in use\n", + arbel, index ); + return -EBUSY; + } + + /* Update hash table entry */ + MLX_FILL_2 ( &mgm, 8, + mgmqp_0.qpn_i, qp->qpn, + mgmqp_0.qi, 1 ); + memcpy ( &mgm.u.dwords[4], gid, sizeof ( *gid ) ); + if ( ( rc = arbel_cmd_write_mgm ( arbel, index, &mgm ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not write MGM %#x: %s\n", + arbel, index, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Detach from multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + */ +static void arbel_mcast_detach ( struct ib_device *ibdev, + struct ib_queue_pair *qp __unused, + struct ib_gid *gid ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbelprm_mgm_hash hash; + struct arbelprm_mgm_entry mgm; + unsigned int index; + int rc; + + /* Generate hash table index */ + if ( ( rc = arbel_cmd_mgid_hash ( arbel, gid, &hash ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not hash GID: %s\n", + arbel, strerror ( rc ) ); + return; + } + index = MLX_GET ( &hash, hash ); + + /* Clear hash table entry */ + memset ( &mgm, 0, sizeof ( mgm ) ); + if ( ( rc = arbel_cmd_write_mgm ( arbel, index, &mgm ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not write MGM %#x: %s\n", + arbel, index, strerror ( rc ) ); + return; + } +} + + + /** Arbel Infiniband operations */ static struct ib_device_operations arbel_ib_operations = { .create_cq = arbel_create_cq, @@ -1262,6 +1388,8 @@ static struct ib_device_operations arbel_ib_operations = { .post_send = arbel_post_send, .post_recv = arbel_post_recv, .poll_cq = arbel_poll_cq, + .mcast_attach = arbel_mcast_attach, + .mcast_detach = arbel_mcast_detach, }; /** @@ -1379,6 +1507,14 @@ static int arbel_probe ( struct pci_device *pci, return -EIO; } mlx->own_qp->owner_priv = netdev; + struct ib_gid *bcast_gid = ( struct ib_gid * ) &ib_data.bcast_gid; + if ( ( rc = ib_mcast_attach ( ibdev, 
mlx->own_qp, + bcast_gid ) ) != 0 ) { + DBG ( "Could not attach to broadcast GID: %s\n", + strerror ( rc ) ); + return rc; + } + mac = ( ( struct ib_mac * ) netdev->ll_addr ); mac->qpn = htonl ( mlx->own_qp->qpn ); diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 632a214e7..6a38a1b8f 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -252,6 +252,27 @@ struct ib_device_operations { struct ib_completion_queue *cq, ib_completer_t complete_send, ib_completer_t complete_recv ); + /** + * Attach to multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + * @ret rc Return status code + */ + int ( * mcast_attach ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_gid *gid ); + /** + * Detach from multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + */ + void ( * mcast_detach ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_gid *gid ); }; /** An Infiniband device */ @@ -275,6 +296,32 @@ extern void ib_destroy_qp ( struct ib_device *ibdev, extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, unsigned long qpn, int is_send ); +/** + * Attach to multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + * @ret rc Return status code + */ +static inline __attribute__ (( always_inline )) int +ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_gid *gid ) { + return ibdev->op->mcast_attach ( ibdev, qp, gid ); +} + +/** + * Detach from multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + */ +static inline __attribute__ (( always_inline )) void +ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_gid *gid ) { + ibdev->op->mcast_detach ( ibdev, qp, gid ); +} extern struct ll_protocol infiniband_protocol; -- cgit v1.2.3-55-g7522 From 
67836430e6a434cf8e3d6637bcd27b250d87003f Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 02:54:15 +0100 Subject: Read port GID directly using MAD IFC. --- src/drivers/net/mlx_ipoib/arbel.h | 7 ++ src/drivers/net/mlx_ipoib/mt25218.c | 135 ++++++++++++++++++++++++++++++++---- src/include/gpxe/infiniband.h | 115 ++++++++++++++++++++++++++++++ 3 files changed, 242 insertions(+), 15 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index c4b536a56..28893f511 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -33,6 +33,7 @@ #define ARBEL_HCR_INIT2RTR_QPEE 0x001a #define ARBEL_HCR_RTR2RTS_QPEE 0x001b #define ARBEL_HCR_2RST_QPEE 0x0021 +#define ARBEL_HCR_MAD_IFC 0x0024 #define ARBEL_HCR_READ_MGM 0x0025 #define ARBEL_HCR_WRITE_MGM 0x0026 #define ARBEL_HCR_MGID_HASH 0x0027 @@ -67,6 +68,7 @@ struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_arm_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_hca_command_register ); +struct MLX_DECLARE_STRUCT ( arbelprm_mad_ifc ); struct MLX_DECLARE_STRUCT ( arbelprm_mgm_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_mgm_hash ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); @@ -126,6 +128,11 @@ union arbelprm_doorbell_register { uint32_t dword[2]; } __attribute__ (( packed )); +union arbelprm_mad { + struct arbelprm_mad_ifc ifc; + union ib_mad mad; +} __attribute__ (( packed )); + /* * gPXE-specific definitions * diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 13b7d78b7..aed6d208a 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -549,6 +549,15 @@ arbel_cmd_2rst_qpee ( struct arbel *arbel, unsigned long qpn ) { 0x03, NULL, qpn, NULL ); } +static inline int +arbel_cmd_mad_ifc ( struct arbel *arbel, union 
arbelprm_mad *mad ) { + return arbel_cmd ( arbel, + ARBEL_HCR_INOUT_CMD ( ARBEL_HCR_MAD_IFC, + 1, sizeof ( *mad ), + 1, sizeof ( *mad ) ), + 0x03, mad, PXE_IB_PORT, mad ); +} + static inline int arbel_cmd_read_mgm ( struct arbel *arbel, unsigned int index, struct arbelprm_mgm_entry *mgm ) { @@ -1233,6 +1242,15 @@ static int arbel_complete ( struct ib_device *ibdev, return rc; } +/** + * Drain event queue + * + * @v arbel Arbel device + */ +static void arbel_drain_eq ( struct arbel *arbel ) { +#warning "drain the event queue" +} + /** * Poll completion queue * @@ -1252,6 +1270,9 @@ static void arbel_poll_cq ( struct ib_device *ibdev, unsigned int cqe_idx_mask; int rc; + /* Drain the event queue */ + arbel_drain_eq ( arbel ); + while ( 1 ) { /* Look for completion entry */ cqe_idx_mask = ( cq->num_cqes - 1 ); @@ -1377,8 +1398,6 @@ static void arbel_mcast_detach ( struct ib_device *ibdev, } } - - /** Arbel Infiniband operations */ static struct ib_device_operations arbel_ib_operations = { .create_cq = arbel_create_cq, @@ -1392,20 +1411,83 @@ static struct ib_device_operations arbel_ib_operations = { .mcast_detach = arbel_mcast_detach, }; -/** - * Remove PCI device - * - * @v pci PCI device - */ -static void arbel_remove ( struct pci_device *pci ) { - struct net_device *netdev = pci_get_drvdata ( pci ); - unregister_netdev ( netdev ); - ib_driver_close ( 0 ); - netdev_nullify ( netdev ); - netdev_put ( netdev ); +static int arbel_mad_ifc ( struct arbel *arbel, + union arbelprm_mad *mad ) { + struct ib_mad_hdr *hdr = &mad->mad.mad_hdr; + int rc; + + hdr->base_version = IB_MGMT_BASE_VERSION; + if ( ( rc = arbel_cmd_mad_ifc ( arbel, mad ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not issue MAD IFC: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + if ( hdr->status != 0 ) { + DBGC ( arbel, "Arbel %p MAD IFC status %04x\n", + arbel, ntohs ( hdr->status ) ); + return -EIO; + } + return 0; +} + +static int arbel_get_port_info ( struct arbel *arbel, + struct 
ib_mad_port_info *port_info ) { + union arbelprm_mad mad; + struct ib_mad_hdr *hdr = &mad.mad.mad_hdr; + int rc; + + memset ( &mad, 0, sizeof ( mad ) ); + hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + hdr->class_version = 1; + hdr->method = IB_MGMT_METHOD_GET; + hdr->attr_id = htons ( IB_SMP_ATTR_PORT_INFO ); + hdr->attr_mod = htonl ( PXE_IB_PORT ); + if ( ( rc = arbel_mad_ifc ( arbel, &mad ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get port info: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + memcpy ( port_info, &mad.mad.port_info, sizeof ( *port_info ) ); + return 0; +} + +static int arbel_get_guid_info ( struct arbel *arbel, + struct ib_mad_guid_info *guid_info ) { + union arbelprm_mad mad; + struct ib_mad_hdr *hdr = &mad.mad.mad_hdr; + int rc; + + memset ( &mad, 0, sizeof ( mad ) ); + hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + hdr->class_version = 1; + hdr->method = IB_MGMT_METHOD_GET; + hdr->attr_id = htons ( IB_SMP_ATTR_GUID_INFO ); + if ( ( rc = arbel_mad_ifc ( arbel, &mad ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get GUID info: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + memcpy ( guid_info, &mad.mad.guid_info, sizeof ( *guid_info ) ); + return 0; } +static int arbel_get_port_gid ( struct arbel *arbel, struct ib_gid *gid ) { + struct ib_mad_port_info port_info; + struct ib_mad_guid_info guid_info; + int rc; + + if ( ( rc = arbel_get_port_info ( arbel, &port_info ) ) != 0 ) + return rc; + if ( ( rc = arbel_get_guid_info ( arbel, &guid_info ) ) != 0 ) + return rc; + memcpy ( &gid->bytes[0], port_info.gid_prefix, 8 ); + memcpy ( &gid->bytes[8], guid_info.gid_local, 8 ); + return 0; +} + + + /** * Probe PCI device * @@ -1514,11 +1596,20 @@ static int arbel_probe ( struct pci_device *pci, strerror ( rc ) ); return rc; } - + + if ( ( rc = arbel_get_port_gid ( arbel, &ibdev->port_gid ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine port GID: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + + DBG ( "Port 
GID:\n" ); + DBG_HD ( &ibdev->port_gid, sizeof ( ibdev->port_gid ) ); + mac = ( ( struct ib_mac * ) netdev->ll_addr ); mac->qpn = htonl ( mlx->own_qp->qpn ); - memcpy ( &mac->gid, ib_data.port_gid.raw, sizeof ( mac->gid ) ); + memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) ); #endif #if 0 @@ -1545,6 +1636,20 @@ static int arbel_probe ( struct pci_device *pci, return rc; } +/** + * Remove PCI device + * + * @v pci PCI device + */ +static void arbel_remove ( struct pci_device *pci ) { + struct net_device *netdev = pci_get_drvdata ( pci ); + + unregister_netdev ( netdev ); + ib_driver_close ( 0 ); + netdev_nullify ( netdev ); + netdev_put ( netdev ); +} + static struct pci_device_id arbel_nics[] = { PCI_ROM ( 0x15b3, 0x6282, "MT25218", "MT25218 HCA driver" ), PCI_ROM ( 0x15b3, 0x6274, "MT25204", "MT25204 HCA driver" ), diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 6a38a1b8f..3f09808ca 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -277,6 +277,8 @@ struct ib_device_operations { /** An Infiniband device */ struct ib_device { + /** Port GID */ + struct ib_gid port_gid; /** Infiniband operations */ struct ib_device_operations *op; /** Device private data */ @@ -323,6 +325,119 @@ ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp, ibdev->op->mcast_detach ( ibdev, qp, gid ); } +/***************************************************************************** + * + * Management datagrams + * + * Portions Copyright (c) 2004 Mellanox Technologies Ltd. All rights + * reserved. 
+ * + */ + +/* Management base version */ +#define IB_MGMT_BASE_VERSION 1 + +/* Management classes */ +#define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01 +#define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 0x81 +#define IB_MGMT_CLASS_SUBN_ADM 0x03 +#define IB_MGMT_CLASS_PERF_MGMT 0x04 +#define IB_MGMT_CLASS_BM 0x05 +#define IB_MGMT_CLASS_DEVICE_MGMT 0x06 +#define IB_MGMT_CLASS_CM 0x07 +#define IB_MGMT_CLASS_SNMP 0x08 +#define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30 +#define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F + +/* Management methods */ +#define IB_MGMT_METHOD_GET 0x01 +#define IB_MGMT_METHOD_SET 0x02 +#define IB_MGMT_METHOD_GET_RESP 0x81 +#define IB_MGMT_METHOD_SEND 0x03 +#define IB_MGMT_METHOD_TRAP 0x05 +#define IB_MGMT_METHOD_REPORT 0x06 +#define IB_MGMT_METHOD_REPORT_RESP 0x86 +#define IB_MGMT_METHOD_TRAP_REPRESS 0x07 +#define IB_MGMT_METHOD_DELETE 0x15 +#define IB_MGMT_METHOD_RESP 0x80 + +/* Subnet management attributes */ +#define IB_SMP_ATTR_NOTICE 0x0002 +#define IB_SMP_ATTR_NODE_DESC 0x0010 +#define IB_SMP_ATTR_NODE_INFO 0x0011 +#define IB_SMP_ATTR_SWITCH_INFO 0x0012 +#define IB_SMP_ATTR_GUID_INFO 0x0014 +#define IB_SMP_ATTR_PORT_INFO 0x0015 +#define IB_SMP_ATTR_PKEY_TABLE 0x0016 +#define IB_SMP_ATTR_SL_TO_VL_TABLE 0x0017 +#define IB_SMP_ATTR_VL_ARB_TABLE 0x0018 +#define IB_SMP_ATTR_LINEAR_FORWARD_TABLE 0x0019 +#define IB_SMP_ATTR_RANDOM_FORWARD_TABLE 0x001A +#define IB_SMP_ATTR_MCAST_FORWARD_TABLE 0x001B +#define IB_SMP_ATTR_SM_INFO 0x0020 +#define IB_SMP_ATTR_VENDOR_DIAG 0x0030 +#define IB_SMP_ATTR_LED_INFO 0x0031 +#define IB_SMP_ATTR_VENDOR_MASK 0xFF00 + +struct ib_mad_hdr { + uint8_t base_version; + uint8_t mgmt_class; + uint8_t class_version; + uint8_t method; + uint16_t status; + uint16_t class_specific; + uint64_t tid; + uint16_t attr_id; + uint16_t resv; + uint32_t attr_mod; +} __attribute__ (( packed )); + +struct ib_mad_data { + struct ib_mad_hdr mad_hdr; + uint8_t data[232]; +} __attribute__ (( packed )); + +struct ib_mad_guid_info { + struct ib_mad_hdr mad_hdr; 
+ uint32_t mkey[2]; + uint32_t reserved[8]; + uint8_t gid_local[8]; +} __attribute__ (( packed )); + +struct ib_mad_port_info { + struct ib_mad_hdr mad_hdr; + uint32_t mkey[2]; + uint32_t reserved[8]; + uint32_t mkey2[2]; + uint8_t gid_prefix[8]; + uint16_t lid; + uint16_t mastersm_lid; + uint32_t cap_mask; + uint16_t diag_code; + uint16_t mkey_lease_period; + uint8_t local_port_num; + uint8_t link_width_enabled; + uint8_t link_width_supported; + uint8_t link_width_active; + uint8_t port_state__link_speed_supported; + uint8_t link_down_def_state__port_phys_state; + uint8_t lmc__r1__mkey_prot_bits; + uint8_t link_speed_enabled__link_speed_active; +} __attribute__ (( packed )); + +union ib_mad { + struct ib_mad_hdr mad_hdr; + struct ib_mad_data data; + struct ib_mad_guid_info guid_info; + struct ib_mad_port_info port_info; +} __attribute__ (( packed )); + + + + + + + extern struct ll_protocol infiniband_protocol; -- cgit v1.2.3-55-g7522 From 4e78a53cf26b85736123eee29d23d637b4a3883f Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 05:04:58 +0100 Subject: IPoIB code separated out to ipoib.c. --- src/drivers/net/ipoib.c | 411 +++++++++++++++++++++++++++++++++ src/drivers/net/mlx_ipoib/ib_driver.c | 5 +- src/drivers/net/mlx_ipoib/ib_driver.h | 2 +- src/drivers/net/mlx_ipoib/ib_mt25218.c | 2 +- src/drivers/net/mlx_ipoib/ipoib.c | 2 +- src/drivers/net/mlx_ipoib/mt25218.c | 143 +++++++++++- src/include/gpxe/errfile.h | 2 + src/include/gpxe/infiniband.h | 149 +++++++++--- src/include/gpxe/ipoib.h | 78 +++++++ src/net/infiniband.c | 32 ++- 10 files changed, 783 insertions(+), 43 deletions(-) create mode 100644 src/drivers/net/ipoib.c create mode 100644 src/include/gpxe/ipoib.h (limited to 'src/include') diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c new file mode 100644 index 000000000..9eed6b394 --- /dev/null +++ b/src/drivers/net/ipoib.c @@ -0,0 +1,411 @@ +/* + * Copyright (C) 2007 Michael Brown . 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** @file + * + * IP over Infiniband + */ + + + + + +extern unsigned long hack_ipoib_qkey; +extern struct ib_address_vector hack_ipoib_bcast_av; + + + +/** IPoIB MTU */ +#define IPOIB_MTU 2048 + +/** Number of IPoIB send work queue entries */ +#define IPOIB_NUM_SEND_WQES 8 + +/** Number of IPoIB receive work queue entries */ +#define IPOIB_NUM_RECV_WQES 8 + +/** Number of IPoIB completion entries */ +#define IPOIB_NUM_CQES 8 + +struct ipoib_device { + struct ib_device *ibdev; + struct ib_completion_queue *cq; + struct ib_queue_pair *qp; + unsigned int rx_fill; +}; + +/**************************************************************************** + * + * IPoIB link layer + * + **************************************************************************** + */ + +/** Broadcast IPoIB address */ +static struct ipoib_mac ipoib_broadcast = { + .gid = { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } }, +}; + +/** + * Transmit IPoIB packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v ll_dest Link-layer destination address + * + * Prepends the IPoIB link-layer header and 
transmits the packet. + */ +static int ipoib_tx ( struct io_buffer *iobuf, struct net_device *netdev, + struct net_protocol *net_protocol, + const void *ll_dest ) { + struct ipoib_hdr *ipoib_hdr = + iob_push ( iobuf, sizeof ( *ipoib_hdr ) ); + + /* Build IPoIB header */ + memcpy ( &ipoib_hdr->pseudo.peer, ll_dest, + sizeof ( ipoib_hdr->pseudo.peer ) ); + ipoib_hdr->real.proto = net_protocol->net_proto; + ipoib_hdr->real.reserved = 0; + + /* Hand off to network device */ + return netdev_tx ( netdev, iobuf ); +} + +/** + * Process received IPoIB packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * + * Strips off the IPoIB link-layer header and passes up to the + * network-layer protocol. + */ +static int ipoib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { + struct ipoib_hdr *ipoib_hdr = iobuf->data; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) { + DBG ( "IPoIB packet too short (%d bytes)\n", + iob_len ( iobuf ) ); + free_iob ( iobuf ); + return -EINVAL; + } + + /* Strip off IPoIB header */ + iob_pull ( iobuf, sizeof ( *ipoib_hdr ) ); + + /* Hand off to network-layer protocol */ + return net_rx ( iobuf, netdev, ipoib_hdr->real.proto, + &ipoib_hdr->pseudo.peer ); +} + +/** + * Transcribe IPoIB address + * + * @v ll_addr Link-layer address + * @ret string Link-layer address in human-readable format + */ +const char * ipoib_ntoa ( const void *ll_addr ) { + static char buf[61]; + const uint8_t *ipoib_addr = ll_addr; + unsigned int i; + char *p = buf; + + for ( i = 0 ; i < IPOIB_ALEN ; i++ ) { + p += sprintf ( p, ":%02x", ipoib_addr[i] ); + } + return ( buf + 1 ); +} + +/** IPoIB protocol */ +struct ll_protocol ipoib_protocol __ll_protocol = { + .name = "IPoIB", + .ll_proto = htons ( ARPHRD_INFINIBAND ), + .ll_addr_len = IPOIB_ALEN, + .ll_header_len = IPOIB_HLEN, + .ll_broadcast = ( uint8_t * ) &ipoib_broadcast, + .tx = ipoib_tx, + .rx = ipoib_rx, + .ntoa = ipoib_ntoa, +}; + 
+/**************************************************************************** + * + * IPoIB network device + * + **************************************************************************** + */ + +/** + * Transmit packet via IPoIB network device + * + * @v netdev Network device + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int ipoib_transmit ( struct net_device *netdev, + struct io_buffer *iobuf ) { + struct ipoib_device *ipoib = netdev->priv; + struct ib_device *ibdev = ipoib->ibdev; + struct ipoib_pseudo_hdr *ipoib_pshdr = iobuf->data; + + if ( iob_len ( iobuf ) < sizeof ( *ipoib_pshdr ) ) { + DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib ); + return -EINVAL; + } + + iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) ); + return ib_post_send ( ibdev, ipoib->qp, + &hack_ipoib_bcast_av, iobuf ); +} + +/** + * Handle IPoIB send completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v completion Completion + * @v iobuf I/O buffer + */ +static void ipoib_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->owner_priv; + + netdev_tx_complete_err ( netdev, iobuf, + ( completion->syndrome ? 
-EIO : 0 ) ); +} + +/** + * Handle IPoIB receive completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v completion Completion + * @v iobuf I/O buffer + */ +static void ipoib_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->owner_priv; + struct ipoib_device *ipoib = netdev->priv; + struct ib_global_route_header *grh = iobuf->data; + struct ipoib_pseudo_hdr *ipoib_pshdr; + + if ( completion->syndrome ) { + netdev_rx_err ( netdev, iobuf, -EIO ); + } else { + iob_put ( iobuf, completion->len ); + iob_pull ( iobuf, ( sizeof ( *grh ) - + sizeof ( *ipoib_pshdr ) ) ); + /* FIXME: fill in a MAC address for the sake of AoE! */ + netdev_rx ( netdev, iobuf ); + } + + ipoib->rx_fill--; +} + +/** + * Refill IPoIB receive ring + * + * @v ipoib IPoIB device + */ +static void ipoib_refill_recv ( struct ipoib_device *ipoib ) { + struct ib_device *ibdev = ipoib->ibdev; + struct io_buffer *iobuf; + int rc; + + while ( ipoib->rx_fill < IPOIB_NUM_RECV_WQES ) { + iobuf = alloc_iob ( IPOIB_MTU ); + if ( ! 
iobuf ) + break; + if ( ( rc = ib_post_recv ( ibdev, ipoib->qp, + iobuf ) ) != 0 ) { + free_iob ( iobuf ); + break; + } + ipoib->rx_fill++; + } +} + +/** + * Poll IPoIB network device + * + * @v netdev Network device + */ +static void ipoib_poll ( struct net_device *netdev ) { + struct ipoib_device *ipoib = netdev->priv; + struct ib_device *ibdev = ipoib->ibdev; + + ib_poll_cq ( ibdev, ipoib->cq, ipoib_complete_send, + ipoib_complete_recv ); + ipoib_refill_recv ( ipoib ); +} + +/** + * Enable/disable interrupts on IPoIB network device + * + * @v netdev Network device + * @v enable Interrupts should be enabled + */ +static void ipoib_irq ( struct net_device *netdev __unused, + int enable __unused ) { + /* No implementation */ +} + +/** + * Open IPoIB network device + * + * @v netdev Network device + * @ret rc Return status code + */ +static int ipoib_open ( struct net_device *netdev ) { + struct ipoib_device *ipoib = netdev->priv; + struct ib_device *ibdev = ipoib->ibdev; + int rc; + + /* Attach to broadcast multicast GID */ + if ( ( rc = ib_mcast_attach ( ibdev, ipoib->qp, + &ibdev->broadcast_gid ) ) != 0 ) { + DBG ( "Could not attach to broadcast GID: %s\n", + strerror ( rc ) ); + return rc; + } + + /* Fill receive ring */ + ipoib_refill_recv ( ipoib ); + + return 0; +} + +/** + * Close IPoIB network device + * + * @v netdev Network device + */ +static void ipoib_close ( struct net_device *netdev ) { + struct ipoib_device *ipoib = netdev->priv; + struct ib_device *ibdev = ipoib->ibdev; + + /* Detach from broadcast multicast GID */ + ib_mcast_detach ( ibdev, ipoib->qp, &ipoib_broadcast.gid ); + + /* FIXME: should probably flush the receive ring */ +} + +/** IPoIB network device operations */ +static struct net_device_operations ipoib_operations = { + .open = ipoib_open, + .close = ipoib_close, + .transmit = ipoib_transmit, + .poll = ipoib_poll, + .irq = ipoib_irq, +}; + +/** + * Probe IPoIB device + * + * @v ibdev Infiniband device + * @ret rc Return status code + 
*/ +int ipoib_probe ( struct ib_device *ibdev ) { + struct net_device *netdev; + struct ipoib_device *ipoib; + struct ipoib_mac *mac; + int rc; + + /* Allocate network device */ + netdev = alloc_ipoibdev ( sizeof ( *ipoib ) ); + if ( ! netdev ) + return -ENOMEM; + netdev_init ( netdev, &ipoib_operations ); + ipoib = netdev->priv; + ib_set_ownerdata ( ibdev, netdev ); + netdev->dev = ibdev->dev; + memset ( ipoib, 0, sizeof ( *ipoib ) ); + ipoib->ibdev = ibdev; + + /* Allocate completion queue */ + ipoib->cq = ib_create_cq ( ibdev, IPOIB_NUM_CQES ); + if ( ! ipoib->cq ) { + DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n", + ipoib ); + rc = -ENOMEM; + goto err_create_cq; + } + + /* Allocate queue pair */ + ipoib->qp = ib_create_qp ( ibdev, IPOIB_NUM_SEND_WQES, + ipoib->cq, IPOIB_NUM_RECV_WQES, + ipoib->cq, hack_ipoib_qkey ); + if ( ! ipoib->qp ) { + DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n", + ipoib ); + rc = -ENOMEM; + goto err_create_qp; + } + ipoib->qp->owner_priv = netdev; + + /* Construct MAC address */ + mac = ( ( struct ipoib_mac * ) netdev->ll_addr ); + mac->qpn = htonl ( ipoib->qp->qpn ); + memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) ); + + /* Register network device */ + if ( ( rc = register_netdev ( netdev ) ) != 0 ) + goto err_register_netdev; + + return 0; + + err_register_netdev: + ib_destroy_qp ( ibdev, ipoib->qp ); + err_create_qp: + ib_destroy_cq ( ibdev, ipoib->cq ); + err_create_cq: + netdev_nullify ( netdev ); + netdev_put ( netdev ); + return rc; +} + +/** + * Remove IPoIB device + * + * @v ibdev Infiniband device + */ +void ipoib_remove ( struct ib_device *ibdev ) { + struct net_device *netdev = ib_get_ownerdata ( ibdev ); + + unregister_netdev ( netdev ); + netdev_nullify ( netdev ); + netdev_put ( netdev ); +} diff --git a/src/drivers/net/mlx_ipoib/ib_driver.c b/src/drivers/net/mlx_ipoib/ib_driver.c index 590fb94db..34d4cbaaf 100644 --- a/src/drivers/net/mlx_ipoib/ib_driver.c +++ 
b/src/drivers/net/mlx_ipoib/ib_driver.c @@ -63,6 +63,7 @@ static int wait_logic_link_up(__u8 port) } unsigned long ipoib_qkey; +unsigned long hack_ipoib_qkey; static int ib_driver_init(struct pci_device *pci, udqp_t * ipoib_qph_p) { @@ -149,7 +150,7 @@ static int ib_driver_init(struct pci_device *pci, udqp_t * ipoib_qph_p) qkey, mlid); } - ipoib_qkey = qkey; + hack_ipoib_qkey = ipoib_qkey = qkey; #if 0 rc = create_ipoib_qp(&ib_data.ipoib_qp, @@ -285,7 +286,7 @@ static int poll_cqe_tout(cq_t cqh, __u16 tout, void **wqe, int *good_p) end = currticks() + tout; do { - rc = ib_poll_cq(cqh, &ib_cqe, &num_cqes); + rc = ib_poll_cqx(cqh, &ib_cqe, &num_cqes); if (rc) return rc; diff --git a/src/drivers/net/mlx_ipoib/ib_driver.h b/src/drivers/net/mlx_ipoib/ib_driver.h index 6dca8d30f..7fc57364d 100644 --- a/src/drivers/net/mlx_ipoib/ib_driver.h +++ b/src/drivers/net/mlx_ipoib/ib_driver.h @@ -153,7 +153,7 @@ static int gw_read_cr(__u32 addr, __u32 * result); static int gw_write_cr(__u32 addr, __u32 data); static ud_av_t alloc_ud_av(void); static void free_ud_av(ud_av_t av); -static int ib_poll_cq(cq_t cq, struct ib_cqe_st *ib_cqe_p, __u8 * num_cqes); +static int ib_poll_cqx(cq_t cq, struct ib_cqe_st *ib_cqe_p, __u8 * num_cqes); static int add_qp_to_mcast_group(union ib_gid_u mcast_gid, __u8 add); static int clear_interrupt(void); static int poll_cqe_tout(cq_t cqh, __u16 tout, void **wqe, int *good_p); diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index ba1108a36..a5d251d46 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -1730,7 +1730,7 @@ static void dev2ib_cqe(struct ib_cqe_st *ib_cqe_p, union cqe_st *cqe_p) byte_cnt); } -static int ib_poll_cq(void *cqh, struct ib_cqe_st *ib_cqe_p, u8 * num_cqes) +static int ib_poll_cqx(void *cqh, struct ib_cqe_st *ib_cqe_p, u8 * num_cqes) { int rc; union cqe_st cqe; diff --git a/src/drivers/net/mlx_ipoib/ipoib.c 
b/src/drivers/net/mlx_ipoib/ipoib.c index d4124f21b..d8dd6bf64 100644 --- a/src/drivers/net/mlx_ipoib/ipoib.c +++ b/src/drivers/net/mlx_ipoib/ipoib.c @@ -879,7 +879,7 @@ static int ipoib_read_packet(__u16 * prot_p, void *data, unsigned int *size_p, void *buf, *out_buf; __u16 prot_type; - rc = ib_poll_cq(ipoib_data.rcv_cqh, &ib_cqe, &num_cqes); + rc = ib_poll_cqx(ipoib_data.rcv_cqh, &ib_cqe, &num_cqes); if (rc) { return rc; } diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index aed6d208a..6aa4e7fed 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -16,6 +16,7 @@ Skeleton NIC driver for Etherboot #include #include #include +#include /* to get some global routines like printf */ #include "etherboot.h" @@ -29,11 +30,18 @@ Skeleton NIC driver for Etherboot #include "arbel.h" +struct ib_address_vector hack_ipoib_bcast_av; + + + + static const struct ib_gid arbel_no_gid = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } }; +#if 0 + #define MLX_RX_MAX_FILL NUM_IPOIB_RCV_WQES struct mlx_nic { @@ -275,6 +283,7 @@ static void mlx_poll ( struct net_device *netdev ) { &static_ipoib_send_cq, #endif temp_complete_send, temp_complete_recv ); +#if 0 arbel_poll_cq ( &static_ibdev, #if CREATE_OWN mlx->own_recv_cq, @@ -282,6 +291,7 @@ static void mlx_poll ( struct net_device *netdev ) { &static_ipoib_recv_cq, #endif temp_complete_send, temp_complete_recv ); +#endif mlx_refill_rx ( netdev ); } @@ -308,6 +318,8 @@ static struct net_device_operations mlx_operations = { }; +#endif /* 0 */ + /*************************************************************************** @@ -1488,6 +1500,8 @@ static int arbel_get_port_gid ( struct arbel *arbel, struct ib_gid *gid ) { +#if 0 + /** * Probe PCI device * @@ -1576,14 +1590,17 @@ static int arbel_probe ( struct pci_device *pci, DBG ( "Could not create send CQ\n" ); return -EIO; } +#if 0 mlx->own_recv_cq = ib_create_cq ( ibdev, 32 ); if ( ! 
mlx->own_recv_cq ) { DBG ( "Could not create send CQ\n" ); return -EIO; } +#endif mlx->own_qp = ib_create_qp ( ibdev, NUM_IPOIB_SND_WQES, mlx->own_send_cq, NUM_IPOIB_RCV_WQES, - mlx->own_recv_cq, ipoib_qkey ); + //mlx->own_recv_cq, ipoib_qkey ); + mlx->own_send_cq, ipoib_qkey ); if ( ! mlx->own_qp ) { DBG ( "Could not create QP\n" ); return -EIO; @@ -1621,6 +1638,22 @@ static int arbel_probe ( struct pci_device *pci, } #endif + ibdev->dev = &pci->dev; + + + struct ud_av_st *bcast_av = mlx->bcast_av; + struct arbelprm_ud_address_vector *bav = + ( struct arbelprm_ud_address_vector * ) &bcast_av->av; + struct ib_address_vector *av = &hack_ipoib_bcast_av; + av->dest_qp = bcast_av->dest_qp; + av->qkey = bcast_av->qkey; + av->dlid = MLX_GET ( bav, rlid ); + av->rate = ( MLX_GET ( bav, max_stat_rate ) ? 1 : 4 ); + av->sl = MLX_GET ( bav, sl ); + av->gid_present = 1; + memcpy ( &av->gid, ( ( void * ) bav ) + 16, 16 ); + + /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) goto err_register_netdev; @@ -1650,6 +1683,114 @@ static void arbel_remove ( struct pci_device *pci ) { netdev_put ( netdev ); } +#endif /* 0 */ + + + +/** + * Probe PCI device + * + * @v pci PCI device + * @v id PCI ID + * @ret rc Return status code + */ +static int arbel_probe ( struct pci_device *pci, + const struct pci_device_id *id __unused ) { + struct ib_device *ibdev; + struct arbelprm_query_dev_lim dev_lim; + struct arbel *arbel; + udqp_t qph; + int rc; + + /* Allocate Infiniband device */ + ibdev = alloc_ibdev ( sizeof ( *arbel ) ); + if ( ! 
ibdev ) + return -ENOMEM; + ibdev->op = &arbel_ib_operations; + pci_set_drvdata ( pci, ibdev ); + ibdev->dev = &pci->dev; + arbel = ibdev->dev_priv; + memset ( arbel, 0, sizeof ( *arbel ) ); + + /* Fix up PCI device */ + adjust_pci_device ( pci ); + + /* Initialise hardware */ + if ( ( rc = ib_driver_init ( pci, &qph ) ) != 0 ) + goto err_ib_driver_init; + + /* Hack up IB structures */ + arbel->config = memfree_pci_dev.cr_space; + arbel->mailbox_in = dev_buffers_p->inprm_buf; + arbel->mailbox_out = dev_buffers_p->outprm_buf; + arbel->uar = memfree_pci_dev.uar; + arbel->db_rec = dev_ib_data.uar_context_base; + arbel->reserved_lkey = dev_ib_data.mkey; + arbel->eqn = dev_ib_data.eq.eqn; + + /* Get device limits */ + if ( ( rc = arbel_cmd_query_dev_lim ( arbel, &dev_lim ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get device limits: %s\n", + arbel, strerror ( rc ) ); + goto err_query_dev_lim; + } + arbel->limits.reserved_uars = MLX_GET ( &dev_lim, num_rsvd_uars ); + arbel->limits.reserved_cqs = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) ); + arbel->limits.reserved_qps = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); + + /* Get port GID */ + if ( ( rc = arbel_get_port_gid ( arbel, &ibdev->port_gid ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine port GID: %s\n", + arbel, strerror ( rc ) ); + goto err_get_port_gid; + } + + struct ud_av_st *bcast_av = ib_data.bcast_av; + struct arbelprm_ud_address_vector *bav = + ( struct arbelprm_ud_address_vector * ) &bcast_av->av; + struct ib_address_vector *av = &hack_ipoib_bcast_av; + av->dest_qp = bcast_av->dest_qp; + av->qkey = bcast_av->qkey; + av->dlid = MLX_GET ( bav, rlid ); + av->rate = ( MLX_GET ( bav, max_stat_rate ) ? 
1 : 4 ); + av->sl = MLX_GET ( bav, sl ); + av->gid_present = 1; + memcpy ( &av->gid, ( ( void * ) bav ) + 16, 16 ); + + memcpy ( &ibdev->broadcast_gid, &ib_data.bcast_gid, 16 ); + + /* Add IPoIB device */ + if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not add IPoIB device: %s\n", + arbel, strerror ( rc ) ); + goto err_ipoib_probe; + } + + return 0; + + err_ipoib_probe: + err_get_port_gid: + err_query_dev_lim: + ib_driver_close ( 0 ); + err_ib_driver_init: + free_ibdev ( ibdev ); + return rc; +} + +/** + * Remove PCI device + * + * @v pci PCI device + */ +static void arbel_remove ( struct pci_device *pci ) { + struct ib_device *ibdev = pci_get_drvdata ( pci ); + + ipoib_remove ( ibdev ); + ib_driver_close ( 0 ); +} + static struct pci_device_id arbel_nics[] = { PCI_ROM ( 0x15b3, 0x6282, "MT25218", "MT25218 HCA driver" ), PCI_ROM ( 0x15b3, 0x6274, "MT25204", "MT25204 HCA driver" ), diff --git a/src/include/gpxe/errfile.h b/src/include/gpxe/errfile.h index 3413f9cf4..325d23872 100644 --- a/src/include/gpxe/errfile.h +++ b/src/include/gpxe/errfile.h @@ -101,6 +101,8 @@ #define ERRFILE_via_rhine ( ERRFILE_DRIVER | 0x00440000 ) #define ERRFILE_via_velocity ( ERRFILE_DRIVER | 0x00450000 ) #define ERRFILE_w89c840 ( ERRFILE_DRIVER | 0x00460000 ) +#define ERRFILE_ipoib ( ERRFILE_DRIVER | 0x00470000 ) +#define ERRFILE_mt25218 ( ERRFILE_DRIVER | 0x00480000 ) #define ERRFILE_scsi ( ERRFILE_DRIVER | 0x00700000 ) diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 3f09808ca..e9e0121dc 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -8,34 +8,11 @@ */ #include -#include +#include -/** An Infiniband Global Identifier */ -struct ib_gid { - uint8_t bytes[16]; -}; -/** An Infiniband Global Route Header */ -struct ib_global_route_header { - /** IP version, traffic class, and flow label - * - * 4 bits : Version of the GRH - * 8 bits : Traffic class - * 20 bits : Flow label - */ - uint32_t 
ipver_tclass_flowlabel; - /** Payload length */ - uint16_t paylen; - /** Next header */ - uint8_t nxthdr; - /** Hop limit */ - uint8_t hoplmt; - /** Source GID */ - struct ib_gid sgid; - /** Destiniation GID */ - struct ib_gid dgid; -} __attribute__ (( packed )); +#if 0 /** Infiniband MAC address length */ #define IB_ALEN 20 @@ -60,9 +37,41 @@ struct ibhdr { /** Reserved, must be zero */ uint16_t reserved; } __attribute__ (( packed )); +#endif + + + + + +/** An Infiniband Global Identifier */ +struct ib_gid { + uint8_t bytes[16]; +}; + +/** An Infiniband Global Route Header */ +struct ib_global_route_header { + /** IP version, traffic class, and flow label + * + * 4 bits : Version of the GRH + * 8 bits : Traffic class + * 20 bits : Flow label + */ + uint32_t ipver_tclass_flowlabel; + /** Payload length */ + uint16_t paylen; + /** Next header */ + uint8_t nxthdr; + /** Hop limit */ + uint8_t hoplmt; + /** Source GID */ + struct ib_gid sgid; + /** Destiniation GID */ + struct ib_gid dgid; +} __attribute__ (( packed )); + struct ib_device; struct ib_queue_pair; struct ib_completion_queue; @@ -223,8 +232,7 @@ struct ib_device_operations { struct ib_queue_pair *qp, struct ib_address_vector *av, struct io_buffer *iobuf ); - /** - * Post receive work queue entry + /** Post receive work queue entry * * @v ibdev Infiniband device * @v qp Queue pair @@ -252,8 +260,7 @@ struct ib_device_operations { struct ib_completion_queue *cq, ib_completer_t complete_send, ib_completer_t complete_recv ); - /** - * Attach to multicast group + /** Attach to multicast group * * @v ibdev Infiniband device * @v qp Queue pair @@ -263,8 +270,7 @@ struct ib_device_operations { int ( * mcast_attach ) ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_gid *gid ); - /** - * Detach from multicast group + /** Detach from multicast group * * @v ibdev Infiniband device * @v qp Queue pair @@ -276,13 +282,19 @@ struct ib_device_operations { }; /** An Infiniband device */ -struct ib_device { 
+struct ib_device { /** Port GID */ struct ib_gid port_gid; + /** Broadcast GID */ + struct ib_gid broadcast_gid; + /** Underlying device */ + struct device *dev; /** Infiniband operations */ struct ib_device_operations *op; /** Device private data */ void *dev_priv; + /** Owner private data */ + void *owner_priv; }; extern struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev, @@ -297,6 +309,52 @@ extern void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ); extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, unsigned long qpn, int is_send ); +extern struct ib_device * alloc_ibdev ( size_t priv_size ); +extern void free_ibdev ( struct ib_device *ibdev ); + +/** + * Post send work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v av Address vector + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static inline __attribute__ (( always_inline )) int +ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_address_vector *av, struct io_buffer *iobuf ) { + return ibdev->op->post_send ( ibdev, qp, av, iobuf ); +} + +/** + * Post receive work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static inline __attribute__ (( always_inline )) int +ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct io_buffer *iobuf ) { + return ibdev->op->post_recv ( ibdev, qp, iobuf ); +} + +/** + * Poll completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + * @v complete_send Send completion handler + * @v complete_recv Receive completion handler + */ +static inline __attribute__ (( always_inline )) void +ib_poll_cq ( struct ib_device *ibdev, struct ib_completion_queue *cq, + ib_completer_t complete_send, ib_completer_t complete_recv ) { + ibdev->op->poll_cq ( ibdev, cq, complete_send, complete_recv ); +} + /** * Attach to multicast group @@ -325,6 
+383,27 @@ ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp, ibdev->op->mcast_detach ( ibdev, qp, gid ); } +/** + * Set Infiniband owner-private data + * + * @v pci Infiniband device + * @v priv Private data + */ +static inline void ib_set_ownerdata ( struct ib_device *ibdev, + void *owner_priv ) { + ibdev->owner_priv = owner_priv; +} + +/** + * Get Infiniband owner-private data + * + * @v pci Infiniband device + * @ret priv Private data + */ +static inline void * ib_get_ownerdata ( struct ib_device *ibdev ) { + return ibdev->owner_priv; +} + /***************************************************************************** * * Management datagrams @@ -435,9 +514,7 @@ union ib_mad { - - - +#if 0 extern struct ll_protocol infiniband_protocol; @@ -459,4 +536,6 @@ static inline struct net_device * alloc_ibdev ( size_t priv_size ) { return netdev; } +#endif + #endif /* _GPXE_INFINIBAND_H */ diff --git a/src/include/gpxe/ipoib.h b/src/include/gpxe/ipoib.h new file mode 100644 index 000000000..0551687dd --- /dev/null +++ b/src/include/gpxe/ipoib.h @@ -0,0 +1,78 @@ +#ifndef _GPXE_IPOIB_H +#define _GPXE_IPOIB_H + +/** @file + * + * IP over Infiniband + */ + +#include + +/** IPoIB MAC address length */ +#define IPOIB_ALEN 20 + +/** An IPoIB MAC address */ +struct ipoib_mac { + /** Queue pair number + * + * MSB must be zero; QPNs are only 24-bit. + */ + uint32_t qpn; + /** Port GID */ + struct ib_gid gid; +} __attribute__ (( packed )); + +/** IPoIB link-layer header length */ +#define IPOIB_HLEN 24 + +/** + * IPoIB link-layer header pseudo portion + * + * This part doesn't actually exist on the wire, but it provides a + * convenient way to fit into the typical network device model. 
+ */ +struct ipoib_pseudo_hdr { + /** Peer address */ + struct ipoib_mac peer; +} __attribute__ (( packed )); + +/** IPoIB link-layer header real portion */ +struct ipoib_real_hdr { + /** Network-layer protocol */ + uint16_t proto; + /** Reserved, must be zero */ + uint16_t reserved; +} __attribute__ (( packed )); + +/** An IPoIB link-layer header */ +struct ipoib_hdr { + /** Pseudo portion */ + struct ipoib_pseudo_hdr pseudo; + /** Real portion */ + struct ipoib_real_hdr real; +} __attribute__ (( packed )); + +extern struct ll_protocol ipoib_protocol; + +extern const char * ipoib_ntoa ( const void *ll_addr ); + +/** + * Allocate IPoIB device + * + * @v priv_size Size of driver private data + * @ret netdev Network device, or NULL + */ +static inline struct net_device * alloc_ipoibdev ( size_t priv_size ) { + struct net_device *netdev; + + netdev = alloc_netdev ( priv_size ); + if ( netdev ) { + netdev->ll_protocol = &ipoib_protocol; + } + return netdev; +} + +extern int ipoib_probe ( struct ib_device *ibdev ); +extern void ipoib_remove ( struct ib_device *ibdev ); + +#endif /* _GPXE_IPOIB_H */ diff --git a/src/net/infiniband.c b/src/net/infiniband.c index a9ca0e31d..7a68b7d41 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -153,8 +153,6 @@ void ib_destroy_qp ( struct ib_device *ibdev, free ( qp ); } - - /** * Find work queue belonging to completion queue * @@ -174,7 +172,35 @@ struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, return NULL; } +/** + * Allocate Infiniband device + * + * @v priv_size Size of private data area + * @ret ibdev Infiniband device, or NULL + */ +struct ib_device * alloc_ibdev ( size_t priv_size ) { + struct ib_device *ibdev; + size_t total_len; + + total_len = ( sizeof ( *ibdev ) + priv_size ); + ibdev = zalloc ( total_len ); + if ( ibdev ) { + ibdev->dev_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) ); + } + return ibdev; +} +/** + * Free Infiniband device + * + * @v ibdev Infiniband device + */ +void 
free_ibdev ( struct ib_device *ibdev ) { + free ( ibdev ); +} + + +#if 0 /** Infiniband broadcast MAC address */ static uint8_t ib_broadcast[IB_ALEN] = { 0xff, }; @@ -259,3 +285,5 @@ struct ll_protocol infiniband_protocol __ll_protocol = { .rx = ib_rx, .ntoa = ib_ntoa, }; + +#endif -- cgit v1.2.3-55-g7522 From 440e7926fbfc419115a85b61d3c740f640b68756 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 05:12:47 +0100 Subject: Dead code removal --- src/drivers/net/mlx_ipoib/mt25218.c | 475 +----------------------------------- src/include/gpxe/infiniband.h | 63 ----- 2 files changed, 5 insertions(+), 533 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 6aa4e7fed..da33e97ba 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -35,290 +35,7 @@ struct ib_address_vector hack_ipoib_bcast_av; -static const struct ib_gid arbel_no_gid = { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } -}; - - -#if 0 - -#define MLX_RX_MAX_FILL NUM_IPOIB_RCV_WQES - -struct mlx_nic { -#if ! CREATE_OWN - /** Queue pair handle */ - udqp_t ipoib_qph; - /** Send completion queue */ - cq_t snd_cqh; - /** Receive completion queue */ - cq_t rcv_cqh; -#endif - /** Broadcast Address Vector */ - ud_av_t bcast_av; - - /** RX fill level */ - unsigned int rx_fill; - -#if CREATE_OWN - struct ib_completion_queue *own_send_cq; - struct ib_completion_queue *own_recv_cq; - struct ib_queue_pair *own_qp; -#endif -}; - - -static struct io_buffer *static_ipoib_tx_ring[NUM_IPOIB_SND_WQES]; -static struct io_buffer *static_ipoib_rx_ring[NUM_IPOIB_RCV_WQES]; - -static struct arbel static_arbel; - -#if ! 
CREATE_OWN - -static struct arbel_completion_queue static_arbel_ipoib_send_cq = { - .ci_doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, -}; -static struct ib_completion_queue static_ipoib_send_cq = { - .cqn = 1234, /* Only used for debug messages */ - .num_cqes = NUM_IPOIB_SND_CQES, - .work_queues = LIST_HEAD_INIT ( static_ipoib_send_cq.work_queues ), - .dev_priv = &static_arbel_ipoib_send_cq, -}; - -static struct arbel_completion_queue static_arbel_ipoib_recv_cq = { - .ci_doorbell_idx = IPOIB_RCV_CQ_CI_DB_IDX, -}; -static struct ib_completion_queue static_ipoib_recv_cq = { - .cqn = 2345, /* Only used for debug messages */ - .num_cqes = NUM_IPOIB_RCV_CQES, - .work_queues = LIST_HEAD_INIT ( static_ipoib_recv_cq.work_queues ), - .dev_priv = &static_arbel_ipoib_recv_cq, -}; - -static struct arbel_queue_pair static_arbel_ipoib_qp = { - .send = { - .doorbell_idx = IPOIB_SND_QP_DB_IDX, - }, - .recv = { - .doorbell_idx = IPOIB_RCV_QP_DB_IDX, - }, -}; -static struct ib_queue_pair static_ipoib_qp = { - .send = { - .qp = &static_ipoib_qp, - .is_send = 1, - .cq = &static_ipoib_send_cq, - .num_wqes = NUM_IPOIB_SND_WQES, - .iobufs = static_ipoib_tx_ring, - .list = LIST_HEAD_INIT (static_ipoib_qp.send.list), - .dev_priv = &static_arbel_ipoib_qp.send, - }, - .recv = { - .qp = &static_ipoib_qp, - .is_send = 0, - .cq = &static_ipoib_recv_cq, - .num_wqes = NUM_IPOIB_RCV_WQES, - .iobufs = static_ipoib_rx_ring, - .list = LIST_HEAD_INIT (static_ipoib_qp.recv.list), - .dev_priv = &static_arbel_ipoib_qp.recv, - }, - .dev_priv = &static_arbel_ipoib_qp, -}; - -#endif - - -static struct ib_device static_ibdev = { - .dev_priv = &static_arbel, -}; - - -/** - * Open network device - * - * @v netdev Network device - * @ret rc Return status code - */ -static int mlx_open ( struct net_device *netdev ) { - - ( void ) netdev; - - return 0; -} - -/** - * Close network device - * - * @v netdev Network device - */ -static void mlx_close ( struct net_device *netdev ) { - - ( void ) netdev; - -} - -static int 
arbel_post_send ( struct ib_device *ibdev, - struct ib_queue_pair *qp, - struct ib_address_vector *av, - struct io_buffer *iobuf ); - -static int mlx_transmit_direct ( struct net_device *netdev, - struct io_buffer *iobuf ) { - struct mlx_nic *mlx = netdev->priv; - int rc; - - struct ud_av_st *bcast_av = mlx->bcast_av; - struct arbelprm_ud_address_vector *bav = - ( struct arbelprm_ud_address_vector * ) &bcast_av->av; - struct ib_address_vector av = { - .dest_qp = bcast_av->dest_qp, - .qkey = bcast_av->qkey, - .dlid = MLX_GET ( bav, rlid ), - .rate = ( MLX_GET ( bav, max_stat_rate ) ? 1 : 4 ), - .sl = MLX_GET ( bav, sl ), - .gid_present = 1, - }; - memcpy ( &av.gid, ( ( void * ) bav ) + 16, 16 ); - - rc = arbel_post_send ( &static_ibdev, -#if CREATE_OWN - mlx->own_qp, -#else - &static_ipoib_qp, -#endif - &av, iobuf ); - - return rc; -} - -static void arbel_poll_cq ( struct ib_device *ibdev, - struct ib_completion_queue *cq, - ib_completer_t complete_send, - ib_completer_t complete_recv ); - -static void temp_complete_send ( struct ib_device *ibdev __unused, - struct ib_queue_pair *qp, - struct ib_completion *completion, - struct io_buffer *iobuf ) { - struct net_device *netdev = qp->owner_priv; - - DBG ( "Wahey! TX completion\n" ); - netdev_tx_complete_err ( netdev, iobuf, - ( completion->syndrome ? -EIO : 0 ) ); -} - -static void temp_complete_recv ( struct ib_device *ibdev __unused, - struct ib_queue_pair *qp, - struct ib_completion *completion, - struct io_buffer *iobuf ) { - struct net_device *netdev = qp->owner_priv; - struct mlx_nic *mlx = netdev->priv; - - DBG ( "Yay! 
RX completion on %p len %zx:\n", iobuf, completion->len ); - if ( completion->syndrome ) { - netdev_rx_err ( netdev, iobuf, -EIO ); - } else { - iob_put ( iobuf, completion->len ); - iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); - netdev_rx ( netdev, iobuf ); - } - - mlx->rx_fill--; -} - -static int arbel_post_recv ( struct ib_device *ibdev, - struct ib_queue_pair *qp, - struct io_buffer *iobuf ); - -static void mlx_refill_rx ( struct net_device *netdev ) { - struct mlx_nic *mlx = netdev->priv; - struct io_buffer *iobuf; - int rc; - - while ( mlx->rx_fill < MLX_RX_MAX_FILL ) { - iobuf = alloc_iob ( 2048 ); - if ( ! iobuf ) - break; - DBG ( "Posting RX buffer %p:\n", iobuf ); - if ( ( rc = arbel_post_recv ( &static_ibdev, -#if CREATE_OWN - mlx->own_qp, -#else - &static_ipoib_qp, -#endif - iobuf ) ) != 0 ) { - free_iob ( iobuf ); - break; - } - mlx->rx_fill++; - } -} - -/** - * Poll for completed and received packets - * - * @v netdev Network device - */ -static void mlx_poll ( struct net_device *netdev ) { - struct mlx_nic *mlx = netdev->priv; - int rc; - - if ( ( rc = poll_error_buf() ) != 0 ) { - DBG ( "poll_error_buf() failed: %s\n", strerror ( rc ) ); - return; - } - - /* Drain event queue. We can ignore events, since we're going - * to just poll all completion queues anyway. 
- */ - if ( ( rc = drain_eq() ) != 0 ) { - DBG ( "drain_eq() failed: %s\n", strerror ( rc ) ); - return; - } - - /* Poll completion queues */ - arbel_poll_cq ( &static_ibdev, -#if CREATE_OWN - mlx->own_send_cq, -#else - &static_ipoib_send_cq, -#endif - temp_complete_send, temp_complete_recv ); -#if 0 - arbel_poll_cq ( &static_ibdev, -#if CREATE_OWN - mlx->own_recv_cq, -#else - &static_ipoib_recv_cq, -#endif - temp_complete_send, temp_complete_recv ); -#endif - - mlx_refill_rx ( netdev ); -} - -/** - * Enable or disable interrupts - * - * @v netdev Network device - * @v enable Interrupts should be enabled - */ -static void mlx_irq ( struct net_device *netdev, int enable ) { - - ( void ) netdev; - ( void ) enable; - -} - -static struct net_device_operations mlx_operations = { - .open = mlx_open, - .close = mlx_close, - .transmit = mlx_transmit_direct, - .poll = mlx_poll, - .irq = mlx_irq, -}; - -#endif /* 0 */ @@ -1030,6 +747,11 @@ static void arbel_ring_doorbell ( struct arbel *arbel, writel ( db_reg->dword[1], ( arbel->uar + offset + 4 ) ); } +/** GID used for GID-less send work queue entries */ +static const struct ib_gid arbel_no_gid = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } +}; + /** * Post send work queue entry * @@ -1500,193 +1222,6 @@ static int arbel_get_port_gid ( struct arbel *arbel, struct ib_gid *gid ) { -#if 0 - -/** - * Probe PCI device - * - * @v pci PCI device - * @v id PCI ID - * @ret rc Return status code - */ -static int arbel_probe ( struct pci_device *pci, - const struct pci_device_id *id __unused ) { - struct net_device *netdev; - struct arbelprm_query_dev_lim dev_lim; - struct arbel *arbel = &static_arbel; - struct mlx_nic *mlx; - struct ib_mac *mac; - udqp_t qph; - int rc; - - /* Allocate net device */ - netdev = alloc_ibdev ( sizeof ( *mlx ) ); - if ( ! 
netdev ) - return -ENOMEM; - netdev_init ( netdev, &mlx_operations ); - mlx = netdev->priv; - pci_set_drvdata ( pci, netdev ); - netdev->dev = &pci->dev; - memset ( mlx, 0, sizeof ( *mlx ) ); - - /* Fix up PCI device */ - adjust_pci_device ( pci ); - - /* Initialise hardware */ - if ( ( rc = ib_driver_init ( pci, &qph ) ) != 0 ) - goto err_ipoib_init; - mlx->bcast_av = ib_data.bcast_av; -#if ! CREATE_OWN - mlx->ipoib_qph = qph; - mlx->snd_cqh = ib_data.ipoib_snd_cq; - mlx->rcv_cqh = ib_data.ipoib_rcv_cq; - mac = ( ( struct ib_mac * ) netdev->ll_addr ); - mac->qpn = htonl ( ib_get_qpn ( mlx->ipoib_qph ) ); - memcpy ( &mac->gid, ib_data.port_gid.raw, sizeof ( mac->gid ) ); -#endif - - /* Hack up IB structures */ - arbel->config = memfree_pci_dev.cr_space; - arbel->mailbox_in = dev_buffers_p->inprm_buf; - arbel->mailbox_out = dev_buffers_p->outprm_buf; - arbel->uar = memfree_pci_dev.uar; - arbel->db_rec = dev_ib_data.uar_context_base; - arbel->reserved_lkey = dev_ib_data.mkey; - arbel->eqn = dev_ib_data.eq.eqn; -#if ! 
CREATE_OWN - static_arbel_ipoib_qp.send.wqe = - ( ( struct udqp_st * ) qph )->snd_wq; - static_arbel_ipoib_qp.recv.wqe = - ( ( struct udqp_st * ) qph )->rcv_wq; - static_arbel_ipoib_send_cq.cqe = - ( ( struct cq_st * ) ib_data.ipoib_snd_cq )->cq_buf; - static_arbel_ipoib_recv_cq.cqe = - ( ( struct cq_st * ) ib_data.ipoib_rcv_cq )->cq_buf; - static_ipoib_qp.qpn = ib_get_qpn ( qph ); - static_ipoib_qp.owner_priv = netdev; - list_add ( &static_ipoib_qp.send.list, - &static_ipoib_send_cq.work_queues ); - list_add ( &static_ipoib_qp.recv.list, - &static_ipoib_recv_cq.work_queues ); -#endif - static_ibdev.op = &arbel_ib_operations; - - /* Get device limits */ - if ( ( rc = arbel_cmd_query_dev_lim ( arbel, &dev_lim ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not get device limits: %s\n", - arbel, strerror ( rc ) ); - goto err_query_dev_lim; - } - arbel->limits.reserved_uars = MLX_GET ( &dev_lim, num_rsvd_uars ); - arbel->limits.reserved_cqs = - ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) ); - arbel->limits.reserved_qps = - ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); - -#if CREATE_OWN - struct ib_device *ibdev = &static_ibdev; - mlx->own_send_cq = ib_create_cq ( ibdev, 32 ); - if ( ! mlx->own_send_cq ) { - DBG ( "Could not create send CQ\n" ); - return -EIO; - } -#if 0 - mlx->own_recv_cq = ib_create_cq ( ibdev, 32 ); - if ( ! mlx->own_recv_cq ) { - DBG ( "Could not create send CQ\n" ); - return -EIO; - } -#endif - mlx->own_qp = ib_create_qp ( ibdev, NUM_IPOIB_SND_WQES, - mlx->own_send_cq, NUM_IPOIB_RCV_WQES, - //mlx->own_recv_cq, ipoib_qkey ); - mlx->own_send_cq, ipoib_qkey ); - if ( ! 
mlx->own_qp ) { - DBG ( "Could not create QP\n" ); - return -EIO; - } - mlx->own_qp->owner_priv = netdev; - struct ib_gid *bcast_gid = ( struct ib_gid * ) &ib_data.bcast_gid; - if ( ( rc = ib_mcast_attach ( ibdev, mlx->own_qp, - bcast_gid ) ) != 0 ) { - DBG ( "Could not attach to broadcast GID: %s\n", - strerror ( rc ) ); - return rc; - } - - if ( ( rc = arbel_get_port_gid ( arbel, &ibdev->port_gid ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not determine port GID: %s\n", - arbel, strerror ( rc ) ); - return rc; - } - - DBG ( "Port GID:\n" ); - DBG_HD ( &ibdev->port_gid, sizeof ( ibdev->port_gid ) ); - - - mac = ( ( struct ib_mac * ) netdev->ll_addr ); - mac->qpn = htonl ( mlx->own_qp->qpn ); - memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) ); -#endif - -#if 0 - DBG ( "MADS SND CQN = %#lx\n", dev_ib_data.mads_qp.snd_cq.cqn ); - struct ib_completion_queue *test_cq; - test_cq = ib_create_cq ( &static_ibdev, 32 ); - if ( test_cq ) { - DBG ( "Woot: create_cq() passed!\n" ); - } -#endif - - ibdev->dev = &pci->dev; - - - struct ud_av_st *bcast_av = mlx->bcast_av; - struct arbelprm_ud_address_vector *bav = - ( struct arbelprm_ud_address_vector * ) &bcast_av->av; - struct ib_address_vector *av = &hack_ipoib_bcast_av; - av->dest_qp = bcast_av->dest_qp; - av->qkey = bcast_av->qkey; - av->dlid = MLX_GET ( bav, rlid ); - av->rate = ( MLX_GET ( bav, max_stat_rate ) ? 
1 : 4 ); - av->sl = MLX_GET ( bav, sl ); - av->gid_present = 1; - memcpy ( &av->gid, ( ( void * ) bav ) + 16, 16 ); - - - /* Register network device */ - if ( ( rc = register_netdev ( netdev ) ) != 0 ) - goto err_register_netdev; - - return 0; - - err_query_dev_lim: - err_register_netdev: - err_ipoib_init: - ib_driver_close ( 0 ); - netdev_nullify ( netdev ); - netdev_put ( netdev ); - return rc; -} - -/** - * Remove PCI device - * - * @v pci PCI device - */ -static void arbel_remove ( struct pci_device *pci ) { - struct net_device *netdev = pci_get_drvdata ( pci ); - - unregister_netdev ( netdev ); - ib_driver_close ( 0 ); - netdev_nullify ( netdev ); - netdev_put ( netdev ); -} - -#endif /* 0 */ - - - /** * Probe PCI device * diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index e9e0121dc..236b27272 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -10,42 +10,6 @@ #include #include - - -#if 0 -/** Infiniband MAC address length */ -#define IB_ALEN 20 - -/** An Infiniband MAC address */ -struct ib_mac { - /** Queue pair number - * - * MSB must be zero; QPNs are only 24-bit. 
- */ - uint32_t qpn; - /** Port GID */ - struct ib_gid gid; -} __attribute__ (( packed )); - -/** Infiniband link-layer header length */ -#define IB_HLEN 4 - -/** An Infiniband link-layer header */ -struct ibhdr { - /** Network-layer protocol */ - uint16_t proto; - /** Reserved, must be zero */ - uint16_t reserved; -} __attribute__ (( packed )); -#endif - - - - - - - - /** An Infiniband Global Identifier */ struct ib_gid { uint8_t bytes[16]; @@ -511,31 +475,4 @@ union ib_mad { struct ib_mad_port_info port_info; } __attribute__ (( packed )); - - - -#if 0 - -extern struct ll_protocol infiniband_protocol; - -extern const char * ib_ntoa ( const void *ll_addr ); - -/** - * Allocate Infiniband device - * - * @v priv_size Size of driver private data - * @ret netdev Network device, or NULL - */ -static inline struct net_device * alloc_ibdev ( size_t priv_size ) { - struct net_device *netdev; - - netdev = alloc_netdev ( priv_size ); - if ( netdev ) { - netdev->ll_protocol = &infiniband_protocol; - } - return netdev; -} - -#endif - #endif /* _GPXE_INFINIBAND_H */ -- cgit v1.2.3-55-g7522 From e05a8cd4deb75ae145160cddf146daba0a17ced4 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 06:35:21 +0100 Subject: Use pkey table access to determine broadcast GID directly. 
--- src/drivers/net/mlx_ipoib/mt25218.c | 73 ++++++++++++++++++++++++++++++++----- src/include/gpxe/infiniband.h | 8 ++++ 2 files changed, 72 insertions(+), 9 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index da33e97ba..082078196 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -1206,21 +1206,69 @@ static int arbel_get_guid_info ( struct arbel *arbel, return 0; } -static int arbel_get_port_gid ( struct arbel *arbel, struct ib_gid *gid ) { - struct ib_mad_port_info port_info; - struct ib_mad_guid_info guid_info; +static int arbel_get_pkey_table ( struct arbel *arbel, + struct ib_mad_pkey_table *pkey_table ) { + union arbelprm_mad mad; + struct ib_mad_hdr *hdr = &mad.mad.mad_hdr; + int rc; + + memset ( &mad, 0, sizeof ( mad ) ); + hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + hdr->class_version = 1; + hdr->method = IB_MGMT_METHOD_GET; + hdr->attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE ); + if ( ( rc = arbel_mad_ifc ( arbel, &mad ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get pkey table: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + memcpy ( pkey_table, &mad.mad.pkey_table, sizeof ( *pkey_table ) ); + return 0; +} + +static int arbel_get_port_gid ( struct arbel *arbel, + struct ib_gid *port_gid ) { + union { + /* This union exists just to save stack space */ + struct ib_mad_port_info port_info; + struct ib_mad_guid_info guid_info; + } u; int rc; - if ( ( rc = arbel_get_port_info ( arbel, &port_info ) ) != 0 ) + /* Port info gives us the first half of the port GID */ + if ( ( rc = arbel_get_port_info ( arbel, &u.port_info ) ) != 0 ) return rc; - if ( ( rc = arbel_get_guid_info ( arbel, &guid_info ) ) != 0 ) + memcpy ( &port_gid->bytes[0], u.port_info.gid_prefix, 8 ); + + /* GUID info gives us the second half of the port GID */ + if ( ( rc = arbel_get_guid_info ( arbel, &u.guid_info ) ) != 0 ) return rc; - memcpy ( 
&gid->bytes[0], port_info.gid_prefix, 8 ); - memcpy ( &gid->bytes[8], guid_info.gid_local, 8 ); + memcpy ( &port_gid->bytes[8], u.guid_info.gid_local, 8 ); + return 0; } +static int arbel_get_broadcast_gid ( struct arbel *arbel, + struct ib_gid *broadcast_gid ) { + static const struct ib_gid ipv4_broadcast_gid = { + { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } + }; + struct ib_mad_pkey_table pkey_table; + int rc; + + /* Start with the IPv4 broadcast GID */ + memcpy ( broadcast_gid, &ipv4_broadcast_gid, + sizeof ( *broadcast_gid ) ); + /* Add partition key */ + if ( ( rc = arbel_get_pkey_table ( arbel, &pkey_table ) ) != 0 ) + return rc; + memcpy ( &broadcast_gid->bytes[4], &pkey_table.pkey[0][0], + sizeof ( pkey_table.pkey[0][0] ) ); + + return 0; +} /** * Probe PCI device @@ -1282,6 +1330,14 @@ static int arbel_probe ( struct pci_device *pci, goto err_get_port_gid; } + /* Get broadcast GID */ + if ( ( rc = arbel_get_broadcast_gid ( arbel, + &ibdev->broadcast_gid ) ) != 0 ){ + DBGC ( arbel, "Arbel %p could not determine broadcast GID: " + "%s\n", arbel, strerror ( rc ) ); + goto err_get_broadcast_gid; + } + struct ud_av_st *bcast_av = ib_data.bcast_av; struct arbelprm_ud_address_vector *bav = ( struct arbelprm_ud_address_vector * ) &bcast_av->av; @@ -1294,8 +1350,6 @@ static int arbel_probe ( struct pci_device *pci, av->gid_present = 1; memcpy ( &av->gid, ( ( void * ) bav ) + 16, 16 ); - memcpy ( &ibdev->broadcast_gid, &ib_data.bcast_gid, 16 ); - /* Add IPoIB device */ if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) { DBGC ( arbel, "Arbel %p could not add IPoIB device: %s\n", @@ -1306,6 +1360,7 @@ static int arbel_probe ( struct pci_device *pci, return 0; err_ipoib_probe: + err_get_broadcast_gid: err_get_port_gid: err_query_dev_lim: ib_driver_close ( 0 ); diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 236b27272..32f9d6752 100644 --- a/src/include/gpxe/infiniband.h +++ 
b/src/include/gpxe/infiniband.h @@ -468,11 +468,19 @@ struct ib_mad_port_info { uint8_t link_speed_enabled__link_speed_active; } __attribute__ (( packed )); +struct ib_mad_pkey_table { + struct ib_mad_hdr mad_hdr; + uint32_t mkey[2]; + uint32_t reserved[8]; + uint16_t pkey[16][2]; +} __attribute__ (( packed )); + union ib_mad { struct ib_mad_hdr mad_hdr; struct ib_mad_data data; struct ib_mad_guid_info guid_info; struct ib_mad_port_info port_info; + struct ib_mad_pkey_table pkey_table; } __attribute__ (( packed )); #endif /* _GPXE_INFINIBAND_H */ -- cgit v1.2.3-55-g7522 From b3d3814c176a068980f8c178e74d36e524fa7572 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 08:36:56 +0100 Subject: Obtains a response to the get path record! --- src/drivers/net/ipoib.c | 181 ++++++++++++++++++++++++++++++++++-- src/drivers/net/mlx_ipoib/ib_mad.c | 5 +- src/drivers/net/mlx_ipoib/ib_mad.h | 2 +- src/drivers/net/mlx_ipoib/mt25218.c | 42 ++++++++- src/include/gpxe/infiniband.h | 63 ++++++++++++- 5 files changed, 276 insertions(+), 17 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index 2d351e4bc..a848b711f 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -44,15 +44,24 @@ extern struct ib_address_vector hack_ipoib_bcast_av; /** IPoIB MTU */ #define IPOIB_MTU 2048 -/** Number of IPoIB send work queue entries */ +/** Number of IPoIB data send work queue entries */ #define IPOIB_DATA_NUM_SEND_WQES 4 -/** Number of IPoIB receive work queue entries */ -#define IPOIB_DATA_NUM_RECV_WQES 8 +/** Number of IPoIB data receive work queue entries */ +#define IPOIB_DATA_NUM_RECV_WQES 4 -/** Number of IPoIB completion entries */ +/** Number of IPoIB data completion entries */ #define IPOIB_DATA_NUM_CQES 8 +/** Number of IPoIB metadata send work queue entries */ +#define IPOIB_META_NUM_SEND_WQES 4 + +/** Number of IPoIB metadata receive work queue entries */ +#define IPOIB_META_NUM_RECV_WQES 4 + +/** 
Number of IPoIB metadata completion entries */ +#define IPOIB_META_NUM_CQES 8 + /** An IPoIB queue set */ struct ipoib_queue_set { /** Completion queue */ @@ -84,10 +93,15 @@ struct ipoib_device { **************************************************************************** */ +/** Broadcast QPN used in IPoIB MAC addresses + * + * This is a guaranteed invalid real QPN + */ +#define IPOIB_BROADCAST_QPN 0xffffffffUL + /** Broadcast IPoIB address */ static struct ipoib_mac ipoib_broadcast = { - .gid = { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } }, + .qpn = ntohl ( IPOIB_BROADCAST_QPN ), }; /** @@ -244,6 +258,73 @@ static int ipoib_create_qset ( struct ipoib_device *ipoib, return rc; } +/** + * Transmit path record request + * + * @v ipoib IPoIB device + * @v gid Destination GID + * @ret rc Return status code + */ +static int ipoib_get_path_record ( struct ipoib_device *ipoib, + struct ib_gid *gid ) { + struct ib_device *ibdev = ipoib->ibdev; + struct io_buffer *iobuf; + struct ib_mad_path_record *path_record; + struct ib_address_vector av; + static uint32_t tid = 0; + int rc; + + DBG ( "get_path_record():\n" ); + int get_path_record(struct ib_gid *dgid, uint16_t *dlid_p, + uint8_t *sl_p, uint8_t *rate_p); + uint16_t tmp_dlid; + uint8_t tmp_sl; + uint8_t tmp_rate; + get_path_record ( gid, &tmp_dlid, &tmp_sl, &tmp_rate ); + + DBG ( "ipoib_get_path_record():\n" ); + + /* Allocate I/O buffer */ + iobuf = alloc_iob ( sizeof ( *path_record ) ); + if ( ! 
iobuf ) + return -ENOMEM; + iob_put ( iobuf, sizeof ( *path_record ) ); + path_record = iobuf->data; + memset ( path_record, 0, sizeof ( *path_record ) ); + + /* Construct path record request */ + path_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION; + path_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + path_record->mad_hdr.class_version = 2; + path_record->mad_hdr.method = IB_MGMT_METHOD_GET; + path_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC ); + path_record->mad_hdr.tid = tid++; + path_record->sa_hdr.comp_mask[1] = + htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID ); + memcpy ( &path_record->dgid, gid, sizeof ( path_record->dgid ) ); + memcpy ( &path_record->sgid, &ibdev->port_gid, + sizeof ( path_record->sgid ) ); + + DBG_HD ( path_record, sizeof ( *path_record ) ); + + /* Construct address vector */ + memset ( &av, 0, sizeof ( av ) ); + av.dlid = ibdev->sm_lid; + av.dest_qp = IB_SA_QPN; + av.qkey = IB_SA_QKEY; + + /* Post send request */ + if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av, + iobuf ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n", + ipoib, strerror ( rc ) ); + free_iob ( iobuf ); + return rc; + } + + return 0; +} + /** * Transmit packet via IPoIB network device * @@ -256,19 +337,29 @@ static int ipoib_transmit ( struct net_device *netdev, struct ipoib_device *ipoib = netdev->priv; struct ib_device *ibdev = ipoib->ibdev; struct ipoib_pseudo_hdr *ipoib_pshdr = iobuf->data; + int rc; if ( iob_len ( iobuf ) < sizeof ( *ipoib_pshdr ) ) { DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib ); return -EINVAL; } + DBG ( "TX pseudo-header:\n" ); + DBG_HD ( ipoib_pshdr, sizeof ( *ipoib_pshdr ) ); + if ( ipoib_pshdr->peer.qpn != htonl ( IPOIB_BROADCAST_QPN ) ) { + DBG ( "Get path record\n" ); + rc = ipoib_get_path_record ( ipoib, &ipoib_pshdr->peer.gid ); + free_iob ( iobuf ); + return 0; + } + iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) ); return ib_post_send ( ibdev, ipoib->data.qp, 
&hack_ipoib_bcast_av, iobuf ); } /** - * Handle IPoIB send completion + * Handle IPoIB data send completion * * @v ibdev Infiniband device * @v qp Queue pair @@ -286,7 +377,7 @@ static void ipoib_data_complete_send ( struct ib_device *ibdev __unused, } /** - * Handle IPoIB receive completion + * Handle IPoIB data receive completion * * @v ibdev Infiniband device * @v qp Queue pair @@ -315,6 +406,61 @@ static void ipoib_data_complete_recv ( struct ib_device *ibdev __unused, ipoib->data.recv_fill--; } +/** + * Handle IPoIB metadata send completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v completion Completion + * @v iobuf I/O buffer + */ +static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->owner_priv; + struct ipoib_device *ipoib = netdev->priv; + + DBG ( "Woohoo! METADATA TX completion\n" ); + + + if ( completion->syndrome ) { + DBGC ( ipoib, "IPoIB %p metadata TX completion error %x\n", + ipoib, completion->syndrome ); + } + free_iob ( iobuf ); +} + +/** + * Handle IPoIB metadata receive completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v completion Completion + * @v iobuf I/O buffer + */ +static void ipoib_meta_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->owner_priv; + struct ipoib_device *ipoib = netdev->priv; + + DBG ( "***************** META TX!!!!!! 
********\n" ); + + if ( completion->syndrome ) { + DBGC ( ipoib, "IPoIB %p metadata RX completion error %x\n", + ipoib, completion->syndrome ); + } else { + iob_put ( iobuf, completion->len ); + DBG ( "Metadata RX:\n" ); + DBG_HD ( iobuf->data, iob_len ( iobuf ) ); + } + + ipoib->meta.recv_fill--; + free_iob ( iobuf ); +} + /** * Refill IPoIB receive ring * @@ -349,6 +495,9 @@ static void ipoib_poll ( struct net_device *netdev ) { ib_poll_cq ( ibdev, ipoib->data.cq, ipoib_data_complete_send, ipoib_data_complete_recv ); + ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send, + ipoib_meta_complete_recv ); + ipoib_refill_recv ( ipoib, &ipoib->meta ); ipoib_refill_recv ( ipoib, &ipoib->data ); } @@ -382,7 +531,8 @@ static int ipoib_open ( struct net_device *netdev ) { return rc; } - /* Fill receive ring */ + /* Fill receive rings */ + ipoib_refill_recv ( ipoib, &ipoib->meta ); ipoib_refill_recv ( ipoib, &ipoib->data ); return 0; @@ -436,6 +586,17 @@ int ipoib_probe ( struct ib_device *ibdev ) { ipoib->netdev = netdev; ipoib->ibdev = ibdev; + /* Allocate metadata queue set */ + if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta, + IPOIB_META_NUM_CQES, + IPOIB_META_NUM_SEND_WQES, + IPOIB_META_NUM_RECV_WQES, + IB_SA_QKEY ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n", + ipoib, strerror ( rc ) ); + goto err_create_meta_qset; + } + /* Allocate data queue set */ if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data, IPOIB_DATA_NUM_CQES, @@ -461,6 +622,8 @@ int ipoib_probe ( struct ib_device *ibdev ) { err_register_netdev: ipoib_destroy_qset ( ipoib, &ipoib->data ); err_create_data_qset: + ipoib_destroy_qset ( ipoib, &ipoib->meta ); + err_create_meta_qset: netdev_nullify ( netdev ); netdev_put ( netdev ); return rc; diff --git a/src/drivers/net/mlx_ipoib/ib_mad.c b/src/drivers/net/mlx_ipoib/ib_mad.c index 73b49f205..4da4677bc 100644 --- a/src/drivers/net/mlx_ipoib/ib_mad.c +++ b/src/drivers/net/mlx_ipoib/ib_mad.c @@ -264,7 +264,7 @@ static 
int join_mc_group(__u32 * qkey_p, __u16 * mlid_p, __u8 join) return is_good ? 0 : -1; } -static int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, u8 * sl_p, +int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, u8 * sl_p, u8 * rate_p) { struct path_record_mad_st *mad, *rcv_mad; @@ -321,6 +321,9 @@ static int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, u8 * sl_p, cpu_to_be_buf(mad, sizeof *mad); memcpy(mad->path_record.sgid.raw, ib_data.port_gid.raw, 16); + DBG ( "data:\n" ); + DBG_HD ( mad, sizeof ( *mad ) ); + rc = post_send_req(qp, snd_wqe, 1); if (rc) { eprintf(""); diff --git a/src/drivers/net/mlx_ipoib/ib_mad.h b/src/drivers/net/mlx_ipoib/ib_mad.h index 5ffb54045..51b90d215 100644 --- a/src/drivers/net/mlx_ipoib/ib_mad.h +++ b/src/drivers/net/mlx_ipoib/ib_mad.h @@ -104,7 +104,7 @@ union mad_u { struct ib_mad_st mad; } __attribute__ ((packed)); -static int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, __u8 * sl_p, +int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, __u8 * sl_p, __u8 * rate_p); #endif /* __ib_mad_h__ */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 082078196..fb98d543e 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -174,8 +174,8 @@ static int arbel_cmd ( struct arbel *arbel, unsigned long command, size_t dump_len = in_len; if ( dump_len > 256 ) dump_len = 256; - DBG ( "Input:\n" ); - DBG_HD ( in, dump_len ); + // DBG ( "Input:\n" ); + // DBG_HD ( in, dump_len ); } /* Issue command */ @@ -212,8 +212,8 @@ static int arbel_cmd ( struct arbel *arbel, unsigned long command, size_t dump_len = out_len; if ( dump_len > 256 ) dump_len = 256; - DBG ( "Output:\n" ); - DBG_HD ( out, dump_len ); + // DBG ( "Output:\n" ); + // DBG_HD ( out, dump_len ); } return 0; @@ -749,7 +749,7 @@ static void arbel_ring_doorbell ( struct arbel *arbel, /** GID used for GID-less send work queue entries */ static const struct ib_gid arbel_no_gid 
= { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0 } }; /** @@ -805,6 +805,14 @@ static int arbel_post_send ( struct ib_device *ibdev, MLX_FILL_1 ( &wqe->ud, 3, ud_address_vector.sl, av->sl ); gid = ( av->gid_present ? &av->gid : &arbel_no_gid ); memcpy ( &wqe->ud.u.dwords[4], gid, sizeof ( *gid ) ); + + if ( ! av->gid_present ) { + DBG ( "no_gid:\n" ); + DBG_HD ( &arbel_no_gid, sizeof ( arbel_no_gid ) ); + DBG ( "gid:\n" ); + DBG_HD ( &wqe->ud.u.dwords[4], 16 ); + } + MLX_FILL_1 ( &wqe->ud, 8, destination_qp, av->dest_qp ); MLX_FILL_1 ( &wqe->ud, 9, q_key, av->qkey ); MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_len ( iobuf ) ); @@ -821,6 +829,11 @@ static int arbel_post_send ( struct ib_device *ibdev, f, 1, always1, 1 ); + + DBG ( "arbel_post_send()\n" ); + DBG_HD ( wqe, sizeof ( *wqe ) ); + + /* Update doorbell record */ barrier(); qp_db_rec = &arbel->db_rec[arbel_send_wq->doorbell_idx].qp; @@ -1248,6 +1261,17 @@ static int arbel_get_port_gid ( struct arbel *arbel, return 0; } +static int arbel_get_sm_lid ( struct arbel *arbel, + unsigned long *sm_lid ) { + struct ib_mad_port_info port_info; + int rc; + + if ( ( rc = arbel_get_port_info ( arbel, &port_info ) ) != 0 ) + return rc; + *sm_lid = ntohs ( port_info.mastersm_lid ); + return 0; +} + static int arbel_get_broadcast_gid ( struct arbel *arbel, struct ib_gid *broadcast_gid ) { static const struct ib_gid ipv4_broadcast_gid = { @@ -1323,6 +1347,13 @@ static int arbel_probe ( struct pci_device *pci, arbel->limits.reserved_qps = ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); + /* Get subnet manager LID */ + if ( ( rc = arbel_get_sm_lid ( arbel, &ibdev->sm_lid ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine subnet manager " + "LID: %s\n", arbel, strerror ( rc ) ); + goto err_get_sm_lid; + } + /* Get port GID */ if ( ( rc = arbel_get_port_gid ( arbel, &ibdev->port_gid ) ) != 0 ) { DBGC ( arbel, "Arbel %p could not determine port GID: %s\n", @@ 
-1362,6 +1393,7 @@ static int arbel_probe ( struct pci_device *pci, err_ipoib_probe: err_get_broadcast_gid: err_get_port_gid: + err_get_sm_lid: err_query_dev_lim: ib_driver_close ( 0 ); err_ib_driver_init: diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 32f9d6752..2d1d94338 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -10,6 +10,12 @@ #include #include +/** Subnet administrator QPN */ +#define IB_SA_QPN 1 + +/** Subnet administrator queue key */ +#define IB_SA_QKEY 0x80010000UL + /** An Infiniband Global Identifier */ struct ib_gid { uint8_t bytes[16]; @@ -250,7 +256,9 @@ struct ib_device { /** Port GID */ struct ib_gid port_gid; /** Broadcast GID */ - struct ib_gid broadcast_gid; + struct ib_gid broadcast_gid; + /** Subnet manager LID */ + unsigned long sm_lid; /** Underlying device */ struct device *dev; /** Infiniband operations */ @@ -422,6 +430,31 @@ static inline void * ib_get_ownerdata ( struct ib_device *ibdev ) { #define IB_SMP_ATTR_LED_INFO 0x0031 #define IB_SMP_ATTR_VENDOR_MASK 0xFF00 +#define IB_SA_ATTR_MC_MEMBER_REC 0x38 +#define IB_SA_ATTR_PATH_REC 0x35 + +#define IB_SA_MCMEMBER_REC_MGID (1<<0) +#define IB_SA_MCMEMBER_REC_PORT_GID (1<<1) +#define IB_SA_MCMEMBER_REC_QKEY (1<<2) +#define IB_SA_MCMEMBER_REC_MLID (1<<3) +#define IB_SA_MCMEMBER_REC_MTU_SELECTOR (1<<4) +#define IB_SA_MCMEMBER_REC_MTU (1<<5) +#define IB_SA_MCMEMBER_REC_TRAFFIC_CLASS (1<<6) +#define IB_SA_MCMEMBER_REC_PKEY (1<<7) +#define IB_SA_MCMEMBER_REC_RATE_SELECTOR (1<<8) +#define IB_SA_MCMEMBER_REC_RATE (1<<9) +#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR (1<<10) +#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME (1<<11) +#define IB_SA_MCMEMBER_REC_SL (1<<12) +#define IB_SA_MCMEMBER_REC_FLOW_LABEL (1<<13) +#define IB_SA_MCMEMBER_REC_HOP_LIMIT (1<<14) +#define IB_SA_MCMEMBER_REC_SCOPE (1<<15) +#define IB_SA_MCMEMBER_REC_JOIN_STATE (1<<16) +#define IB_SA_MCMEMBER_REC_PROXY_JOIN (1<<17) + +#define IB_SA_PATH_REC_DGID 
(1<<2) +#define IB_SA_PATH_REC_SGID (1<<3) + struct ib_mad_hdr { uint8_t base_version; uint8_t mgmt_class; @@ -435,6 +468,17 @@ struct ib_mad_hdr { uint32_t attr_mod; } __attribute__ (( packed )); +struct ib_sa_hdr { + uint32_t sm_key[2]; + uint16_t reserved; + uint16_t attrib_offset; + uint32_t comp_mask[2]; +} __attribute__ (( packed )); + +struct ib_rmpp_hdr { + uint32_t raw[3]; +} __attribute__ (( packed )); + struct ib_mad_data { struct ib_mad_hdr mad_hdr; uint8_t data[232]; @@ -475,12 +519,29 @@ struct ib_mad_pkey_table { uint16_t pkey[16][2]; } __attribute__ (( packed )); +struct ib_mad_path_record { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + struct ib_sa_hdr sa_hdr; + uint32_t reserved0[2]; + struct ib_gid dgid; + struct ib_gid sgid; + uint16_t dlid; + uint16_t slid; + uint32_t hop_limit__flow_label__raw_traffic; + uint32_t pkey__numb_path__reversible__tclass; + uint32_t rate__rate_selector__mtu__mtu_selector__sl__reserved; + uint32_t preference__packet_lifetime__packet_lifetime_selector; + uint32_t reserved1[35]; +} __attribute__ (( packed )); + union ib_mad { struct ib_mad_hdr mad_hdr; struct ib_mad_data data; struct ib_mad_guid_info guid_info; struct ib_mad_port_info port_info; struct ib_mad_pkey_table pkey_table; + struct ib_mad_path_record path_record; } __attribute__ (( packed )); #endif /* _GPXE_INFINIBAND_H */ -- cgit v1.2.3-55-g7522 From ab191e45dbfe13c5bd2c7f6415c520e8d60ad850 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 10:39:30 +0100 Subject: Unicasts seem to be working. 
:) --- src/drivers/net/ipoib.c | 141 ++++++++++++++++++++++++++++-------- src/drivers/net/mlx_ipoib/mt25218.c | 14 ++-- src/include/gpxe/infiniband.h | 16 ++-- 3 files changed, 129 insertions(+), 42 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index 8432b2a65..f3b821c47 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -86,6 +86,32 @@ struct ipoib_device { struct ipoib_queue_set meta; }; +/** + * IPoIB path cache entry + * + * This serves a similar role to the ARP cache for Ethernet. (ARP + * *is* used on IPoIB; we have two caches to maintain.) + */ +struct ipoib_cached_path { + /** Destination GID */ + struct ib_gid gid; + /** Destination LID */ + unsigned int dlid; + /** Service level */ + unsigned int sl; + /** Rate */ + unsigned int rate; +}; + +/** Number of IPoIB path cache entries */ +#define IPOIB_NUM_CACHED_PATHS 2 + +/** IPoIB path cache */ +static struct ipoib_cached_path ipoib_path_cache[IPOIB_NUM_CACHED_PATHS]; + +/** Oldest IPoIB path cache entry index */ +static unsigned int ipoib_path_cache_idx = 0; + /**************************************************************************** * * IPoIB link layer @@ -165,15 +191,15 @@ static int ipoib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { * @ret string Link-layer address in human-readable format */ const char * ipoib_ntoa ( const void *ll_addr ) { - static char buf[61]; - const uint8_t *ipoib_addr = ll_addr; - unsigned int i; - char *p = buf; - - for ( i = 0 ; i < IPOIB_ALEN ; i++ ) { - p += sprintf ( p, ":%02x", ipoib_addr[i] ); - } - return ( buf + 1 ); + static char buf[45]; + const struct ipoib_mac *mac = ll_addr; + + snprintf ( buf, sizeof ( buf ), "%08lx:%08lx:%08lx:%08lx:%08lx", + htonl ( mac->qpn ), htonl ( mac->gid.u.dwords[0] ), + htonl ( mac->gid.u.dwords[1] ), + htonl ( mac->gid.u.dwords[2] ), + htonl ( mac->gid.u.dwords[3] ) ); + return buf; } /** IPoIB protocol */ @@ -258,6 +284,28 @@ static int 
ipoib_create_qset ( struct ipoib_device *ipoib, return rc; } +/** + * Find path cache entry by GID + * + * @v gid GID + * @ret entry Path cache entry, or NULL + */ +static struct ipoib_cached_path * +ipoib_find_cached_path ( struct ib_gid *gid ) { + struct ipoib_cached_path *path; + unsigned int i; + + for ( i = 0 ; i < IPOIB_NUM_CACHED_PATHS ; i++ ) { + path = &ipoib_path_cache[i]; + if ( memcmp ( &path->gid, gid, sizeof ( *gid ) ) == 0 ) + return path; + } + DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx cache miss\n", + htonl ( gid->u.dwords[0] ), htonl ( gid->u.dwords[1] ), + htonl ( gid->u.dwords[2] ), htonl ( gid->u.dwords[3] ) ); + return NULL; +} + /** * Transmit path record request * @@ -274,17 +322,15 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib, static uint32_t tid = 0; int rc; -#if 0 - DBG ( "get_path_record():\n" ); int get_path_record(struct ib_gid *dgid, uint16_t *dlid_p, uint8_t *sl_p, uint8_t *rate_p); uint16_t tmp_dlid; uint8_t tmp_sl; uint8_t tmp_rate; get_path_record ( gid, &tmp_dlid, &tmp_sl, &tmp_rate ); + DBG ( "get_path_record() gives dlid = %04x, sl = %02x, rate = %02x\n", + tmp_dlid, tmp_sl, tmp_rate ); - DBG ( "ipoib_get_path_record():\n" ); -#endif /* Allocate I/O buffer */ iobuf = alloc_iob ( sizeof ( *path_record ) ); @@ -307,13 +353,11 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib, memcpy ( &path_record->sgid, &ibdev->port_gid, sizeof ( path_record->sgid ) ); - // DBG_HD ( path_record, sizeof ( *path_record ) ); - /* Construct address vector */ memset ( &av, 0, sizeof ( av ) ); av.dlid = ibdev->sm_lid; av.dest_qp = IB_SA_QPN; - av.qkey = IB_SA_QKEY; + av.qkey = IB_GLOBAL_QKEY; /* Post send request */ if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av, @@ -339,6 +383,8 @@ static int ipoib_transmit ( struct net_device *netdev, struct ipoib_device *ipoib = netdev->priv; struct ib_device *ibdev = ipoib->ibdev; struct ipoib_pseudo_hdr *ipoib_pshdr = iobuf->data; + struct ib_address_vector av; + struct 
ipoib_cached_path *path; int rc; if ( iob_len ( iobuf ) < sizeof ( *ipoib_pshdr ) ) { @@ -346,18 +392,32 @@ static int ipoib_transmit ( struct net_device *netdev, return -EINVAL; } - DBG ( "TX pseudo-header:\n" ); - DBG_HD ( ipoib_pshdr, sizeof ( *ipoib_pshdr ) ); - if ( ipoib_pshdr->peer.qpn != htonl ( IPOIB_BROADCAST_QPN ) ) { - DBG ( "Get path record\n" ); - rc = ipoib_get_path_record ( ipoib, &ipoib_pshdr->peer.gid ); - free_iob ( iobuf ); - return 0; + /* Construct address vector */ + memset ( &av, 0, sizeof ( av ) ); + if ( ipoib_pshdr->peer.qpn == htonl ( IPOIB_BROADCAST_QPN ) ) { + /* Broadcast address */ + memcpy ( &av, &hack_ipoib_bcast_av, sizeof ( av ) ); + } else { + /* Unicast - look in path cache */ + path = ipoib_find_cached_path ( &ipoib_pshdr->peer.gid ); + if ( ! path ) { + /* No path entry - get path record */ + rc = ipoib_get_path_record ( ipoib, + &ipoib_pshdr->peer.gid ); + free_iob ( iobuf ); + return rc; + } + av.dest_qp = ntohl ( ipoib_pshdr->peer.qpn ); + av.qkey = IB_GLOBAL_QKEY; + av.dlid = path->dlid; + av.rate = path->rate; + av.sl = path->sl; + av.gid_present = 1; + memcpy ( &av.gid, &ipoib_pshdr->peer.gid, sizeof ( av.gid ) ); } iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) ); - return ib_post_send ( ibdev, ipoib->data.qp, - &hack_ipoib_bcast_av, iobuf ); + return ib_post_send ( ibdev, ipoib->data.qp, &av, iobuf ); } /** @@ -392,14 +452,13 @@ static void ipoib_data_complete_recv ( struct ib_device *ibdev __unused, struct io_buffer *iobuf ) { struct net_device *netdev = qp->owner_priv; struct ipoib_device *ipoib = netdev->priv; - struct ib_global_route_header *grh = iobuf->data; struct ipoib_pseudo_hdr *ipoib_pshdr; if ( completion->syndrome ) { netdev_rx_err ( netdev, iobuf, -EIO ); } else { iob_put ( iobuf, completion->len ); - iob_pull ( iobuf, ( sizeof ( *grh ) - + iob_pull ( iobuf, ( sizeof ( struct ib_global_route_header ) - sizeof ( *ipoib_pshdr ) ) ); /* FIXME: fill in a MAC address for the sake of AoE! 
*/ netdev_rx ( netdev, iobuf ); @@ -444,16 +503,38 @@ static void ipoib_meta_complete_recv ( struct ib_device *ibdev __unused, struct io_buffer *iobuf ) { struct net_device *netdev = qp->owner_priv; struct ipoib_device *ipoib = netdev->priv; - - DBG ( "***************** META RX!!!!!! ********\n" ); + struct ib_mad_path_record *path_record; + struct ipoib_cached_path *path; if ( completion->syndrome ) { DBGC ( ipoib, "IPoIB %p metadata RX completion error %x\n", ipoib, completion->syndrome ); } else { + /* Update path cache */ iob_put ( iobuf, completion->len ); + iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); + DBG ( "Metadata RX:\n" ); DBG_HD ( iobuf->data, iob_len ( iobuf ) ); + + path_record = iobuf->data; + path = &ipoib_path_cache[ipoib_path_cache_idx]; + memcpy ( &path->gid, &path_record->dgid, + sizeof ( path->gid ) ); + path->dlid = ntohs ( path_record->dlid ); + path->sl = ( path_record->reserved__sl & 0x0f ); + path->rate = ( path_record->rate_selector__rate & 0x3f ); + DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx dlid %x sl %x rate %x\n", + htonl ( path->gid.u.dwords[0] ), + htonl ( path->gid.u.dwords[1] ), + htonl ( path->gid.u.dwords[2] ), + htonl ( path->gid.u.dwords[3] ), + path->dlid, path->sl, path->rate ); + + /* Update path cache index */ + ipoib_path_cache_idx++; + if ( ipoib_path_cache_idx == IPOIB_NUM_CACHED_PATHS ) + ipoib_path_cache_idx = 0; } ipoib->meta.recv_fill--; @@ -590,7 +671,7 @@ int ipoib_probe ( struct ib_device *ibdev ) { IPOIB_META_NUM_CQES, IPOIB_META_NUM_SEND_WQES, IPOIB_META_NUM_RECV_WQES, - IB_SA_QKEY ) ) != 0 ) { + IB_GLOBAL_QKEY ) ) != 0 ) { DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n", ipoib, strerror ( rc ) ); goto err_create_meta_qset; diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 6b5bd066a..0fd832e70 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -749,7 +749,7 @@ static void arbel_ring_doorbell ( struct 
arbel *arbel, /** GID used for GID-less send work queue entries */ static const struct ib_gid arbel_no_gid = { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0 } + { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0 } } }; /** @@ -1238,12 +1238,12 @@ static int arbel_get_port_gid ( struct arbel *arbel, /* Port info gives us the first half of the port GID */ if ( ( rc = arbel_get_port_info ( arbel, &u.port_info ) ) != 0 ) return rc; - memcpy ( &port_gid->bytes[0], u.port_info.gid_prefix, 8 ); - + memcpy ( &port_gid->u.bytes[0], u.port_info.gid_prefix, 8 ); + /* GUID info gives us the second half of the port GID */ if ( ( rc = arbel_get_guid_info ( arbel, &u.guid_info ) ) != 0 ) return rc; - memcpy ( &port_gid->bytes[8], u.guid_info.gid_local, 8 ); + memcpy ( &port_gid->u.bytes[8], u.guid_info.gid_local, 8 ); return 0; } @@ -1262,8 +1262,8 @@ static int arbel_get_sm_lid ( struct arbel *arbel, static int arbel_get_broadcast_gid ( struct arbel *arbel, struct ib_gid *broadcast_gid ) { static const struct ib_gid ipv4_broadcast_gid = { - { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } + { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } } }; struct ib_mad_pkey_table pkey_table; int rc; @@ -1275,7 +1275,7 @@ static int arbel_get_broadcast_gid ( struct arbel *arbel, /* Add partition key */ if ( ( rc = arbel_get_pkey_table ( arbel, &pkey_table ) ) != 0 ) return rc; - memcpy ( &broadcast_gid->bytes[4], &pkey_table.pkey[0][0], + memcpy ( &broadcast_gid->u.bytes[4], &pkey_table.pkey[0][0], sizeof ( pkey_table.pkey[0][0] ) ); return 0; diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 2d1d94338..845c4c228 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -14,11 +14,14 @@ #define IB_SA_QPN 1 /** Subnet administrator queue key */ -#define IB_SA_QKEY 0x80010000UL +#define IB_GLOBAL_QKEY 0x80010000UL /** An 
Infiniband Global Identifier */ struct ib_gid { - uint8_t bytes[16]; + union { + uint8_t bytes[16]; + uint32_t dwords[4]; + } u; }; /** An Infiniband Global Route Header */ @@ -136,7 +139,7 @@ struct ib_address_vector { /** Destination Queue Pair */ unsigned int dest_qp; /** Queue key */ - unsigned int qkey; + unsigned long qkey; /** Destination Local ID */ unsigned int dlid; /** Rate */ @@ -530,9 +533,12 @@ struct ib_mad_path_record { uint16_t slid; uint32_t hop_limit__flow_label__raw_traffic; uint32_t pkey__numb_path__reversible__tclass; - uint32_t rate__rate_selector__mtu__mtu_selector__sl__reserved; + uint8_t reserved1; + uint8_t reserved__sl; + uint8_t mtu_selector__mtu; + uint8_t rate_selector__rate; uint32_t preference__packet_lifetime__packet_lifetime_selector; - uint32_t reserved1[35]; + uint32_t reserved2[35]; } __attribute__ (( packed )); union ib_mad { -- cgit v1.2.3-55-g7522 From 2ed1acb9e938cbb5ae331c7d858f193fd40170b4 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 11:50:25 +0100 Subject: Broadcast GID is now calculated by IPoIB layer. 
--- src/drivers/net/ipoib.c | 103 ++++++++++++++++++++++++++++++++++-- src/drivers/net/mlx_ipoib/mt25218.c | 27 +++------- src/include/gpxe/infiniband.h | 25 ++++++++- 3 files changed, 130 insertions(+), 25 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index 81011405c..c4eea5a73 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -84,6 +84,8 @@ struct ipoib_device { struct ipoib_queue_set data; /** Data queue set */ struct ipoib_queue_set meta; + /** Broadcast GID */ + struct ib_gid broadcast_gid; }; /** @@ -112,6 +114,15 @@ static struct ipoib_cached_path ipoib_path_cache[IPOIB_NUM_CACHED_PATHS]; /** Oldest IPoIB path cache entry index */ static unsigned int ipoib_path_cache_idx = 0; +/** IPoIB metadata TID */ +static uint32_t ipoib_meta_tid = 0; + +/** IPv4 broadcast GID */ +static const struct ib_gid ipv4_broadcast_gid = { + { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } } +}; + /**************************************************************************** * * IPoIB link layer @@ -319,7 +330,6 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib, struct io_buffer *iobuf; struct ib_mad_path_record *path_record; struct ib_address_vector av; - static uint32_t tid = 0; int rc; /* Allocate I/O buffer */ @@ -336,7 +346,7 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib, path_record->mad_hdr.class_version = 2; path_record->mad_hdr.method = IB_MGMT_METHOD_GET; path_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC ); - path_record->mad_hdr.tid = tid++; + path_record->mad_hdr.tid = ipoib_meta_tid++; path_record->sa_hdr.comp_mask[1] = htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID ); memcpy ( &path_record->dgid, gid, sizeof ( path_record->dgid ) ); @@ -361,6 +371,65 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib, return 0; } +/** + * Transmit multicast group membership request + * + * @v 
ipoib IPoIB device + * @v gid Multicast GID + * @v join Join (rather than leave) group + * @ret rc Return status code + */ +static int ipoib_mc_member_record ( struct ipoib_device *ipoib, + struct ib_gid *gid, int join ) { + struct ib_device *ibdev = ipoib->ibdev; + struct io_buffer *iobuf; + struct ib_mad_mc_member_record *mc_member_record; + struct ib_address_vector av; + int rc; + + /* Allocate I/O buffer */ + iobuf = alloc_iob ( sizeof ( *mc_member_record ) ); + if ( ! iobuf ) + return -ENOMEM; + iob_put ( iobuf, sizeof ( *mc_member_record ) ); + mc_member_record = iobuf->data; + memset ( mc_member_record, 0, sizeof ( *mc_member_record ) ); + + /* Construct path record request */ + mc_member_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION; + mc_member_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + mc_member_record->mad_hdr.class_version = 2; + mc_member_record->mad_hdr.method = + ( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE ); + mc_member_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ); + mc_member_record->mad_hdr.tid = ipoib_meta_tid++; + mc_member_record->sa_hdr.comp_mask[1] = + htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | + IB_SA_MCMEMBER_REC_JOIN_STATE ); + mc_member_record->scope__join_state = 1; + memcpy ( &mc_member_record->mgid, gid, + sizeof ( mc_member_record->mgid ) ); + memcpy ( &mc_member_record->port_gid, &ibdev->port_gid, + sizeof ( mc_member_record->port_gid ) ); + + /* Construct address vector */ + memset ( &av, 0, sizeof ( av ) ); + av.dlid = ibdev->sm_lid; + av.dest_qp = IB_SA_QPN; + av.qkey = IB_GLOBAL_QKEY; + + /* Post send request */ + if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av, + iobuf ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n", + ipoib, strerror ( rc ) ); + free_iob ( iobuf ); + return rc; + } + + return 0; +} + /** * Transmit packet via IPoIB network device * @@ -591,7 +660,7 @@ static int ipoib_open ( struct net_device *netdev ) { /* Attach 
to broadcast multicast GID */ if ( ( rc = ib_mcast_attach ( ibdev, ipoib->data.qp, - &ibdev->broadcast_gid ) ) != 0 ) { + &ipoib->broadcast_gid ) ) != 0 ) { DBG ( "Could not attach to broadcast GID: %s\n", strerror ( rc ) ); return rc; @@ -628,6 +697,27 @@ static struct net_device_operations ipoib_operations = { .irq = ipoib_irq, }; +/** + * Join IPoIB broadcast group + * + * @v ipoib IPoIB device + * @ret rc Return status code + */ +int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) { + int rc; + + /* Send join request */ + if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid, + 1 ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n", + ipoib, strerror ( rc ) ); + return rc; + } + + + return 0; +} + /** * Probe IPoIB device * @@ -652,6 +742,11 @@ int ipoib_probe ( struct ib_device *ibdev ) { ipoib->netdev = netdev; ipoib->ibdev = ibdev; + /* Calculate broadcast GID */ + memcpy ( &ipoib->broadcast_gid, &ipv4_broadcast_gid, + sizeof ( ipoib->broadcast_gid ) ); + ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey ); + /* Allocate metadata queue set */ if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta, IPOIB_META_NUM_CQES, @@ -663,6 +758,8 @@ int ipoib_probe ( struct ib_device *ibdev ) { goto err_create_meta_qset; } + + /* Allocate data queue set */ if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data, IPOIB_DATA_NUM_CQES, diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 5f3c3c4da..920874cc3 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -1251,25 +1251,13 @@ static int arbel_get_sm_lid ( struct arbel *arbel, return 0; } -static int arbel_get_broadcast_gid ( struct arbel *arbel, - struct ib_gid *broadcast_gid ) { - static const struct ib_gid ipv4_broadcast_gid = { - { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } } - }; +static int arbel_get_pkey ( struct arbel *arbel, 
unsigned long *pkey ) { struct ib_mad_pkey_table pkey_table; int rc; - /* Start with the IPv4 broadcast GID */ - memcpy ( broadcast_gid, &ipv4_broadcast_gid, - sizeof ( *broadcast_gid ) ); - - /* Add partition key */ if ( ( rc = arbel_get_pkey_table ( arbel, &pkey_table ) ) != 0 ) return rc; - memcpy ( &broadcast_gid->u.bytes[4], &pkey_table.pkey[0][0], - sizeof ( pkey_table.pkey[0][0] ) ); - + *pkey = ntohs ( pkey_table.pkey[0][0] ); return 0; } @@ -1340,12 +1328,11 @@ static int arbel_probe ( struct pci_device *pci, goto err_get_port_gid; } - /* Get broadcast GID */ - if ( ( rc = arbel_get_broadcast_gid ( arbel, - &ibdev->broadcast_gid ) ) != 0 ){ - DBGC ( arbel, "Arbel %p could not determine broadcast GID: " + /* Get partition key */ + if ( ( rc = arbel_get_pkey ( arbel, &ibdev->pkey ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine partition key: " "%s\n", arbel, strerror ( rc ) ); - goto err_get_broadcast_gid; + goto err_get_pkey; } struct ud_av_st *bcast_av = ib_data.bcast_av; @@ -1370,7 +1357,7 @@ static int arbel_probe ( struct pci_device *pci, return 0; err_ipoib_probe: - err_get_broadcast_gid: + err_get_pkey: err_get_port_gid: err_get_sm_lid: err_query_dev_lim: diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 845c4c228..8e358bce9 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -20,6 +20,7 @@ struct ib_gid { union { uint8_t bytes[16]; + uint16_t words[8]; uint32_t dwords[4]; } u; }; @@ -258,10 +259,10 @@ struct ib_device_operations { struct ib_device { /** Port GID */ struct ib_gid port_gid; - /** Broadcast GID */ - struct ib_gid broadcast_gid; /** Subnet manager LID */ unsigned long sm_lid; + /** Partition key */ + unsigned int pkey; /** Underlying device */ struct device *dev; /** Infiniband operations */ @@ -541,6 +542,26 @@ struct ib_mad_path_record { uint32_t reserved2[35]; } __attribute__ (( packed )); +struct ib_mad_mc_member_record { + struct ib_mad_hdr mad_hdr; + struct 
ib_rmpp_hdr rmpp_hdr; + struct ib_sa_hdr sa_hdr; + struct ib_gid mgid; + struct ib_gid port_gid; + uint32_t qkey; + uint16_t mlid; + uint8_t mtu_selector__mtu; + uint8_t tclass; + uint16_t pkey; + uint8_t rate_selector__rate; + uint8_t packet_lifetime_selector__packet_lifetime; + uint32_t sl__flow_label__hop_limit; + uint8_t scope__join_state; + uint8_t proxy_join__reserved; + uint16_t reserved0; + uint32_t reserved1[37]; +} __attribute__ (( packed )); + union ib_mad { struct ib_mad_hdr mad_hdr; struct ib_mad_data data; -- cgit v1.2.3-55-g7522 From bdac59172695b746b48550a1ebff02c405e7b3f8 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 13:31:13 +0100 Subject: Some interesting packet corruption happening now. --- src/drivers/net/ipoib.c | 223 +++++++++++++++++++++++++++++------- src/drivers/net/mlx_ipoib/mt25218.c | 1 + src/include/gpxe/infiniband.h | 10 +- 3 files changed, 188 insertions(+), 46 deletions(-) (limited to 'src/include') diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index c4eea5a73..077912b30 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -18,9 +18,11 @@ #include #include +#include #include #include #include +#include "timer.h" #include #include #include @@ -86,6 +88,12 @@ struct ipoib_device { struct ipoib_queue_set meta; /** Broadcast GID */ struct ib_gid broadcast_gid; + /** Broadcast LID */ + unsigned int broadcast_lid; + /** Joined to broadcast group */ + int broadcast_joined; + /** Data queue key */ + unsigned long data_qkey; }; /** @@ -114,6 +122,12 @@ static struct ipoib_cached_path ipoib_path_cache[IPOIB_NUM_CACHED_PATHS]; /** Oldest IPoIB path cache entry index */ static unsigned int ipoib_path_cache_idx = 0; +/** TID half used to identify get path record replies */ +#define IPOIB_TID_GET_PATH_REC 0x11111111UL + +/** TID half used to identify multicast member record replies */ +#define IPOIB_TID_MC_MEMBER_REC 0x22222222UL + /** IPoIB metadata TID */ static uint32_t ipoib_meta_tid = 
0; @@ -123,6 +137,9 @@ static const struct ib_gid ipv4_broadcast_gid = { 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } } }; +/** Maximum time we will wait for the broadcast join to succeed */ +#define IPOIB_JOIN_MAX_DELAY_MS 1000 + /**************************************************************************** * * IPoIB link layer @@ -181,8 +198,8 @@ static int ipoib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { /* Sanity check */ if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) { - DBG ( "IPoIB packet too short (%d bytes)\n", - iob_len ( iobuf ) ); + DBG ( "IPoIB packet too short for link-layer header\n" ); + DBG_HD ( iobuf->data, iob_len ( iobuf ) ); free_iob ( iobuf ); return -EINVAL; } @@ -346,7 +363,8 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib, path_record->mad_hdr.class_version = 2; path_record->mad_hdr.method = IB_MGMT_METHOD_GET; path_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC ); - path_record->mad_hdr.tid = ipoib_meta_tid++; + path_record->mad_hdr.tid[0] = IPOIB_TID_GET_PATH_REC; + path_record->mad_hdr.tid[1] = ipoib_meta_tid++; path_record->sa_hdr.comp_mask[1] = htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID ); memcpy ( &path_record->dgid, gid, sizeof ( path_record->dgid ) ); @@ -402,7 +420,8 @@ static int ipoib_mc_member_record ( struct ipoib_device *ipoib, mc_member_record->mad_hdr.method = ( join ? 
IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE ); mc_member_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ); - mc_member_record->mad_hdr.tid = ipoib_meta_tid++; + mc_member_record->mad_hdr.tid[0] = IPOIB_TID_MC_MEMBER_REC; + mc_member_record->mad_hdr.tid[1] = ipoib_meta_tid++; mc_member_record->sa_hdr.comp_mask[1] = htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_JOIN_STATE ); @@ -443,19 +462,29 @@ static int ipoib_transmit ( struct net_device *netdev, struct ib_device *ibdev = ipoib->ibdev; struct ipoib_pseudo_hdr *ipoib_pshdr = iobuf->data; struct ib_address_vector av; + struct ib_gid *gid; struct ipoib_cached_path *path; int rc; + /* Sanity check */ if ( iob_len ( iobuf ) < sizeof ( *ipoib_pshdr ) ) { DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib ); return -EINVAL; } + iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) ); /* Construct address vector */ memset ( &av, 0, sizeof ( av ) ); + av.qkey = IB_GLOBAL_QKEY; + av.gid_present = 1; if ( ipoib_pshdr->peer.qpn == htonl ( IPOIB_BROADCAST_QPN ) ) { /* Broadcast address */ +#if 0 memcpy ( &av, &hack_ipoib_bcast_av, sizeof ( av ) ); +#endif + av.dest_qp = IB_BROADCAST_QPN; + av.dlid = ipoib->broadcast_lid; + gid = &ipoib->broadcast_gid; } else { /* Unicast - look in path cache */ path = ipoib_find_cached_path ( &ipoib_pshdr->peer.gid ); @@ -467,15 +496,13 @@ static int ipoib_transmit ( struct net_device *netdev, return rc; } av.dest_qp = ntohl ( ipoib_pshdr->peer.qpn ); - av.qkey = IB_GLOBAL_QKEY; av.dlid = path->dlid; av.rate = path->rate; av.sl = path->sl; - av.gid_present = 1; - memcpy ( &av.gid, &ipoib_pshdr->peer.gid, sizeof ( av.gid ) ); + gid = &ipoib_pshdr->peer.gid; } + memcpy ( &av.gid, gid, sizeof ( av.gid ) ); - iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) ); return ib_post_send ( ibdev, ipoib->data.qp, &av, iobuf ); } @@ -515,14 +542,33 @@ static void ipoib_data_complete_recv ( struct ib_device *ibdev __unused, if ( completion->syndrome ) { netdev_rx_err ( 
netdev, iobuf, -EIO ); - } else { - iob_put ( iobuf, completion->len ); - iob_pull ( iobuf, ( sizeof ( struct ib_global_route_header ) - - sizeof ( *ipoib_pshdr ) ) ); - /* FIXME: fill in a MAC address for the sake of AoE! */ - netdev_rx ( netdev, iobuf ); + goto done; + } + + iob_put ( iobuf, completion->len ); + if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) { + DBGC ( ipoib, "IPoIB %p received data packet too short to " + "contain GRH\n", ipoib ); + DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) ); + netdev_rx_err ( netdev, iobuf, -EIO ); + goto done; + } + iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); + + if ( iob_len ( iobuf ) < sizeof ( struct ipoib_real_hdr ) ) { + DBGC ( ipoib, "IPoIB %p received data packet too short to " + "contain IPoIB header\n", ipoib ); + DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) ); + netdev_rx_err ( netdev, iobuf, -EIO ); + goto done; } + ipoib_pshdr = iob_push ( iobuf, sizeof ( *ipoib_pshdr ) ); + /* FIXME: fill in a MAC address for the sake of AoE! 
*/ + + netdev_rx ( netdev, iobuf ); + + done: ipoib->data.recv_fill--; } @@ -548,6 +594,52 @@ static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused, free_iob ( iobuf ); } +/** + * Handle received IPoIB path record + * + * @v ipoib IPoIB device + * @v path_record Path record + */ +static void ipoib_recv_path_record ( struct ipoib_device *ipoib __unused, + struct ib_mad_path_record *path_record ) { + struct ipoib_cached_path *path; + + /* Update path cache entry */ + path = &ipoib_path_cache[ipoib_path_cache_idx]; + memcpy ( &path->gid, &path_record->dgid, sizeof ( path->gid ) ); + path->dlid = ntohs ( path_record->dlid ); + path->sl = ( path_record->reserved__sl & 0x0f ); + path->rate = ( path_record->rate_selector__rate & 0x3f ); + + DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx dlid %x sl %x rate %x\n", + htonl ( path->gid.u.dwords[0] ), htonl ( path->gid.u.dwords[1] ), + htonl ( path->gid.u.dwords[2] ), htonl ( path->gid.u.dwords[3] ), + path->dlid, path->sl, path->rate ); + + /* Update path cache index */ + ipoib_path_cache_idx++; + if ( ipoib_path_cache_idx == IPOIB_NUM_CACHED_PATHS ) + ipoib_path_cache_idx = 0; +} + +/** + * Handle received IPoIB multicast membership record + * + * @v ipoib IPoIB device + * @v mc_member_record Multicast membership record + */ +static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib, + struct ib_mad_mc_member_record *mc_member_record ) { + /* Record parameters */ + ipoib->broadcast_joined = + ( mc_member_record->scope__join_state & 0x0f ); + ipoib->data_qkey = ntohl ( mc_member_record->qkey ); + ipoib->broadcast_lid = ntohs ( mc_member_record->mlid ); + DBGC ( ipoib, "IPoIB %p %s broadcast group: qkey %lx mlid %x\n", + ipoib, ( ipoib->broadcast_joined ? 
"joined" : "left" ), + ipoib->data_qkey, ipoib->broadcast_lid ); +} + /** * Handle IPoIB metadata receive completion * @@ -562,36 +654,51 @@ static void ipoib_meta_complete_recv ( struct ib_device *ibdev __unused, struct io_buffer *iobuf ) { struct net_device *netdev = qp->owner_priv; struct ipoib_device *ipoib = netdev->priv; - struct ib_mad_path_record *path_record; - struct ipoib_cached_path *path; + union ib_mad *mad; if ( completion->syndrome ) { DBGC ( ipoib, "IPoIB %p metadata RX completion error %x\n", ipoib, completion->syndrome ); - } else { - /* Update path cache */ - iob_put ( iobuf, completion->len ); - iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); - path_record = iobuf->data; - path = &ipoib_path_cache[ipoib_path_cache_idx]; - memcpy ( &path->gid, &path_record->dgid, - sizeof ( path->gid ) ); - path->dlid = ntohs ( path_record->dlid ); - path->sl = ( path_record->reserved__sl & 0x0f ); - path->rate = ( path_record->rate_selector__rate & 0x3f ); - DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx dlid %x sl %x rate %x\n", - htonl ( path->gid.u.dwords[0] ), - htonl ( path->gid.u.dwords[1] ), - htonl ( path->gid.u.dwords[2] ), - htonl ( path->gid.u.dwords[3] ), - path->dlid, path->sl, path->rate ); - - /* Update path cache index */ - ipoib_path_cache_idx++; - if ( ipoib_path_cache_idx == IPOIB_NUM_CACHED_PATHS ) - ipoib_path_cache_idx = 0; + goto done; + } + + iob_put ( iobuf, completion->len ); + if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) { + DBGC ( ipoib, "IPoIB %p received metadata packet too short " + "to contain GRH\n", ipoib ); + DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) ); + goto done; + } + iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); + if ( iob_len ( iobuf ) < sizeof ( *mad ) ) { + DBGC ( ipoib, "IPoIB %p received metadata packet too short " + "to contain reply\n", ipoib ); + DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) ); + goto done; + } + mad = iobuf->data; + + if ( mad->mad_hdr.status != 
0 ) { + DBGC ( ipoib, "IPoIB %p metadata RX err status %04x\n", + ipoib, ntohs ( mad->mad_hdr.status ) ); + goto done; + } + + switch ( mad->mad_hdr.tid[0] ) { + case IPOIB_TID_GET_PATH_REC: + ipoib_recv_path_record ( ipoib, &mad->path_record ); + break; + case IPOIB_TID_MC_MEMBER_REC: + ipoib_recv_mc_member_record ( ipoib, &mad->mc_member_record ); + break; + default: + DBGC ( ipoib, "IPoIB %p unwanted response:\n", + ipoib ); + DBGC_HD ( ipoib, mad, sizeof ( *mad ) ); + break; } + done: ipoib->meta.recv_fill--; free_iob ( iobuf ); } @@ -628,10 +735,10 @@ static void ipoib_poll ( struct net_device *netdev ) { struct ipoib_device *ipoib = netdev->priv; struct ib_device *ibdev = ipoib->ibdev; - ib_poll_cq ( ibdev, ipoib->data.cq, ipoib_data_complete_send, - ipoib_data_complete_recv ); ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send, ipoib_meta_complete_recv ); + ib_poll_cq ( ibdev, ipoib->data.cq, ipoib_data_complete_send, + ipoib_data_complete_recv ); ipoib_refill_recv ( ipoib, &ipoib->meta ); ipoib_refill_recv ( ipoib, &ipoib->data ); } @@ -703,9 +810,14 @@ static struct net_device_operations ipoib_operations = { * @v ipoib IPoIB device * @ret rc Return status code */ -int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) { +static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) { + struct ib_device *ibdev = ipoib->ibdev; + unsigned int delay_ms; int rc; + /* Make sure we have some receive descriptors */ + ipoib_refill_recv ( ipoib, &ipoib->meta ); + /* Send join request */ if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid, 1 ) ) != 0 ) { @@ -714,8 +826,23 @@ int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) { return rc; } + /* Wait for join to complete. Ideally we wouldn't delay for + * this long, but we need the queue key before we can set up + * the data queue pair, which we need before we can know the + * MAC address. 
+ */ + for ( delay_ms = IPOIB_JOIN_MAX_DELAY_MS ; delay_ms ; delay_ms-- ) { + mdelay ( 1 ); + ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send, + ipoib_meta_complete_recv ); + ipoib_refill_recv ( ipoib, &ipoib->meta ); + if ( ipoib->broadcast_joined ) + return 0; + } + DBGC ( ipoib, "IPoIB %p timed out waiting for broadcast join\n", + ipoib ); - return 0; + return -ETIMEDOUT; } /** @@ -758,14 +885,23 @@ int ipoib_probe ( struct ib_device *ibdev ) { goto err_create_meta_qset; } +#if 0 + ipoib->data_qkey = hack_ipoib_qkey; +#endif + /* Join broadcast group */ + if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n", + ipoib, strerror ( rc ) ); + goto err_join_broadcast_group; + } /* Allocate data queue set */ if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data, IPOIB_DATA_NUM_CQES, IPOIB_DATA_NUM_SEND_WQES, IPOIB_DATA_NUM_RECV_WQES, - hack_ipoib_qkey ) ) != 0 ) { + ipoib->data_qkey ) ) != 0 ) { DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n", ipoib, strerror ( rc ) ); goto err_create_data_qset; @@ -784,6 +920,7 @@ int ipoib_probe ( struct ib_device *ibdev ) { err_register_netdev: ipoib_destroy_qset ( ipoib, &ipoib->data ); + err_join_broadcast_group: err_create_data_qset: ipoib_destroy_qset ( ipoib, &ipoib->meta ); err_create_meta_qset: diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 920874cc3..dc497add4 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -975,6 +975,7 @@ static int arbel_complete ( struct ib_device *ibdev, */ static void arbel_drain_eq ( struct arbel *arbel ) { #warning "drain the event queue" + drain_eq(); } /** diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 8e358bce9..06745ba9b 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -11,10 +11,13 @@ #include /** Subnet administrator QPN */ -#define IB_SA_QPN 1 
+#define IB_SA_QPN 1 + +/** Broadcast QPN */ +#define IB_BROADCAST_QPN 0xffffffUL /** Subnet administrator queue key */ -#define IB_GLOBAL_QKEY 0x80010000UL +#define IB_GLOBAL_QKEY 0x80010000UL /** An Infiniband Global Identifier */ struct ib_gid { @@ -466,7 +469,7 @@ struct ib_mad_hdr { uint8_t method; uint16_t status; uint16_t class_specific; - uint64_t tid; + uint32_t tid[2]; uint16_t attr_id; uint16_t resv; uint32_t attr_mod; @@ -569,6 +572,7 @@ union ib_mad { struct ib_mad_port_info port_info; struct ib_mad_pkey_table pkey_table; struct ib_mad_path_record path_record; + struct ib_mad_mc_member_record mc_member_record; } __attribute__ (( packed )); #endif /* _GPXE_INFINIBAND_H */ -- cgit v1.2.3-55-g7522 From 2c56ede6f80127b1a352f4bafc94821fa98f127e Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 18:38:04 +0100 Subject: Moved iobuf.h assertions outside the static inline functions, so that the assert message's file and line number gives some clue as to the real location of the problem. Added similar assertions to list.h. 
--- src/include/gpxe/iobuf.h | 28 ++++++++++++++++++++++++---- src/include/gpxe/list.h | 18 ++++++++++++++++++ 2 files changed, 42 insertions(+), 4 deletions(-) (limited to 'src/include') diff --git a/src/include/gpxe/iobuf.h b/src/include/gpxe/iobuf.h index e3db01ac2..ff7877544 100644 --- a/src/include/gpxe/iobuf.h +++ b/src/include/gpxe/iobuf.h @@ -67,9 +67,13 @@ struct io_buffer { static inline void * iob_reserve ( struct io_buffer *iobuf, size_t len ) { iobuf->data += len; iobuf->tail += len; - assert ( iobuf->tail <= iobuf->end ); return iobuf->data; } +#define iob_reserve( iobuf, len ) ( { \ + void *__result; \ + __result = iob_reserve ( (iobuf), (len) ); \ + assert ( (iobuf)->tail <= (iobuf)->end ); \ + __result; } ) /** * Add data to start of I/O buffer @@ -80,9 +84,13 @@ static inline void * iob_reserve ( struct io_buffer *iobuf, size_t len ) { */ static inline void * iob_push ( struct io_buffer *iobuf, size_t len ) { iobuf->data -= len; - assert ( iobuf->data >= iobuf->head ); return iobuf->data; } +#define iob_push( iobuf, len ) ( { \ + void *__result; \ + __result = iob_push ( (iobuf), (len) ); \ + assert ( (iobuf)->data >= (iobuf)->head ); \ + __result; } ) /** * Remove data from start of I/O buffer @@ -96,6 +104,11 @@ static inline void * iob_pull ( struct io_buffer *iobuf, size_t len ) { assert ( iobuf->data <= iobuf->tail ); return iobuf->data; } +#define iob_pull( iobuf, len ) ( { \ + void *__result; \ + __result = iob_pull ( (iobuf), (len) ); \ + assert ( (iobuf)->data <= (iobuf)->tail ); \ + __result; } ) /** * Add data to end of I/O buffer @@ -107,9 +120,13 @@ static inline void * iob_pull ( struct io_buffer *iobuf, size_t len ) { static inline void * iob_put ( struct io_buffer *iobuf, size_t len ) { void *old_tail = iobuf->tail; iobuf->tail += len; - assert ( iobuf->tail <= iobuf->end ); return old_tail; } +#define iob_put( iobuf, len ) ( { \ + void *__result; \ + __result = iob_put ( (iobuf), (len) ); \ + assert ( (iobuf)->tail <= (iobuf)->end 
); \ + __result; } ) /** * Remove data from end of I/O buffer @@ -119,8 +136,11 @@ static inline void * iob_put ( struct io_buffer *iobuf, size_t len ) { */ static inline void iob_unput ( struct io_buffer *iobuf, size_t len ) { iobuf->tail -= len; - assert ( iobuf->tail >= iobuf->data ); } +#define iob_unput( iobuf, len ) do { \ + iob_unput ( (iobuf), (len) ); \ + assert ( (iobuf)->tail >= (iobuf)->data ); \ + } while ( 0 ) /** * Empty an I/O buffer diff --git a/src/include/gpxe/list.h b/src/include/gpxe/list.h index 0e65901c8..602382be6 100644 --- a/src/include/gpxe/list.h +++ b/src/include/gpxe/list.h @@ -10,6 +10,7 @@ */ #include +#include /* * Simple doubly linked list implementation. @@ -62,6 +63,11 @@ static inline void __list_add ( struct list_head *new, static inline void list_add ( struct list_head *new, struct list_head *head ) { __list_add ( new, head, head->next ); } +#define list_add( new, head ) do { \ + assert ( (head)->next->prev == (head) ); \ + assert ( (head)->prev->next == (head) ); \ + list_add ( (new), (head) ); \ + } while ( 0 ) /** * Add a new entry to the tail of a list @@ -76,6 +82,11 @@ static inline void list_add_tail ( struct list_head *new, struct list_head *head ) { __list_add ( new, head->prev, head ); } +#define list_add_tail( new, head ) do { \ + assert ( (head)->next->prev == (head) ); \ + assert ( (head)->prev->next == (head) ); \ + list_add_tail ( (new), (head) ); \ + } while ( 0 ) /* * Delete a list entry by making the prev/next entries @@ -101,6 +112,13 @@ static inline void __list_del ( struct list_head * prev, static inline void list_del ( struct list_head *entry ) { __list_del ( entry->prev, entry->next ); } +#define list_del( entry ) do { \ + assert ( (entry)->prev != NULL ); \ + assert ( (entry)->next != NULL ); \ + assert ( (entry)->next->prev == (entry) ); \ + assert ( (entry)->prev->next == (entry) ); \ + list_del ( (entry) ); \ + } while ( 0 ) /** * Test whether a list is empty -- cgit v1.2.3-55-g7522 From 
2f2388185641596ddedb608c5463dae997723db1 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 18 Sep 2007 06:37:22 +0100 Subject: Added arbel.c --- src/include/gpxe/errfile.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/include') diff --git a/src/include/gpxe/errfile.h b/src/include/gpxe/errfile.h index 325d23872..1b0ed33f1 100644 --- a/src/include/gpxe/errfile.h +++ b/src/include/gpxe/errfile.h @@ -102,9 +102,9 @@ #define ERRFILE_via_velocity ( ERRFILE_DRIVER | 0x00450000 ) #define ERRFILE_w89c840 ( ERRFILE_DRIVER | 0x00460000 ) #define ERRFILE_ipoib ( ERRFILE_DRIVER | 0x00470000 ) -#define ERRFILE_mt25218 ( ERRFILE_DRIVER | 0x00480000 ) #define ERRFILE_scsi ( ERRFILE_DRIVER | 0x00700000 ) +#define ERRFILE_arbel ( ERRFILE_DRIVER | 0x00710000 ) #define ERRFILE_aoe ( ERRFILE_NET | 0x00000000 ) #define ERRFILE_arp ( ERRFILE_NET | 0x00010000 ) -- cgit v1.2.3-55-g7522 From 30717896f5b0c8d42860c928440ccb3eb0a559cc Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 18 Sep 2007 07:02:41 +0100 Subject: IB can't afford many RX buffers, because the MTU is so large. Reduce the TCP window to compensate. --- src/include/gpxe/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/include') diff --git a/src/include/gpxe/tcp.h b/src/include/gpxe/tcp.h index d967791f1..e2753120c 100644 --- a/src/include/gpxe/tcp.h +++ b/src/include/gpxe/tcp.h @@ -275,7 +275,8 @@ struct tcp_options { * actually use 65536, we use a window size of (65536-4) to ensure * that payloads remain dword-aligned. */ -#define TCP_MAX_WINDOW_SIZE ( 65536 - 4 ) +//#define TCP_MAX_WINDOW_SIZE ( 65536 - 4 ) +#define TCP_MAX_WINDOW_SIZE 4096 /** * Path MTU -- cgit v1.2.3-55-g7522