summaryrefslogtreecommitdiffstats
path: root/src/drivers/bus/pcimsix.c
blob: 008c1c22fee63ca17dfea7c7f272319944f05d2b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
/*
 * Copyright (C) 2019 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 *
 * You can also choose to distribute this program under the terms of
 * the Unmodified Binary Distribution Licence (as given in the file
 * COPYING.UBDL), provided that you have satisfied its requirements.
 */

FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
FILE_SECBOOT ( PERMITTED );

#include <stdint.h>
#include <errno.h>
#include <assert.h>
#include <ipxe/pci.h>
#include <ipxe/pcimsix.h>

/** @file
 *
 * PCI MSI-X interrupts
 *
 * Interrupts as such are not used in iPXE, which operates in polling
 * mode.  However, some network cards (such as the Intel 40GbE and
 * 100GbE NICs) will defer writing out completions until the point of
 * asserting an MSI-X interrupt.
 *
 * From the point of view of the PCI device, asserting an MSI-X
 * interrupt is just a 32-bit DMA write of an opaque value to an
 * opaque target address.  The PCI device has no know to know whether
 * or not the target address corresponds to a real APIC.
 *
 * We can therefore trick the PCI device into believing that it is
 * asserting an MSI-X interrupt, by configuring it to write an opaque
 * 32-bit value to a dummy target address in host memory.  This is
 * sufficient to trigger the associated write of the completions to
 * host memory.
 *
 * When running in a virtual machine, the hypervisor will intercept
 * our attempt to configure MSI-X on the PCI device.  The physical
 * hardware will be configured to raise an interrupt under the
 * hypervisor's control, which will then be reflected back into the
 * virtual machine.  The opaque value that we write will be assumed to
 * indicate an interrupt vector number (as would normally be the case
 * when configuring MSI-X), and the opaque address will generally be
 * ignored.  The reflected interrupt will be ignored (since it is not
 * enabled within the virtual machine), but the device still asserts
 * an MSI-X interrupt and so still triggers the associated write of
 * the completions to host memory.
 *
 * Note that since the opaque target address will generally be ignored
 * by the hypervisor, we cannot examine the value present at the dummy
 * target address to find out whether or not an interrupt has been
 * raised.
 */

/**
 * Get MSI-X descriptor name (for debugging)
 *
 * @v cfg		Configuration space offset
 * @ret name		Descriptor name
 */
static const char * pci_msix_name ( unsigned int cfg ) {

	switch ( cfg ) {
	case PCI_MSIX_DESC_TABLE:	return "table";
	case PCI_MSIX_DESC_PBA:		return "PBA";
	default:			return "<UNKNOWN>";
	}
}

/**
 * Map MSI-X BAR portion
 *
 * @v pci		PCI device
 * @v msix		MSI-X capability
 * @v cfg		Configuration space offset
 * @ret io		I/O address
 */
static void * pci_msix_ioremap ( struct pci_device *pci, struct pci_msix *msix,
				 unsigned int cfg ) {
	uint32_t desc;
	unsigned int bar;
	unsigned long start;
	unsigned long offset;
	unsigned long base;
	void *io;

	/* Read descriptor */
	pci_read_config_dword ( pci, ( msix->cap + cfg ), &desc );

	/* Get BAR */
	bar = PCI_MSIX_DESC_BIR ( desc );
	offset = PCI_MSIX_DESC_OFFSET ( desc );
	start = pci_bar_start ( pci, PCI_BASE_ADDRESS ( bar ) );
	if ( ! start ) {
		DBGC ( msix, "MSI-X %p %s could not find BAR%d\n",
		       msix, pci_msix_name ( cfg ), bar );
		return NULL;
	}
	base = ( start + offset );
	DBGC ( msix, "MSI-X %p %s at %#08lx (BAR%d+%#lx)\n",
	       msix, pci_msix_name ( cfg ), base, bar, offset );

	/* Map BAR portion */
	io = pci_ioremap ( pci, ( start + offset ), PCI_MSIX_LEN );
	if ( ! io ) {
		DBGC ( msix, "MSI-X %p %s could not map %#08lx\n",
		       msix, pci_msix_name ( cfg ), base );
		return NULL;
	}

	return io;
}

/**
 * Enable MSI-X interrupts
 *
 * @v pci		PCI device
 * @v msix		MSI-X capability
 * @ret rc		Return status code
 */
int pci_msix_enable ( struct pci_device *pci, struct pci_msix *msix ) {
	uint16_t ctrl;
	physaddr_t msg;
	unsigned int i;
	int rc;

	/* Locate capability */
	msix->cap = pci_find_capability ( pci, PCI_CAP_ID_MSIX );
	if ( ! msix->cap ) {
		DBGC ( msix, "MSI-X %p found no MSI-X capability in "
		       PCI_FMT "\n", msix, PCI_ARGS ( pci ) );
		rc = -ENOENT;
		goto err_cap;
	}

	/* Extract interrupt count */
	pci_read_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), &ctrl );
	msix->count = ( PCI_MSIX_CTRL_SIZE ( ctrl ) + 1 );
	DBGC ( msix, "MSI-X %p has %d vectors for " PCI_FMT "\n",
	       msix, msix->count, PCI_ARGS ( pci ) );

	/* Map MSI-X table */
	msix->table = pci_msix_ioremap ( pci, msix, PCI_MSIX_DESC_TABLE );
	if ( ! msix->table ) {
		rc = -ENOENT;
		goto err_table;
	}

	/* Map pending bit array */
	msix->pba = pci_msix_ioremap ( pci, msix, PCI_MSIX_DESC_PBA );
	if ( ! msix->pba ) {
		rc = -ENOENT;
		goto err_pba;
	}

	/* Allocate dummy target */
	msix->msg = dma_alloc ( &pci->dma, &msix->map, sizeof ( *msix->msg ),
				sizeof ( *msix->msg ) );
	if ( ! msix->msg ) {
		rc = -ENOMEM;
		goto err_msg;
	}

	/* Map all interrupts to dummy target by default */
	msg = dma ( &msix->map, msix->msg );
	for ( i = 0 ; i < msix->count ; i++ )
		pci_msix_map ( msix, i, msg, 0 );

	/* Enable MSI-X */
	ctrl &= ~PCI_MSIX_CTRL_MASK;
	ctrl |= PCI_MSIX_CTRL_ENABLE;
	pci_write_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), ctrl );

	return 0;

	dma_free ( &msix->map, msix->msg, sizeof ( *msix->msg ) );
 err_msg:
	iounmap ( msix->pba );
 err_pba:
	iounmap ( msix->table );
 err_table:
 err_cap:
	return rc;
}

/**
 * Disable MSI-X interrupts
 *
 * @v pci		PCI device
 * @v msix		MSI-X capability
 */
void pci_msix_disable ( struct pci_device *pci, struct pci_msix *msix ) {
	uint16_t ctrl;

	/* Disable MSI-X */
	pci_read_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), &ctrl );
	ctrl &= ~PCI_MSIX_CTRL_ENABLE;
	pci_write_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), ctrl );

	/* Free dummy target */
	dma_free ( &msix->map, msix->msg, sizeof ( *msix->msg ) );

	/* Unmap pending bit array */
	iounmap ( msix->pba );

	/* Unmap MSI-X table */
	iounmap ( msix->table );
}

/**
 * Map MSI-X interrupt vector
 *
 * @v msix		MSI-X capability
 * @v vector		MSI-X vector
 * @v address		Message address
 * @v data		Message data
 */
void pci_msix_map ( struct pci_msix *msix, unsigned int vector,
		    physaddr_t address, uint32_t data ) {
	void *base;

	/* Sanity check */
	assert ( vector < msix->count );

	/* Map interrupt vector */
	base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
	writel ( ( address & 0xffffffffUL ), ( base + PCI_MSIX_ADDRESS_LO ) );
	if ( sizeof ( address ) > sizeof ( uint32_t ) ) {
		writel ( ( ( ( uint64_t ) address ) >> 32 ),
			 ( base + PCI_MSIX_ADDRESS_HI ) );
	} else {
		writel ( 0, ( base + PCI_MSIX_ADDRESS_HI ) );
	}
	writel ( data, ( base + PCI_MSIX_DATA ) );
}

/**
 * Control MSI-X interrupt vector
 *
 * @v msix		MSI-X capability
 * @v vector		MSI-X vector
 * @v mask		Control mask
 */
void pci_msix_control ( struct pci_msix *msix, unsigned int vector,
			uint32_t mask ) {
	void *base;
	uint32_t ctrl;

	/* Mask/unmask interrupt vector */
	base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
	ctrl = readl ( base + PCI_MSIX_CONTROL );
	ctrl &= ~PCI_MSIX_CONTROL_MASK;
	ctrl |= mask;
	writel ( ctrl, ( base + PCI_MSIX_CONTROL ) );
}

/**
 * Dump MSI-X interrupt state (for debugging)
 *
 * @v msix		MSI-X capability
 * @v vector		MSI-X vector
 */
void pci_msix_dump ( struct pci_msix *msix, unsigned int vector ) {
	void *base;
	uint32_t address_hi;
	uint32_t address_lo;
	physaddr_t address;
	uint32_t data;
	uint32_t ctrl;
	uint32_t pba;

	/* Do nothing in non-debug builds */
	if ( ! DBG_LOG )
		return;

	/* Mask/unmask interrupt vector */
	base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
	address_hi = readl ( base + PCI_MSIX_ADDRESS_HI );
	address_lo = readl ( base + PCI_MSIX_ADDRESS_LO );
	data = readl ( base + PCI_MSIX_DATA );
	ctrl = readl ( base + PCI_MSIX_CONTROL );
	pba = readl ( msix->pba );
	address = ( ( ( ( uint64_t ) address_hi ) << 32 ) | address_lo );
	DBGC ( msix, "MSI-X %p vector %d %#08x => %#08lx%s%s\n",
	       msix, vector, data, address,
	       ( ( ctrl & PCI_MSIX_CONTROL_MASK ) ? " (masked)" : "" ),
	       ( ( pba & ( 1 << vector ) ) ? " (pending)" : "" ) );
}