summaryrefslogtreecommitdiffstats
path: root/src/arch/riscv/prefix/libprefix.S
blob: 338131103089d0af6e40a94a16f3fcadb9c23a78 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
/*
 * Copyright (C) 2025 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 *
 * You can also choose to distribute this program under the terms of
 * the Unmodified Binary Distribution Licence (as given in the file
 * COPYING.UBDL), provided that you have satisfied its requirements.
 */

	FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL )

/** @file
 *
 * RISC-V prefix library
 *
 */

#include <config/serial.h>
#include <config/fault.h>

	/* Mark the stack as non-executable (standard GNU toolchain note) */
	.section ".note.GNU-stack", "", @progbits
	.text

	/* Link-time base address of _prefix
	 *
	 * This will not be updated if runtime relocations are applied,
	 * and so always retains the link-time value.
	 */
	.section ".rodata.prefix_link", "a", @progbits
	.balign	( __riscv_xlen / 8 )
prefix_link:
	.dword	_base
	.size	prefix_link, . - prefix_link

	/* Virtual address of _prefix
	 *
	 * This will be updated if runtime relocations are applied, and
	 * so reflects the currently relocated address.
	 */
	.section ".rodata.prefix_virt", "a", @progbits
	.balign	( __riscv_xlen / 8 )
prefix_virt:
	.dword	_prefix
	.size	prefix_virt, . - prefix_virt

/*****************************************************************************
 *
 * Print character via debug console extension
 *
 *****************************************************************************
 *
 * Print a single character via the SBI DBCN extension.
 *
 * Parameters:
 *
 *   a0 - Character to print
 *
 * Returns:
 *
 *   a0 - Zero if character printed successfully
 *   a1 - Overwritten
 *   a6 - Overwritten
 *   a7 - Overwritten
 *
 */

/* SBI debug console extension: extension ID is the ASCII string "DBCN" */
#define SBI_DBCN ( ( 'D' << 24 ) | ( 'B' << 16 ) | ( 'C' << 8 ) | 'N' )
#define SBI_DBCN_WRITE_BYTE 0x02

	/* Print character in a0 via SBI DBCN "write byte" call
	 *
	 * Clobbers a6 (function ID), a7 (extension ID), and a0/a1
	 * (SBI return values); a0 is zero on success.
	 */
	.macro	print_char_dbcn
	li	a7, SBI_DBCN
	li	a6, SBI_DBCN_WRITE_BYTE
	ecall
	.endm

/*****************************************************************************
 *
 * Print character via legacy extension
 *
 *****************************************************************************
 *
 * Print a single character via the SBI putchar legacy extension.
 *
 * Parameters:
 *
 *   a0 - Character to print
 *
 * Returns:
 *
 *   a0 - Overwritten
 *   a7 - Overwritten
 *
 */

/* SBI legacy console putchar (legacy extension ID 0x01) */
#define SBI_LEGACY_PUTCHAR 0x01

	/* Print character in a0 via SBI legacy console putchar
	 *
	 * Clobbers a7 (extension ID) and a0 (SBI return value).
	 */
	.macro	print_char_legacy
	li	a7, SBI_LEGACY_PUTCHAR
	ecall
	.endm

/*****************************************************************************
 *
 * Print character via early UART
 *
 *****************************************************************************
 *
 * Print a single character via a UART.
 *
 * For devices without a functional SBI console, a UART at a hardcoded
 * address can be used as a last resort mechanism for obtaining debug
 * output from the prefix.
 *
 * Parameters:
 *
 *   a0 - Character to print
 *
 * Returns:
 *
 *   a0 - Preserved
 *   a1 - May be overwritten
 *   a6 - May be overwritten
 *   a7 - May be overwritten
 *
 */

/* Default to no UART, if not specified */
#ifndef EARLY_UART_MODEL
#define EARLY_UART_MODEL none
#endif

/* Default to a register shift of zero, if not specified */
#ifndef EARLY_UART_REG_SHIFT
#define EARLY_UART_REG_SHIFT 0
#endif

/* Select print_char_uart_<model> implementation at preprocessing time */
#define print_char_uart _C2 ( print_char_uart_, EARLY_UART_MODEL )

/* Select early_uart_reg_base_<xlen> implementation at preprocessing time */
#define early_uart_reg_base _C2 ( early_uart_reg_base_, __riscv_xlen )

	/* Print character via nonexistent UART (no-op: expands to nothing) */
	.macro	print_char_uart_none
	.endm

	/*
	 * Get UART base address (64-bit addressing)
	 *
	 * If paging is enabled (satp is non-zero), load the UART base
	 * from early_uart_reg_base_64_virt (defined elsewhere;
	 * presumably holding a mapped virtual address - verify against
	 * the page table setup).  Otherwise, use the hardcoded
	 * physical address EARLY_UART_REG_BASE directly.
	 */
	.macro	early_uart_reg_base_64 reg
	csrr	\reg, satp
	beqz	\reg, early_uart_reg_base_64_nonpaged_\@
	LOADN	\reg, early_uart_reg_base_64_virt
	j	early_uart_reg_base_64_done_\@
early_uart_reg_base_64_nonpaged_\@:
	li	\reg, EARLY_UART_REG_BASE
early_uart_reg_base_64_done_\@:
	.endm

	/*
	 * Get UART base address (32-bit addressing)
	 *
	 * Subtract the virtual address offset in tp, converting the
	 * hardcoded physical address to a usable virtual address.
	 */
	.macro	early_uart_reg_base_32 reg
	li	\reg, EARLY_UART_REG_BASE
	sub	\reg, \reg, tp
	.endm

/*****************************************************************************
 *
 * Print character via 8250-compatible early UART
 *
 *****************************************************************************
 *
 * Print a single character via an 8250- or 16550-compatible UART.
 *
 * Parameters:
 *
 *   a0 - Character to print
 *
 * Returns:
 *
 *   a0 - Preserved
 *   a1 - Overwritten
 *   a7 - Overwritten
 *
 */

/* 8250-compatible UART transmit registers (scaled by register shift) */
#define EARLY_UART_8250_TX		( 0 << EARLY_UART_REG_SHIFT )
#define EARLY_UART_8250_LSR		( 5 << EARLY_UART_REG_SHIFT )
#define EARLY_UART_8250_LSR_THRE	0x20

	/* Print character in a0 via an 8250/16550-compatible UART
	 *
	 * Write the character, then spin until the line status
	 * register reports the transmit holding register empty
	 * (i.e. wait for completion after writing, rather than
	 * waiting for space beforehand).  Preserves a0; clobbers
	 * a1 and a7.
	 */
	.macro	print_char_uart_8250
	early_uart_reg_base a7
	sb	a0, EARLY_UART_8250_TX(a7)
	fence
early_uart_8250_wait_\@:
	lbu	a1, EARLY_UART_8250_LSR(a7)
	andi	a1, a1, EARLY_UART_8250_LSR_THRE
	beqz	a1, early_uart_8250_wait_\@
	.endm

/*****************************************************************************
 *
 * Print character via SiFive-compatible early UART
 *
 *****************************************************************************
 *
 * Print a single character via a SiFive-compatible UART.
 *
 * Parameters:
 *
 *   a0 - Character to print
 *
 * Returns:
 *
 *   a0 - Preserved
 *   a1 - Overwritten
 *   a7 - Overwritten
 *
 */

/* SiFive-compatible UART transmit registers (scaled by register shift) */
#define EARLY_UART_SIFIVE_TXFIFO	( 0 << EARLY_UART_REG_SHIFT )

	/* Print character in a0 via a SiFive-compatible UART
	 *
	 * Write the character, then spin while reading back TXFIFO
	 * until the value is non-negative (the read-back sign bit is
	 * used here as a "FIFO full" indication).  Preserves a0;
	 * clobbers a1 and a7.
	 */
	.macro	print_char_uart_sifive
	early_uart_reg_base a7
	sw	a0, EARLY_UART_SIFIVE_TXFIFO(a7)
	fence
early_uart_sifive_wait_\@:
	lw	a1, EARLY_UART_SIFIVE_TXFIFO(a7)
	bltz	a1, early_uart_sifive_wait_\@
	.endm

/*****************************************************************************
 *
 * Print single character to early UART (from C code)
 *
 *****************************************************************************
 *
 * This function is called by the SBI console driver to output a
 * character to the early UART (if enabled).
 *
 * The standard C ABI applies to this function.
 *
 * Parameters:
 *
 *   a0 - Character to print
 *
 * Returns: none
 *
 */

	/* C-callable wrapper: print character in a0 to the early UART
	 * (expands to a no-op when no early UART model is configured).
	 */
	.section ".prefix.early_uart_putchar", "ax", @progbits
	.globl	early_uart_putchar
early_uart_putchar:
	print_char_uart
	ret
	.size	early_uart_putchar, . - early_uart_putchar

/*****************************************************************************
 *
 * Print message to debug console
 *
 *****************************************************************************
 *
 * Print a NUL-terminated string to the debug console.
 *
 * This function prints one character at a time via the "write byte"
 * call (rather than using "write string"), since this avoids any need
 * to know the current virtual-physical address translation.  It does
 * not require a valid stack.
 *
 * Note that the parameter is passed in register t1 (rather than a0)
 * and all non-temporary registers are preserved.
 *
 * Parameters:
 *
 *   t1 - Pointer to string
 *
 * Returns: none
 *
 */

	.section ".prefix.print_message", "ax", @progbits
	.globl	print_message
print_message:
	/* Handle alternate link register: callers without a valid
	 * stack may instead "jal t0, print_message_alt", so normalise
	 * the return address into t0.
	 */
	mv	t0, ra
print_message_alt:
	/* Register usage:
	 *
	 * a0 - current character
	 * t0 - alternate link register
	 * t1 - character pointer
	 * t2 - preserved a0
	 * t3 - preserved a1
	 * t4 - preserved a6
	 * t5 - preserved a7
	 */
	mv	t2, a0
	mv	t3, a1
	mv	t4, a6
	mv	t5, a7

1:	/* Print each character in turn */
	lbu	a0, (t1)		/* a0 = next character */
	addi	t1, t1, 1
	beqz	a0, 2f			/* NUL terminator: finished */
	print_char_uart			/* early UART (preserves a0) */
	print_char_dbcn			/* DBCN call: a0 = 0 on success */
	beqz	a0, 1b			/* DBCN succeeded: next character */
	lbu	a0, -1(t1)		/* reload character (a0 clobbered) */
	print_char_legacy		/* fall back to legacy putchar */
	j	1b
2:
	/* Restore registers and return (via alternate link register) */
	mv	a7, t5
	mv	a6, t4
	mv	a1, t3
	mv	a0, t2
	jr	t0
	.size	print_message, . - print_message

	/*
	 * Display progress message (if debugging is enabled)
	 *
	 * Emits the message string into its own .rodata subsection
	 * (using the \@ macro invocation counter for a unique label)
	 * and calls print_message via the alternate link register t0,
	 * so no valid stack is required.  Clobbers t0 and t1.
	 */
	.macro	progress message
#ifndef NDEBUG
	.section ".rodata.progress_\@", "a", @progbits
progress_\@:
	.asciz	"\message"
	.size	progress_\@, . - progress_\@
	.previous
	la	t1, progress_\@
	jal	t0, print_message_alt
#endif
	.endm

/*****************************************************************************
 *
 * Print hexadecimal value to debug console
 *
 *****************************************************************************
 *
 * Print a register value in hexadecimal to the debug console.
 *
 * This function does not require a valid stack.
 *
 * Note that the parameters are passed in registers t1 and t2 (rather
 * than a0) and all non-temporary registers are preserved.
 *
 * Parameters:
 *
 *   t1 - Value to print
 *   t2 - Number of bits to print (must be a multiple of 4)
 *
 * Returns: none
 *
 */

	/*
	 * Convert a single nibble (0-15) to an ASCII character
	 *
	 * Subtract 10 to test the digit range: values 0-9 go negative
	 * and skip straight to adding back ( '0' + 10 ), yielding
	 * '0'-'9'.  Values 10-15 additionally get the 'a'-relative
	 * adjustment, yielding 'a'-'f'.
	 */
	.macro	nibble_to_ascii reg
	addi	\reg, \reg, -10
	bltz	\reg, dec_\@
	addi	\reg, \reg, ( 'a' - ( '0' + 10 ) )
dec_\@:	addi	\reg, \reg, ( '0' + 10 )
	.endm

	.section ".prefix.print_hex_value", "ax", @progbits
	.globl	print_hex_value
print_hex_value:
	/* Handle alternate link register: callers without a valid
	 * stack may instead "jal t0, print_hex_value_alt".
	 */
	mv	t0, ra
print_hex_value_alt:
	/* Register usage:
	 *
	 * a0 - current digit / general temporary
	 * t0 - alternate link register
	 * t1 - current value
	 * t2 - digit counter
	 * t3 - preserved a0
	 * t4 - preserved a1
	 * t5 - preserved a6
	 * t6 - preserved a7
	 */
	mv	t3, a0
	mv	t4, a1
	mv	t5, a6
	mv	t6, a7

	/* Skip any unprinted digits: shift the value left so that the
	 * first digit to print occupies the topmost nibble.
	 */
	li	a0, __riscv_xlen
	sub	a0, a0, t2
	sll	t1, t1, a0

1:	/* Print each digit in turn */
	srli	a0, t1, ( __riscv_xlen - 4 )	/* a0 = topmost nibble */
	nibble_to_ascii a0
	print_char_uart
	print_char_dbcn
	beqz	a0, 2f			/* DBCN succeeded: next digit */
	srli	a0, t1, ( __riscv_xlen - 4 )	/* regenerate digit (a0 clobbered) */
	nibble_to_ascii a0
	print_char_legacy		/* fall back to legacy putchar */
2:	slli	t1, t1, 4		/* advance to next nibble */
	addi	t2, t2, -4
	bgtz	t2, 1b

	/* Restore registers and return (via alternate link register) */
	mv	a7, t6
	mv	a6, t5
	mv	a1, t4
	mv	a0, t3
	jr	t0
	.size	print_hex_value, . - print_hex_value

	/*
	 * Display hexadecimal register value (if debugging is enabled)
	 *
	 * Prints the low \bits bits of \reg.  Clobbers temporary
	 * registers only (t0-t2 directly, plus those used by
	 * print_hex_value).
	 */
	.macro	print_hex_reg reg, bits=__riscv_xlen
#ifndef NDEBUG
	mv	t1, \reg
	li	t2, \bits
	jal	t0, print_hex_value_alt
#endif
	.endm

	/*
	 * Display hexadecimal symbol address (if debugging is enabled)
	 *
	 * Prints the address of \sym (as currently relocated).
	 */
	.macro	print_hex_addr sym
#ifndef NDEBUG
	la	t1, \sym
	li	t2, __riscv_xlen
	jal	t0, print_hex_value_alt
#endif
	.endm

	/*
	 * Display hexadecimal data value (if debugging is enabled)
	 *
	 * Prints the native-width word stored at \sym.
	 */
	.macro	print_hex_data sym
#ifndef NDEBUG
	LOADN	t1, \sym
	li	t2, __riscv_xlen
	jal	t0, print_hex_value_alt
#endif
	.endm

/*****************************************************************************
 *
 * Apply compressed relocation records
 *
 *****************************************************************************
 *
 * Apply compressed relocation records to fix up iPXE to run at its
 * current virtual address.
 *
 * This function must run before .bss is zeroed (since the relocation
 * records are overlaid with .bss).  It does not require a valid stack
 * pointer.
 *
 * Parameters:
 *
 *   a0 - Relocation records
 *
 * Returns: none
 *
 */

/** Number of bits in a skip value */
#define ZREL_SKIP_BITS 19

	.section ".prefix.apply_relocs", "ax", @progbits
	.globl	apply_relocs
apply_relocs:
	/* Register usage:
	 *
	 * a0 - current relocation record pointer
	 * a1 - current relocation target address
	 * a2 - relocation addend
	 * a3 - current relocation record value
	 * a4 - number of bits remaining in current relocation record
	 */
	la	a1, _prefix

	/* Calculate relocation addend: difference between the current
	 * runtime address of _prefix and the recorded (previously
	 * relocated) virtual address.
	 */
	LOADN	a2, prefix_virt
	sub	a2, a1, a2

	/* Skip applying relocations if addend is zero (i.e. we are
	 * already running at the recorded address).
	 */
	beqz	a2, apply_relocs_done
	progress " reloc"

	/* Test writability
	 *
	 * We do this to avoid accidentally sending an undefined
	 * sequence of commands to a flash device, if we are started
	 * from read-only memory with no paging support.
	 *
	 * We attempt to write an all-ones pattern, on the basis that
	 * this pattern will harmlessly cause any flash device
	 * conforming to the CFI01 specification to enter the default
	 * "read array" state.
	 */
	la	t0, apply_relocs_test
	li	t1, -1
	STOREN	t1, (t0)
	LOADN	t2, (t0)
	bne	t1, t2, apply_relocs_failed

apply_relocs_loop:
	/* Read new native-width relocation record and advance the
	 * record pointer.  Each record provides up to (xlen - 1)
	 * usable payload bits; the MSB is a format flag.
	 */
	LOADN	a3, (a0)
	addi	a0, a0, ( __riscv_xlen / 8 )
	li	a4, ( __riscv_xlen - 1 )

	/* Consume and apply skip, if present (i.e. if MSB=0)
	 *
	 * The ZREL_SKIP_BITS bits below the MSB encode a skip count
	 * in native words, which is scaled to bytes (shift by
	 * log2(xlen/8)) and added to the target address.  The skip
	 * bits are then excluded from the bitmap length in a4.
	 */
	bltz	a3, 1f
	addi	a4, a4, -ZREL_SKIP_BITS
	srli	t0, a3, ( __riscv_xlen - ( ZREL_SKIP_BITS + 1 ) )
	slli	t0, t0, ( ( __riscv_xlen / 32 ) + 1 )
	add	a1, a1, t0
1:
	/* Apply relocations corresponding to set bits in record
	 *
	 * Each bit (LSB first) covers one native word: a set bit
	 * means "add the addend to the word at the target address".
	 * The target address always advances by one word per bit.
	 */
1:	andi	t0, a3, 1
	beqz	t0, 2f
	LOADN	t1, (a1)
	add	t1, t1, a2
	STOREN	t1, (a1)
2:	addi	a1, a1, ( __riscv_xlen / 8 )
	srli	a3, a3, 1
	addi	a4, a4, -1
	bnez	a4, 1b

	/* Loop until we have reached a terminator record (MSB=0, offset=0)
	 *
	 * After the bitmap loop, a3 holds only the unconsumed high
	 * bits, which are zero precisely for a terminator record.
	 */
	bnez	a3, apply_relocs_loop

	/* Check that relocations were applied successfully: the
	 * stored prefix_virt must itself have been fixed up to match
	 * the current address of _prefix.
	 */
	la	t0, _prefix
	LOADN	t1, prefix_virt
	bne	t0, t1, apply_relocs_failed

apply_relocs_done:
	/* Return to caller */
	progress " ok\r\n"
	ret

apply_relocs_failed:
	/* Failure to apply relocations (if relocations were needed)
	 * is a fatal error.
	 */
	progress " failed\r\n"
	j	reset_system
	.size	apply_relocs, . - apply_relocs

	/* Writability test
	 *
	 * Placed within .data rather than .bss, since we need this to
	 * be within the range of the stored iPXE image.
	 */
	.section ".data.apply_relocs_test", "aw", @progbits
	.balign	( __riscv_xlen / 8 )
apply_relocs_test:
	.space	( __riscv_xlen / 8 )
	.size	apply_relocs_test, . - apply_relocs_test

/*****************************************************************************
 *
 * Enable paging
 *
 *****************************************************************************
 *
 * This function must be called with flat physical addressing.  It
 * does not require a valid stack pointer.
 *
 * Parameters:
 *
 *   a0 - Page table to fill in (4kB, must be aligned to a 4kB boundary)
 *
 * Returns:
 *
 *   a0 - Size of accessible physical address space (or zero for no limit)
 *   tp - Virtual address offset
 *   pc - Updated to a virtual address if paging enabled
 *
 */

/** Number of bits in a page offset */
#define PAGE_SHIFT 12

/** Page size */
#define PAGE_SIZE ( 1 << PAGE_SHIFT )

/** Size of a page table entry (log2): 3 on rv64, 2 on rv32 */
#define PTE_SIZE_LOG2 ( ( __riscv_xlen / 32 ) + 1 )

/** Size of a page table entry (8 bytes on rv64, 4 bytes on rv32) */
#define PTE_SIZE ( 1 << PTE_SIZE_LOG2 )

/** Number of page table entries (log2) */
#define PTE_COUNT_LOG2 ( PAGE_SHIFT - PTE_SIZE_LOG2 )

/** Number of page table entries per table (512 on rv64, 1024 on rv32) */
#define PTE_COUNT ( 1 << PTE_COUNT_LOG2 )

/** Number of bits in a virtual or physical page number (per level) */
#define VPPN_SHIFT PTE_COUNT_LOG2

/* Page table entry flags */
#define PTE_V		0x00000001	/**< Page table entry is valid */
#define PTE_R		0x00000002	/**< Page is readable */
#define PTE_W		0x00000004	/**< Page is writable */
#define PTE_X		0x00000008	/**< Page is executable */
#define PTE_A		0x00000040	/**< Page has been accessed */
#define PTE_D		0x00000080	/**< Page is dirty */

/* Page table entry flags for our leaf pages (A and D preset to avoid
 * faults on hardware that does not update them automatically)
 */
#define PTE_LEAF ( PTE_D | PTE_A | PTE_X | PTE_W | PTE_R | PTE_V )

/** Physical page number LSB in PTE */
#define PTE_PPN_LSB(x) ( 10 + (x) * VPPN_SHIFT )
#define PTE_PPN4_LSB	PTE_PPN_LSB(4)	/**< PPN[4] LSB (Sv57) */
#define PTE_PPN3_LSB	PTE_PPN_LSB(3)	/**< PPN[3] LSB (Sv57 & Sv48) */
#define PTE_PPN2_LSB	PTE_PPN_LSB(2)	/**< PPN[2] LSB (Sv57, Sv48, & Sv39) */
#define PTE_PPN1_LSB	PTE_PPN_LSB(1)	/**< PPN[1] LSB (all levels) */
#define PTE_PPN0_LSB	PTE_PPN_LSB(0)	/**< PPN[0] LSB (all levels) */

/** Page table entry physical page address shift
 *
 * Right-shifting a physical address by this amount yields the PPN
 * field already positioned at its PTE bit offset.
 */
#define PTE_PPN_SHIFT ( PAGE_SHIFT - PTE_PPN0_LSB )

/** Virtual page number LSB */
#define VPN_LSB(x) ( PAGE_SHIFT + (x) * VPPN_SHIFT )
#define VPN4_LSB	VPN_LSB(4)	/**< VPN[4] LSB (Sv57) */
#define VPN3_LSB	VPN_LSB(3)	/**< VPN[3] LSB (Sv57 & Sv48) */
#define VPN2_LSB	VPN_LSB(2)	/**< VPN[2] LSB (Sv57, Sv48, & Sv39) */
#define VPN1_LSB	VPN_LSB(1)	/**< VPN[1] LSB (all levels) */
#define VPN0_LSB	VPN_LSB(0)	/**< VPN[0] LSB (all levels) */

/* Paging modes (satp.MODE field values) */
#define SATP_MODE_SV57	10		/**< Five-level paging (Sv57) */
#define SATP_MODE_SV48	9		/**< Four-level paging (Sv48) */
#define SATP_MODE_SV39	8		/**< Three-level paging (Sv39) */
#define SATP_MODE_SV32	1		/**< Two-level paging (Sv32) */

/** Paging mode shift (position of satp.MODE field) */
#if __riscv_xlen == 64
#define SATP_MODE_SHIFT	60
#else
#define SATP_MODE_SHIFT	31
#endif

	/* enable_paging resolves to the XLEN-specific implementation
	 * (enable_paging_32 or enable_paging_64).
	 */
	.globl	enable_paging
	.equ	enable_paging, _C2 ( enable_paging_, __riscv_xlen )

	/* Paging mode names (for debug messages)
	 *
	 * A table of fixed 5-byte (NUL-terminated) slots indexed by
	 * satp mode value, so that a name can be located by simple
	 * multiplication with no lookup logic.
	 */
	.section ".rodata.paging_mode_names", "a", @progbits
paging_mode_names:
	.asciz	"none"
	.org	( paging_mode_names + 5 * SATP_MODE_SV32 )
	.asciz	"Sv32"
	.org	( paging_mode_names + 5 * SATP_MODE_SV39 )
	.asciz	"Sv39"
	.org	( paging_mode_names + 5 * SATP_MODE_SV48 )
	.asciz	"Sv48"
	.org	( paging_mode_names + 5 * SATP_MODE_SV57 )
	.asciz	"Sv57"
	.size	paging_mode_names, . - paging_mode_names

	/*
	 * Display paging mode name (if debugging is enabled)
	 *
	 * Computes ( \reg * 5 ) via shift-and-add to index the
	 * 5-byte name slots, then prints via the alternate link
	 * register.  Clobbers t0 and t1.
	 */
	.macro	paging_mode_name reg
#ifndef NDEBUG
	slli	t0, \reg, 2
	add	t0, t0, \reg
	la	t1, paging_mode_names
	add	t1, t1, t0
	jal	t0, print_message_alt
#endif
	.endm

	/* Maximum physical alignment
	 *
	 * We align to a "megapage" boundary to simplify the task of
	 * setting up page table mappings.
	 */
	.globl	_max_align
	.equ	_max_align, ( 1 << VPN1_LSB )

	/* Space for page table
	 *
	 * This can be used only once .bss is known to be writable.
	 */
	.section ".bss.page_table", "a", @nobits
	.globl	page_table
	.balign	PAGE_SIZE
page_table:
	.space	PAGE_SIZE
	.size	page_table, . - page_table

	/* Convert physical address to virtual address (dispatches to
	 * the XLEN-specific phys_to_virt_32 / phys_to_virt_64 macro)
	 */
	.macro	phys_to_virt rd, rs:vararg
	_C2 ( phys_to_virt_, __riscv_xlen ) \rd, \rs
	.endm

/*****************************************************************************
 *
 * Disable paging
 *
 *****************************************************************************
 *
 * This function may be called with either virtual or flat physical
 * addressing.  It does not require a valid stack pointer.
 *
 * Parameters:
 *
 *   tp - Virtual address offset
 *
 * Returns:
 *
 *   tp - Virtual address offset (zeroed)
 *   pc - Updated to a physical address
 *
 */

	/* disable_paging resolves to the XLEN-specific implementation
	 * (disable_paging_32 or disable_paging_64).
	 */
	.globl	disable_paging
	.equ	disable_paging, _C2 ( disable_paging_, __riscv_xlen )

/*****************************************************************************
 *
 * Enable 64-bit paging
 *
 *****************************************************************************
 *
 * Construct a 64-bit page table to identity-map the whole of the
 * mappable physical address space, and to map iPXE itself at its
 * link-time address (which must be 2MB-aligned and be within the
 * upper half of the kernel address space).
 *
 * This function must be called with flat physical addressing.  It
 * does not require a valid stack pointer.
 *
 * Parameters:
 *
 *   a0 - Page table to fill in (4kB, must be aligned to a 4kB boundary)
 *
 * Returns:
 *
 *   a0 - Size of accessible physical address space (or zero for no limit)
 *   tp - Virtual address offset
 *   pc - Updated to a virtual address if paging enabled
 *
 * A 4kB 64-bit page table contains 512 8-byte PTEs.  We choose to use
 * these as:
 *
 *    - PTE[0-255] : Identity map for the physical address space.
 *
 *      This conveniently requires exactly 256 PTEs, regardless of the
 *      paging level.  Higher paging levels are able to identity-map a
 *      larger physical address space:
 *
 *      Sv57 : 256 x 256TB "petapages" (55-bit physical address space)
 *      Sv48 : 256 x 512GB "terapages" (46-bit physical address space)
 *      Sv39 : 256 x   1GB "gigapages" (37-bit physical address space)
 *
 *      Note that Sv48 and Sv39 cannot identity-map the whole of the
 *      available physical address space, since the virtual address
 *      space is not large enough (and is halved by the constraint
 *      that virtual addresses with bit 47/38 set must also have all
 *      higher bits set, and so cannot identity-map to a 55-bit
 *      physical address).
 *
 *    - PTE[x-y] : Virtual address map for iPXE
 *
 *      These are 2MB "megapages" used to map the link-time virtual
 *      address range used by iPXE itself.  We can use any 2MB-aligned
 *      range within 0xffffffffe0800000-0xffffffffffc00000, which
 *      breaks down as:
 *
 *         VPN[4] = 511     (in Sv57, must be all-ones in Sv48 and Sv39)
 *         VPN[3] = 511     (in Sv57 and Sv48, must be all-ones in Sv39)
 *         VPN[2] = 511     (in all paging levels)
 *         VPN[1] = 260-510 (in all paging levels)
 *         VPN[0] = 0       (in all paging levels)
 *
 *      In most builds, only a single 2MB "megapage" will be needed.
 *      We choose a link-time starting address of 0xffffffffeb000000
 *      within the permitted range, since the "eb" pattern is fairly
 *      distinctive and so makes it easy to visually identify any
 *      addresses originating from within iPXE's virtual address
 *      space.
 *
 *    - PTE[511] : Recursive next level page table pointer
 *
 *      This is a non-leaf PTE that points back to the page table
 *      itself.  It acts as the next level page table pointer for:
 *
 *         VPN[4] = 511 (in Sv57)
 *         VPN[3] = 511 (in Sv57 and Sv48)
 *         VPN[2] = 511 (in Sv57, Sv48, and Sv39)
 *
 *      This recursive usage creates some duplicate mappings within
 *      unused portions of the virtual address space, but allows us to
 *      use only a single physical 4kB page table.
 */

/** SBI base extension (used here to probe the CPU vendor ID) */
#define SBI_BASE 0x10
#define SBI_BASE_MVENDORID 0x04

/** Non-standard T-Head page table entry additional flags
 *
 * T-Head processors such as the C910 use the high bits of the PTE in
 * a very non-standard way that is incompatible with the RISC-V
 * specification.
 *
 * As per the "Memory Attribute Extension (XTheadMae)", bits 62 and 61
 * represent cacheability and "bufferability" (i.e. write-back
 * cacheability) respectively.  If we do not enable these bits, then
 * the processor gets incredibly confused at the point that paging is
 * enabled.  The symptom is that cache lines will occasionally fail to
 * fill, and so reads from any address may return unrelated data from
 * a previously read cache line for a different address.
 */
#define THEAD_PTE_MAEE ( 0x60 << ( __riscv_xlen - 8 ) )

/** T-Head vendor ID */
#define THEAD_MVENDORID 0x5b7

/** T-Head "sxstatus" CSR */
#define THEAD_CSR_SXSTATUS 0x5c0
#define THEAD_CSR_SXSTATUS_MAEE	0x00200000	/**< XTheadMae enabled */

	.section ".prefix.enable_paging_64", "ax", @progbits
enable_paging_64:
	/* Register usage:
	 *
	 * tp - return value (virtual address offset)
	 * a0 - page table base address
	 * a1 - currently attempted paging level
	 * a2 - enabled paging level
	 * a3 - PTE pointer
	 * a4 - PTE stride
	 * a5 - size of accessible physical address space
	 */
	progress " paging:"

	/* Calculate virtual address offset (current physical address
	 * of _prefix minus its link-time address).
	 */
	LOADN	t0, prefix_link
	la	t1, _prefix
	sub	tp, t1, t0

	/* Zero PTE[0-511] */
	li	t0, PTE_COUNT
	mv	a3, a0
1:	STOREN	zero, (a3)
	addi	a3, a3, PTE_SIZE
	addi	t0, t0, -1
	bgtz	t0, 1b

	/* Construct PTE[511] as next level page table pointer
	 *
	 * A non-leaf PTE (valid, but no R/W/X bits) pointing back at
	 * this same physical page table (a3 currently points one
	 * entry past the end of the table).
	 */
	srli	t0, a0, PTE_PPN_SHIFT
	ori	t0, t0, PTE_V
	STOREN	t0, -PTE_SIZE(a3)

	/* Construct base page table entry for address zero (leaf
	 * flags with PPN=0); used below as the template for both the
	 * identity map and the iPXE virtual address map.
	 */
	li	t0, PTE_LEAF
	STOREN	t0, (a0)

	/* Check for broken T-Head paging extensions
	 *
	 * Probe mvendorid via the SBI base extension (the ecall
	 * clobbers a0/a1, so the page table pointer is parked in a3).
	 * On T-Head CPUs with XTheadMae enabled, OR the non-standard
	 * cacheability bits into the template PTE.
	 */
	mv	a3, a0
	li	a7, SBI_BASE
	li	a6, SBI_BASE_MVENDORID
	ecall
	bnez	a0, 1f			/* SBI call failed: skip */
	li	t0, THEAD_MVENDORID
	bne	a1, t0, 1f		/* not a T-Head CPU: skip */
	progress "thead-"
	csrr	t0, THEAD_CSR_SXSTATUS
	li	t1, THEAD_CSR_SXSTATUS_MAEE
	and	t0, t0, t1
	beqz	t0, 1f			/* XTheadMae not enabled: skip */
	progress "mae-"
	LOADN	t0, (a3)
	li	t1, THEAD_PTE_MAEE
	or	t0, t0, t1
	STOREN	t0, (a3)
1:	mv	a0, a3			/* restore page table pointer */

	/* Calculate PTE[x] address for iPXE virtual address map,
	 * where x is VPN[1] of the link-time address.
	 */
	LOADN	t0, prefix_link
	srli	t0, t0, VPN1_LSB
	andi	t0, t0, ( PTE_COUNT - 1 )
	slli	t0, t0, PTE_SIZE_LOG2
	add	a3, a0, t0

	/* Calculate PTE stride for iPXE virtual address map
	 *
	 * PPN[1] LSB is PTE bit 19 in all paging modes, and so the
	 * stride is always ( 1 << 19 )
	 */
	li	a4, 1
	slli	a4, a4, PTE_PPN1_LSB

	/* Construct PTE[x-1] for early UART, if applicable
	 *
	 * Maps the megapage containing the UART registers just below
	 * the iPXE virtual address map, readable and writable but not
	 * executable.
	 */
#ifdef EARLY_UART_REG_BASE
	li	t0, ( EARLY_UART_REG_BASE & ~( ( 1 << VPN1_LSB ) - 1 ) )
	srli	t0, t0, PTE_PPN_SHIFT
	ori	t0, t0, ( PTE_LEAF & ~PTE_X )
	STOREN	t0, -PTE_SIZE(a3)
#endif

	/* Construct PTE[x-y] for iPXE virtual address map
	 *
	 * Start from the megapage containing _prefix (merged with the
	 * template PTE flags), and emit one PTE per megapage until
	 * _ebss is covered.
	 *
	 * NOTE(review): the "ble t0, t2" termination test compares
	 * the full PTE value (flags included) against the shifted
	 * _ebss address; if the T-Head MAEE high bits were ORed into
	 * the template above, t0 would exceed t2 immediately and only
	 * a single megapage would be mapped - confirm whether iPXE is
	 * guaranteed to fit one megapage on such CPUs.
	 */
	la	t0, _prefix
	srli	t0, t0, PTE_PPN_SHIFT
	LOADN	t1, (a0)
	or	t0, t0, t1
	la	t2, _ebss
	srli	t2, t2, PTE_PPN_SHIFT
1:	STOREN	t0, (a3)
	addi	a3, a3, PTE_SIZE
	add	t0, t0, a4
	ble	t0, t2, 1b

	/* Find highest supported paging level, trying Sv57 first */
	li	a1, SATP_MODE_SV57
enable_paging_64_loop:

	/* Calculate PTE stride for identity map at this paging level
	 *
	 * a1 == 10 == Sv57: PPN[4] LSB is PTE bit 46  =>  stride := 1 << 46
	 * a1 ==  9 == Sv48: PPN[3] LSB is PTE bit 37  =>  stride := 1 << 37
	 * a1 ==  8 == Sv39: PPN[2] LSB is PTE bit 28  =>  stride := 1 << 28
	 *
	 * and so we calculate stride a4 := ( 1 << ( 9 * a1 - 44 ) )
	 */
	slli	a4, a1, 3
	add	a4, a4, a1
	addi	a4, a4, -44
	li	t0, 1
	sll	a4, t0, a4

	/* Calculate size of accessible physical address space
	 *
	 * The identity map comprises only the lower half of the PTEs,
	 * since virtual addresses for the higher half must have all
	 * high bits set, and so cannot form part of an identity map.
	 */
	slli	a5, a4, ( PTE_PPN_SHIFT + ( PTE_COUNT_LOG2 - 1 ) )

	/* Construct PTE[0-255] for identity map at this paging level,
	 * stepping the template PTE by the per-level stride.
	 */
	mv	a3, a0
	li	t0, ( PTE_COUNT / 2 )
	LOADN	t1, (a0)
1:	STOREN	t1, (a3)
	addi	a3, a3, PTE_SIZE
	add	t1, t1, a4
	addi	t0, t0, -1
	bgtz	t0, 1b

	/* Attempt to enable paging, and read back active paging level
	 * (satp.MODE reads back as zero if the mode is unsupported).
	 */
	slli	t0, a1, SATP_MODE_SHIFT
	srli	t1, a0, PAGE_SHIFT
	or	t0, t0, t1
	csrw	satp, t0
	sfence.vma
	csrr	a2, satp
	srli	a2, a2, SATP_MODE_SHIFT

	/* Loop until we successfully enable paging, or run out of levels */
	beq	a2, a1, 1f
	csrw	satp, zero		/* disable paging before retrying */
	addi	a1, a1, -1
	li	t0, SATP_MODE_SV39
	bge	a1, t0, enable_paging_64_loop
	mv	tp, zero		/* no paging: zero offset */
	mv	a5, zero		/* no paging: no size limit */
1:
	/* Adjust return address to a virtual address (no-op when
	 * paging could not be enabled, since tp is then zero).
	 */
	sub	ra, ra, tp

	/* Return, with or without paging enabled */
	paging_mode_name a2
	mv	a0, a5
	ret
	.size	enable_paging_64, . - enable_paging_64

	/* Convert 64-bit physical address to virtual address
	 *
	 * The 64-bit page table identity-maps the physical address
	 * space, so physical addresses remain directly usable: the
	 * "conversion" is just a register copy, or a no-op when
	 * converting in place (no source register supplied).
	 */
	.macro	phys_to_virt_64 rd, rs:vararg
	.ifnb	\rs
	mv	\rd, \rs
	.endif
	.endm

	/* Early UART base address when 64-bit paging is enabled
	 *
	 * When an early UART is in use, we choose to use the 2MB
	 * "megapage" immediately below iPXE itself to map the UART.
	 */
#ifdef EARLY_UART_REG_BASE
	.section ".rodata.early_uart_reg_base_64_virt", "a", @progbits
	.balign	8
early_uart_reg_base_64_virt:
	/* Virtual address = start of the megapage below _base, plus
	 * the UART's offset within its own physical megapage (the
	 * low VPN1_LSB bits of EARLY_UART_REG_BASE), matching the
	 * non-executable UART PTE constructed by enable_paging_64.
	 */
	.dword	( _base - ( 1 << VPN1_LSB ) + \
	          ( EARLY_UART_REG_BASE & ( ( 1 << VPN1_LSB ) - 1 ) ) )
	.size	early_uart_reg_base_64_virt, . - early_uart_reg_base_64_virt
#endif

/*****************************************************************************
 *
 * Disable 64-bit paging
 *
 *****************************************************************************
 *
 * This function may be called with either virtual or flat physical
 * addressing.  It does not require a valid stack pointer.
 *
 * Parameters:
 *
 *   tp - Virtual address offset
 *
 * Returns:
 *
 *   tp - Virtual address offset (zeroed)
 *   pc - Updated to a physical address
 *
 */

	.section ".prefix.disable_paging_64", "ax", @progbits
disable_paging_64:
	/* Register usage:
	 *
	 * tp - virtual address offset
	 */

	/* Jump to physical address
	 *
	 * iPXE's 64-bit virtual address map lies in the upper half of
	 * the address space, so the PC-relative address is negative
	 * precisely when we are executing at a virtual address; a
	 * non-negative address means we are already executing
	 * physically and no jump is needed.  Adding tp converts
	 * virtual to physical.
	 */
	la	t0, 1f
	bgez	t0, 1f
	add	t0, t0, tp
	jr	t0
1:
	/* Disable paging */
	csrw	satp, zero
	sfence.vma

	/* Update return address to a physical address (only if the
	 * caller's address was virtual, i.e. negative)
	 */
	bgez	ra, 1f
	add	ra, ra, tp
1:
	/* Return with paging disabled and virtual offset zeroed */
	mv	tp, zero
	ret
	.size	disable_paging_64, . - disable_paging_64

/*****************************************************************************
 *
 * Enable 32-bit paging
 *
 *****************************************************************************
 *
 * Construct a 32-bit page table to map the whole of the 32-bit
 * address space with a fixed offset selected to map iPXE itself at
 * its link-time address (which must be 4MB-aligned).
 *
 * This function must be called with flat physical addressing.  It
 * does not require a valid stack pointer.
 *
 * Parameters:
 *
 *   a0 - Page table to fill in (4kB, must be aligned to a 4kB boundary)
 *
 * Returns:
 *
 *   a0 - Size of accessible physical address space (or zero for no limit)
 *   tp - Virtual address offset
 *   pc - Updated to a virtual address if paging enabled
 *
 * A 4kB 32-bit page table contains 1024 4-byte PTEs.  We choose to
 * use these to produce a circular map of the 32-bit address space
 * using 4MB "megapages", with a fixed offset to align the virtual and
 * link-time addresses.
 *
 * To handle the transition from physical to virtual addresses, we
 * temporarily adjust the PTE covering the current program counter to
 * be a direct physical map (so that the program counter remains valid
 * at the moment when paging is enabled), then jump to a virtual
 * address, then restore the temporarily modified PTE.
 */

	/* Alignment unit for the transition code: keeping the code
	 * within one aligned 32-byte block guarantees that it cannot
	 * straddle a (much larger) megapage boundary, so the single
	 * temporarily modified PTE covers all of it.
	 */
	.equ	enable_paging_32_xalign, 32

	.section ".prefix.enable_paging_32", "ax", @progbits
enable_paging_32:
	/* Register usage:
	 *
	 * tp - return value (virtual address offset)
	 * a0 - page table base address
	 * a1 - enabled paging level
	 * a2 - PTE pointer
	 * a3 - saved content of temporarily modified PTE
	 */
	progress " paging:"

	/* Calculate virtual address offset
	 *
	 * tp := _prefix (physical) - prefix_link (virtual), so that
	 * physical = virtual + tp.
	 */
	LOADN	t0, prefix_link
	la	t1, _prefix
	sub	tp, t1, t0

	/* Construct PTEs for circular map
	 *
	 * t1 holds the pre-shift value (address offset plus the flag
	 * bits shifted up by PTE_PPN_SHIFT); each iteration shifts it
	 * down to form the PTE, then advances by one pre-shifted 4MB
	 * megapage (t2).  The 32-bit addition wraps naturally,
	 * producing a circular map of the whole address space.
	 */
	mv	a2, a0
	li	t0, PTE_COUNT
	mv	t1, tp
	ori	t1, t1, ( PTE_LEAF << PTE_PPN_SHIFT )
	li	t2, ( 1 << ( PTE_PPN1_LSB + PTE_PPN_SHIFT ) )
1:	srli	t3, t1, PTE_PPN_SHIFT
	STOREN	t3, (a2)
	addi	a2, a2, PTE_SIZE
	add	t1, t1, t2
	addi	t0, t0, -1
	bgtz	t0, 1b

	/* Temporarily modify PTE for transition code to be an identity map
	 *
	 * t0 = VPN[1] of the transition code's physical address; the
	 * covering PTE is pointed to by a2, its original content is
	 * saved in a3, and it is overwritten with an identity-map
	 * leaf entry for the same megapage.
	 */
	la	t0, enable_paging_32_xstart
	srli	t0, t0, VPN1_LSB
	slli	t1, t0, PTE_SIZE_LOG2
	add	a2, a0, t1
	LOADN	a3, (a2)
	slli	t0, t0, PTE_PPN1_LSB
	ori	t0, t0, PTE_LEAF
	STOREN	t0, (a2)

	/* Adjust PTE pointer to a virtual address (the restore at
	 * label 1: below executes with paging enabled)
	 */
	sub	a2, a2, tp

	/* Attempt to enable paging, and read back active paging level
	 *
	 * t0 = virtual address of label 1: below, jumped to only once
	 * the satp read-back confirms that paging is enabled.
	 */
	la	t0, 1f
	sub	t0, t0, tp
	li	t1, ( SATP_MODE_SV32 << SATP_MODE_SHIFT )
	srli	t2, a0, PAGE_SHIFT
	or	t1, t1, t2
	.balign	enable_paging_32_xalign
	/* Start of transition code */
enable_paging_32_xstart:
	csrw	satp, t1
	sfence.vma
	csrr	a1, satp
	beqz	a1, 2f
	jr	t0
1:	/* Restore temporarily modified PTE */
	STOREN	a3, (a2)
	sfence.vma
	/* End of transition code */
	.equ	enable_paging_32_xlen, . - enable_paging_32_xstart
2:	srli	a1, a1, SATP_MODE_SHIFT

	/* Zero SATP and virtual address offset if paging is not enabled */
	bnez	a1, 1f
	csrw	satp, zero
	mv	tp, zero
1:
	/* Adjust return address to a virtual address (no-op if tp = 0) */
	sub	ra, ra, tp

	/* Return, with or without paging enabled */
	paging_mode_name a1
	mv	a0, zero
	ret
	.size	enable_paging_32, . - enable_paging_32

	/* Ensure that transition code did not cross an alignment boundary
	 *
	 * The .org below would move the location counter backwards (a
	 * build-time error) if the transition code were longer than
	 * its alignment unit.
	 */
	.section ".bss.enable_paging_32_xcheck", "aw", @nobits
	.org	. + enable_paging_32_xalign - enable_paging_32_xlen

	/* Convert 32-bit physical address to virtual address
	 *
	 * Virtual and physical addresses differ by the fixed offset
	 * held in tp (physical = virtual + tp), so conversion is a
	 * subtraction: in place when no source register is supplied,
	 * otherwise from the given source into the destination.
	 */
	.macro	phys_to_virt_32 rd, rs:vararg
	.ifb	\rs
	sub	\rd, \rd, tp
	.else
	sub	\rd, \rs, tp
	.endif
	.endm

/*****************************************************************************
 *
 * Disable 32-bit paging
 *
 *****************************************************************************
 *
 * This function may be called with either virtual or flat physical
 * addressing.  It does not require a valid stack pointer.
 *
 * Parameters:
 *
 *   tp - Virtual address offset
 *
 * Returns:
 *
 *   tp - Virtual address offset (zeroed)
 *   pc - Updated to a physical address
 *
 */

	/* Alignment unit for the transition code (checked at the end
	 * of this section to ensure the code cannot straddle an
	 * alignment - and hence megapage - boundary)
	 */
	.equ	disable_paging_32_xalign, 16

	.section ".prefix.disable_paging_32", "ax", @progbits
disable_paging_32:
	/* Register usage:
	 *
	 * tp - virtual address offset
	 * a0 - page table address
	 * a1 - transition PTE pointer
	 * a2 - transition PTE content
	 */

	/* Get page table address, and exit if paging is already disabled
	 *
	 * Shifting satp left by PAGE_SHIFT converts the PPN field to
	 * a byte address (the mode and ASID fields are shifted out of
	 * the top of the 32-bit register); subtracting tp then yields
	 * a pointer usable under the current virtual addressing.
	 */
	csrr	a0, satp
	beqz	a0, 99f
	slli	a0, a0, PAGE_SHIFT
	sub	a0, a0, tp

	/* Prepare for modifying transition PTE
	 *
	 * a1 = (virtual) pointer to the PTE covering the transition
	 * code's physical address; a2 = identity-map leaf PTE content
	 * for that same megapage.
	 */
	la	t0, disable_paging_32_xstart
	add	t0, t0, tp
	srli	t0, t0, VPN1_LSB
	slli	a1, t0, PTE_SIZE_LOG2
	add	a1, a1, a0
	slli	a2, t0, PTE_PPN1_LSB
	ori	a2, a2, PTE_LEAF

	/* Jump to physical address in transition PTE, and disable paging */
	la	t0, 1f
	add	t0, t0, tp
	.balign	disable_paging_32_xalign
	/* Start of transition code */
disable_paging_32_xstart:
	STOREN	a2, (a1)
	sfence.vma
	jr	t0
1:	csrw	satp, zero
	sfence.vma
	/* End of transition code */
	.equ	disable_paging_32_xlen, . - disable_paging_32_xstart

	/* Update return address to a physical address */
	add	ra, ra, tp

99:	/* Return with paging disabled and virtual offset zeroed */
	mv	tp, zero
	ret
	.size	disable_paging_32, . - disable_paging_32

	/* Ensure that transition code did not cross an alignment boundary
	 *
	 * The .org below would move the location counter backwards (a
	 * build-time error) if the transition code were longer than
	 * its alignment unit.
	 */
	.section ".bss.disable_paging_32_xcheck", "aw", @nobits
	.org	. + disable_paging_32_xalign - disable_paging_32_xlen

/*****************************************************************************
 *
 * Poison .bss section
 *
 *****************************************************************************
 *
 * Fill the .bss section with an invalid non-zero value to expose bugs
 * in early initialisation code that erroneously relies upon variables
 * in .bss before the section has been zeroed.
 *
 * We use the value 0xeb55eb55eb55eb55 ("EBSS") since this is
 * immediately recognisable as a value in a crash dump, and will
 * trigger a page fault if dereferenced since the address is in a
 * non-canonical form.
 *
 * Poisoning the .bss will overwrite the relocation records, and so
 * can be done only as a debugging step on a system where relocation
 * is known to be unnecessary (e.g. because paging is supported).
 *
 * This function does not require a valid stack pointer, but will
 * destroy any existing stack contents if the stack happens to be
 * placed within the original .bss section.
 *
 * Parameters: none
 *
 * Returns: none
 *
 */

	.equ	poison_bss_value_32, 0xeb55eb55
	.equ	poison_bss_value_64, 0xeb55eb55eb55eb55
	.equ	poison_bss_value, _C2 ( poison_bss_value_, __riscv_xlen )

	.section ".prefix.poison_bss", "ax", @progbits
poison_bss:
	/* Fill [_bss, _ebss) with the poison pattern, one XLEN-sized
	 * word at a time.  Only temporary registers are used and no
	 * stack is required.  Written as a do-while, so the store
	 * executes before the bounds check.
	 */
	li	t5, poison_bss_value
	la	t3, _bss
	la	t4, _ebss
1:	STOREN	t5, (t3)
	addi	t3, t3, ( __riscv_xlen / 8 )
	blt	t3, t4, 1b
	ret
	.size	poison_bss, . - poison_bss

/*****************************************************************************
 *
 * Install iPXE to a suitable runtime address
 *
 *****************************************************************************
 *
 * Identify a suitable runtime address for iPXE, relocate there, and
 * set up for running normal C code.
 *
 * A valid temporary stack pointer is required.  A 4kB space for a
 * temporary page table may be provided, and must be provided if the
 * iPXE image is running from read-only memory.
 *
 * Note that this function does not preserve the callee-save registers.
 *
 * Parameters:
 *
 *   a0 - Boot hart ID
 *   a1 - Device tree physical address
 *   a2 - Optional temporary page table space (4kB, aligned to a 4kB boundary)
 *   sp - Valid temporary stack pointer
 *
 * Returns:
 *
 *   pc - Updated to be within the relocated iPXE
 *   sp - Top of internal stack
 *   tp - Virtual address offset
 *
 */

	.section ".prefix.install", "ax", @progbits
	.globl	install
install:
	/* Register usage:
	 *
	 * s0 - boot hart ID
	 * s1 - device tree physical address
	 * s2 - saved return address
	 * s3 - relocation records physical address
	 * s4 - maximum accessible physical address
	 * s5 - relocation physical address
	 * s6 - relocation offset
	 * tp - virtual address offset
	 */
	mv	tp, zero
	progress "\r\nSBI->iPXE hart:"
	print_hex_reg a0
	progress " temp:"
	print_hex_reg a2
	progress " fdt:"
	print_hex_reg a1
	progress "\r\nSBI->iPXE phys:"
	print_hex_addr _prefix
	progress " virt:"
	print_hex_data prefix_virt
	mv	s0, a0
	mv	s1, a1
	mv	s2, ra
	/* Relocation records are located at _edata */
	la	s3, _edata

	/* Poison .bss if configured to do so */
#if POISON_BSS
	call	poison_bss
#endif

	/* Attempt to enable paging, if we have temporary page table space
	 *
	 * enable_paging returns the size of the accessible physical
	 * address space in a0 (zero meaning no limit), so s4 becomes
	 * the highest accessible physical address: all-ones when
	 * there is no limit, including when paging was skipped.
	 */
	mv	a0, a2
	beqz	a2, 1f
	call	enable_paging
1:	addi	s4, a0, -1

	/* Apply relocations, if still needed after enabling paging */
	mv	a0, s3
	call	apply_relocs

	/* Find a suitable address for relocation (using temporary stack) */
	phys_to_virt a0, s1
	mv	a1, s4
	phys_to_virt sp
	call	fdtmem_relocate
	mv	s5, a0
	progress "SBI->iPXE dest:"
	print_hex_reg a0

	/* Disable paging (the copy below works on physical addresses) */
	call	disable_paging

	/* Determine relocation offset */
	la	s6, _prefix
	sub	s6, s5, s6

	/* Copy iPXE image to new location and zero .bss
	 *
	 * Copy [_prefix, _edata) word by word to the destination
	 * (s5), then zero-fill up to the relocated _ebss.
	 */
	mv	t0, s5
	la	t1, _prefix
	la	t2, _edata
1:	LOADN	t3, (t1)
	STOREN	t3, (t0)
	addi	t0, t0, ( __riscv_xlen / 8 )
	addi	t1, t1, ( __riscv_xlen / 8 )
	blt	t1, t2, 1b
	la	t1, _ebss
	add	t1, t1, s6
2:	STOREN	zero, (t0)
	addi	t0, t0, ( __riscv_xlen / 8 )
	blt	t0, t1, 2b

	/* Jump to relocated copy */
	la	t0, 1f
	add	t0, t0, s6
	jr	t0
1:
	/* Attempt to re-enable paging (using the permanent page table) */
	la	a0, page_table
	call	enable_paging

	/* Reapply relocations, if still needed after enabling paging */
	phys_to_virt a0, s3
	call	apply_relocs

	/* Load stack pointer */
	la	sp, _estack

	/* Store boot hart (t0 is presumably a scratch register for
	 * the symbol address - see the STOREN macro definition)
	 */
	STOREN	s0, boot_hart, t0

	/* Copy and register system device tree */
	phys_to_virt a0, s1
	mv	a1, s4
	call	fdtmem_register

	/* Return to a virtual address in the relocated copy
	 *
	 * ra = saved return address plus relocation offset, converted
	 * from physical to virtual (a no-op when paging is disabled,
	 * since tp is then zero).
	 */
	add	ra, s2, s6
	sub	ra, ra, tp
	progress "\r\n"
	ret
	.size	install, . - install

/*****************************************************************************
 *
 * Reset (or lock up) system
 *
 *****************************************************************************
 *
 * Reset the system via SBI, as a means of exiting from a prefix that
 * has no other defined exit path.  If the reset fails, lock up the
 * system since there is nothing else that can sensibly be done.
 *
 * This function does not require a valid stack pointer.
 *
 * Parameters: none
 *
 * Returns: n/a (does not return)
 *
 */

/* SBI system reset extension */
#define SBI_SRST ( ( 'S' << 24 ) | ( 'R' << 16 ) | ( 'S' << 8 ) | 'T' )
#define SBI_SRST_SYSTEM_RESET 0x00
#define SBI_RESET_COLD 0x00000001

/* SBI legacy shutdown */
#define SBI_LEGACY_SHUTDOWN 0x08

	.section ".prefix.reset_system", "ax", @progbits
	.globl	reset_system
reset_system:
	/* Register usage: irrelevant (does not return) */
	progress "\r\niPXE->SBI reset\r\n"

	/* Ask the SBI for a cold system reset via the SRST extension */
	li	a0, SBI_RESET_COLD
	mv	a1, zero
	li	a6, SBI_SRST_SYSTEM_RESET
	li	a7, SBI_SRST
	ecall
	progress "(reset failed)\r\n"

	/* Fall back to the legacy shutdown call */
	li	a7, SBI_LEGACY_SHUTDOWN
	ecall
	progress "(legacy shutdown failed)\r\n"

	/* Nothing worked: park the hart in a low-power wait loop */
99:	wfi
	j	99b
	.size	reset_system, . - reset_system

/*****************************************************************************
 *
 * File split information for the compressor
 *
 *****************************************************************************
 */

/* ELF machine type */
#define EM_RISCV 243

	.section ".zinfo", "a", @progbits
	.org	0
	/* Copy initialised-data portion of image
	 * (from offset 0, length _filesz, alignment 1)
	 */
	.ascii	"COPY"
	.word	0
	.word	_filesz
	.word	1
	/* Notify compressor of link-time base address
	 * (the zero word is presumably padding before the 64-bit
	 * base value - confirm against the zinfo record format)
	 */
	.ascii	"BASE"
	.word	0
	.dword	_base
	/* Construct compressed relocation records
	 * (covering _reloc_filesz bytes at _reloc_offset, with the
	 * ELF machine type identifying the relocation format)
	 */
	.ascii	"ZREL"
	.word	_reloc_offset
	.word	_reloc_filesz
	.word	EM_RISCV