1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
|
/*
* Minimal AArch64 system boot code.
*
* Copyright Linaro Ltd 2019
*
* Loosely based on the newlib/libgloss setup stubs. Using semihosting
* for serial output and exit functions.
*/
/*
* Semihosting interface on ARM AArch64
* See "Semihosting for AArch32 and AArch64 Relase 2.0" by ARM
* w0 - semihosting call number
* x1 - semihosting parameter
*/
#define semihosting_call hlt 0xf000
#define SYS_WRITEC 0x03 /* character to debug channel */
#define SYS_WRITE0 0x04 /* string to debug channel */
#define SYS_EXIT 0x18
.align 12
.macro ventry label
.align 7
b \label
.endm
vector_table:
/* Current EL with SP0. */
ventry curr_sp0_sync /* Synchronous */
ventry curr_sp0_irq /* Irq/vIRQ */
ventry curr_sp0_fiq /* Fiq/vFIQ */
ventry curr_sp0_serror /* SError/VSError */
/* Current EL with SPx. */
ventry curr_spx_sync /* Synchronous */
ventry curr_spx_irq /* IRQ/vIRQ */
ventry curr_spx_fiq /* FIQ/vFIQ */
ventry curr_spx_serror /* SError/VSError */
/* Lower EL using AArch64. */
ventry lower_a64_sync /* Synchronous */
ventry lower_a64_irq /* IRQ/vIRQ */
ventry lower_a64_fiq /* FIQ/vFIQ */
ventry lower_a64_serror /* SError/VSError */
/* Lower EL using AArch32. */
ventry lower_a32_sync /* Synchronous */
ventry lower_a32_irq /* IRQ/vIRQ */
ventry lower_a32_fiq /* FIQ/vFIQ */
ventry lower_a32_serror /* SError/VSError */
.text
.align 4
/* Common vector handling for now */
curr_sp0_sync:
curr_sp0_irq:
curr_sp0_fiq:
curr_sp0_serror:
curr_spx_sync:
curr_spx_irq:
curr_spx_fiq:
curr_spx_serror:
lower_a64_sync:
lower_a64_irq:
lower_a64_fiq:
lower_a64_serror:
lower_a32_sync:
lower_a32_irq:
lower_a32_fiq:
lower_a32_serror:
mov x0, SYS_WRITE0
adr x1, .error
semihosting_call
mov x0, SYS_EXIT
mov x1, 1
semihosting_call
/* never returns */
.section .rodata
.error:
.string "Terminated by exception.\n"
.text
.align 4
.global __start
__start:
/* Installs a table of exception vectors to catch and handle all
exceptions by terminating the process with a diagnostic. */
adr x0, vector_table
msr vbar_el1, x0
/* Page table setup (identity mapping). */
adrp x0, ttb
add x0, x0, :lo12:ttb
msr ttbr0_el1, x0
/*
* Setup a flat address mapping page-tables. Stage one simply
* maps RAM to the first Gb. The stage2 tables have two 2mb
* translation block entries covering a series of adjacent
* 4k pages.
*/
/* Stage 1 entry: indexed by IA[38:30] */
adr x1, . /* phys address */
bic x1, x1, #(1 << 30) - 1 /* 1GB alignment*/
add x2, x0, x1, lsr #(30 - 3) /* offset in l1 page table */
/* point to stage 2 table [47:12] */
adrp x0, ttb_stage2
orr x1, x0, #3 /* ptr to stage 2 */
str x1, [x2]
/* Stage 2 entries: indexed by IA[29:21] */
ldr x5, =(((1 << 9) - 1) << 21)
/* First block: .text/RO/execute enabled */
adr x1, . /* phys address */
bic x1, x1, #(1 << 21) - 1 /* 2mb block alignment */
and x4, x1, x5 /* IA[29:21] */
add x2, x0, x4, lsr #(21 - 3) /* offset in l2 page table */
ldr x3, =0x401 /* attr(AF, block) */
orr x1, x1, x3
str x1, [x2] /* 1st 2mb (.text & rodata) */
/* Second block: .data/RW/no execute */
adrp x1, .data
add x1, x1, :lo12:.data
bic x1, x1, #(1 << 21) - 1 /* 2mb block alignment */
and x4, x1, x5 /* IA[29:21] */
add x2, x0, x4, lsr #(21 - 3) /* offset in l2 page table */
ldr x3, =(3 << 53) | 0x401 /* attr(AF, NX, block) */
orr x1, x1, x3
str x1, [x2] /* 2nd 2mb (.data & .bss)*/
/* Setup/enable the MMU. */
/*
* TCR_EL1 - Translation Control Registers
*
* IPS[34:32] = 40-bit PA, 1TB
* TG0[14:15] = b00 => 4kb granuale
* ORGN0[11:10] = Outer: Normal, WB Read-Alloc No Write-Alloc Cacheable
* IRGN0[9:8] = Inner: Normal, WB Read-Alloc No Write-Alloc Cacheable
* T0SZ[5:0] = 2^(64 - 25)
*
* The size of T0SZ controls what the initial lookup level. It
* would be nice to start at level 2 but unfortunatly for a
* flat-mapping on the virt machine we need to handle IA's
* with at least 1gb range to see RAM. So we start with a
* level 1 lookup.
*/
ldr x0, = (2 << 32) | 25 | (3 << 10) | (3 << 8)
msr tcr_el1, x0
mov x0, #0xee /* Inner/outer cacheable WB */
msr mair_el1, x0
isb
/*
* SCTLR_EL1 - System Control Register
*
* WXN[19] = 0 = no effect, Write does not imply XN (execute never)
* I[12] = Instruction cachability control
* SA[3] = SP alignment check
* C[2] = Data cachability control
* M[0] = 1, enable stage 1 address translation for EL0/1
*/
mrs x0, sctlr_el1
ldr x1, =0x100d /* bits I(12) SA(3) C(2) M(0) */
bic x0, x0, #(1 << 1) /* clear bit A(1) */
bic x0, x0, #(1 << 19) /* clear WXN */
orr x0, x0, x1 /* set bits */
dsb sy
msr sctlr_el1, x0
isb
/*
* Enable FP registers. The standard C pre-amble will be
* saving these and A-profile compilers will use AdvSIMD
* registers unless we tell it not to.
*/
mrs x0, cpacr_el1
orr x0, x0, #(3 << 20)
msr cpacr_el1, x0
/* Setup some stack space and enter the test code.
* Assume everthing except the return value is garbage when we
* return, we won't need it.
*/
adrp x0, stack_end
add x0, x0, :lo12:stack_end
mov sp, x0
bl main
/* pass return value to sys exit */
mov x1, x0
ldr x0, =0x20026 /* ADP_Stopped_ApplicationExit */
stp x0, x1, [sp, #-16]!
mov x1, sp
mov x0, SYS_EXIT
semihosting_call
/* never returns */
/*
* Helper Functions
*/
/* Output a single character to serial port */
.global __sys_outc
__sys_outc:
stp x0, x1, [sp, #-16]!
/* pass address of c on stack */
mov x1, sp
mov x0, SYS_WRITEC
semihosting_call
ldp x0, x1, [sp], #16
ret
.data
.align 12
/* Translation table
* @4k granuale: 9 bit lookup, 512 entries
*/
ttb:
.space 4096, 0
.align 12
ttb_stage2:
.space 4096, 0
.align 12
stack:
.space 65536, 0
stack_end:
|