/* SPDX-License-Identifier: GPL-2.0-or-later
 * Copyright (c) 2016 Cyril Hrubis <chrubis@suse.cz>
 */

/* The LTP library has some of its own atomic synchronisation primitives
 * contained in this file. Generally speaking these should not be used
 * directly in tests for synchronisation; instead use tst_checkpoint.h,
 * tst_fuzzy_sync.h or the POSIX library.
 *
 * Notes on compile and runtime memory barriers and atomics.
 *
 * Within the LTP library we have three concerns when accessing variables
 * shared by multiple threads or processes:
 *
 * (1) Removal or reordering of accesses by the compiler.
 * (2) Atomicity of addition.
 * (3) LOAD-STORE ordering between threads.
 *
 * The first (1) is the most likely to cause an error if not properly
 * handled. We avoid it by using constructs which the compiler will not
 * remove or reorder during optimisation: the __atomic and __sync intrinsics,
 * asm statements marked volatile with a "memory" clobber, and variables
 * declared volatile.
 *
 * On any platform Linux is likely to run on, a LOAD (fetch) or STORE of a
 * 32-bit integer will be atomic. However, fetching and adding to a variable
 * quite likely is not, so for (2) we need to ensure we use atomic addition.
 *
 * Finally, for tst_fuzzy_sync at least, we need to ensure that LOADs and
 * STOREs of any shared variables (including non-atomics) that are made
 * between calls to tst_fzsync_wait are completed (globally visible) before
 * tst_fzsync_wait completes. For this, runtime memory and instruction
 * barriers are required in addition to the compile-time barriers.
 *
 * We use full sequential ordering (__ATOMIC_SEQ_CST) for the sake of
 * simplicity. LTP tests tend to be syscall heavy, so a weaker memory model is
 * unlikely to yield a noticeable overall performance improvement, while at
 * the same time being a potent source of confusion.
 *
 * Likewise, for the fallback ASM, the simplest "definitely will work, always"
 * approach is preferred over anything more performant.
 *
 * Also see Documentation/memory-barriers.txt in the kernel tree and
 * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html;
 * note that terminology may vary between these sources.
 */
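
/* Illustrative sketch of how the library might drive a shared counter with
 * these primitives. This is a hedged example only: counter, worker() and
 * nr_workers are hypothetical names, and tst_res()/TFAIL assume the
 * tst_test.h API is available.
 *
 *	static int counter;
 *
 *	static void *worker(void *unused)
 *	{
 *		(void)unused;
 *		tst_atomic_inc(&counter);
 *		return NULL;
 *	}
 *
 *	// in the parent thread, after pthread_join()ing all nr_workers threads:
 *	if (tst_atomic_load(&counter) != nr_workers)
 *		tst_res(TFAIL, "atomic counter lost updates");
 */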

#ifndef TST_ATOMIC_H__
#define TST_ATOMIC_H__

#include "config.h"
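
/* HAVE_ATOMIC_MEMORY_MODEL and HAVE_SYNC_ADD_AND_FETCH are expected to be
 * set in config.h by the configure script, depending on which builtins the
 * compiler provides.
 */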

#if HAVE_ATOMIC_MEMORY_MODEL == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	return __atomic_add_fetch(v, i, __ATOMIC_SEQ_CST);
}

static inline int tst_atomic_load(int *v)
{
	return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}

static inline void tst_atomic_store(int i, int *v)
{
	__atomic_store_n(v, i, __ATOMIC_SEQ_CST);
}

#elif HAVE_SYNC_ADD_AND_FETCH == 1
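/* The legacy __sync builtins provide no atomic load or store, so below a
 * plain access is wrapped in full memory barriers (__sync_synchronize).
 */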
static inline int tst_atomic_add_return(int i, int *v)
{
	return __sync_add_and_fetch(v, i);
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	__sync_synchronize();
	ret = *v;
	__sync_synchronize();
	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	__sync_synchronize();
	*v = i;
	__sync_synchronize();
}

#elif defined(__i386__) || defined(__x86_64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int __ret = i;

	/*
	 * taken from arch/x86/include/asm/cmpxchg.h
	 */
	asm volatile ("lock; xaddl %0, %1\n"
		: "+r" (__ret), "+m" (*v) : : "memory", "cc");

	return i + __ret;
}

#elif defined(__powerpc__) || defined(__powerpc64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	int t;

	/* taken from arch/powerpc/include/asm/atomic.h */
	asm volatile(
		"	sync\n"
		"1:	lwarx	%0,0,%2		# atomic_add_return\n"
		"	add %0,%1,%0\n"
		"	stwcx.	%0,0,%2 \n"
		"	bne-	1b\n"
		"	sync\n"
		: "=&r" (t)
		: "r" (i), "r" (v)
		: "cc", "memory");

	return t;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("sync\n" : : : "memory");
	ret = *v;
	asm volatile("sync\n" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("sync\n" : : : "memory");
	*v = i;
	asm volatile("sync\n" : : : "memory");
}

#elif defined(__s390__) || defined(__s390x__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

static inline int tst_atomic_add_return(int i, int *v)
{
	int old_val, new_val;

	/* taken from arch/s390/include/asm/atomic.h */
	asm volatile(
		"	l	%0,%2\n"
		"0:	lr	%1,%0\n"
		"	ar	%1,%3\n"
		"	cs	%0,%1,%2\n"
		"	jl	0b"
		: "=&d" (old_val), "=&d" (new_val), "+Q" (*v)
		: "d" (i)
		: "cc", "memory");

	return old_val + i;
}

#elif defined(__arc__)

/* ARCv2 defines the SMP barrier (dmb); ARC700 falls back to a compiler barrier */
#ifdef __ARC700__
#define smp_mb()	asm volatile("" : : : "memory")
#else
#define smp_mb()	asm volatile("dmb 3\n" : : : "memory")
#endif

static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned int val;

	smp_mb();

	asm volatile(
		"1:	llock   %[val], [%[ctr]]	\n"
		"	add     %[val], %[val], %[i]	\n"
		"	scond   %[val], [%[ctr]]	\n"
		"	bnz     1b			\n"
		: [val]	"=&r"	(val)
		: [ctr]	"r"	(v),
		  [i]	"ir"	(i)
		: "cc", "memory");

	smp_mb();

	return val;
}

static inline int tst_atomic_load(int *v)
{
	int ret;

	smp_mb();
	ret = *v;
	smp_mb();

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	smp_mb();
	*v = i;
	smp_mb();
}

#elif defined(__aarch64__)
static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned long tmp;
	int result;

	__asm__ __volatile__(
"       prfm    pstl1strm, %2	\n"
"1:     ldaxr	%w0, %2		\n"
"       add	%w0, %w0, %w3	\n"
"       stlxr	%w1, %w0, %2	\n"
"       cbnz	%w1, 1b		\n"
"       dmb ish			\n"
	: "=&r" (result), "=&r" (tmp), "+Q" (*v)
	: "Ir" (i)
	: "memory");

	return result;
}

/* We use load and store exclusive (ldaxr & stlxr) instructions to try to
 * prevent the tst_atomic_load and, more likely, tst_atomic_store functions
 * from interfering with tst_atomic_add_return, which takes advantage of
 * exclusivity. It is not clear whether this is a good idea, but it does mean
 * that all three functions are very similar.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;
	unsigned long tmp;

	asm volatile("//atomic_load			\n"
		"	prfm	pstl1strm,  %[v]	\n"
		"1:	ldaxr	%w[ret], %[v]		\n"
		"	stlxr   %w[tmp], %w[ret], %[v]  \n"
		"	cbnz    %w[tmp], 1b		\n"
		"	dmb ish				\n"
		: [tmp] "=&r" (tmp), [ret] "=&r" (ret), [v] "+Q" (*v)
		: : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	unsigned long tmp;

	asm volatile("//atomic_store			\n"
		"	prfm	pstl1strm, %[v]		\n"
		"1:	ldaxr	%w[tmp], %[v]		\n"
		"	stlxr   %w[tmp], %w[i], %[v]	\n"
		"	cbnz    %w[tmp], 1b		\n"
		"	dmb ish				\n"
		: [tmp] "=&r" (tmp), [v] "+Q" (*v)
		: [i] "r" (i)
		: "memory");
}

#elif defined(__sparc__) && defined(__arch64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1
static inline int tst_atomic_add_return(int i, int *v)
{
	int ret, tmp;

	/* Based on arch/sparc/lib/atomic_64.S with the exponential backoff
	 * function removed because we are unlikely to have a large (>= 16?)
	 * number of cores continuously trying to update one variable.
	 */
	asm volatile("/*atomic_add_return*/		\n"
		"1:	ldsw	[%[v]], %[ret];		\n"
		"	add	%[ret], %[i], %[tmp];	\n"
		"	cas	[%[v]], %[ret], %[tmp];	\n"
		"	cmp	%[ret], %[tmp];		\n"
		"	bne,pn	%%icc, 1b;		\n"
		"	nop;				\n"
		"	add	%[ret], %[i], %[ret];	\n"
		: [ret] "=&r" (ret), [tmp] "=&r" (tmp)
		: [i] "r" (i), [v] "r" (v)
		: "memory", "cc");

	return ret;
}

#else /* HAVE_SYNC_ADD_AND_FETCH == 1 */
# error Your compiler does not provide __atomic_add_fetch or __sync_add_and_fetch, \
        and an LTP fallback implementation is missing for your architecture.
#endif

#ifdef LTP_USE_GENERIC_LOAD_STORE_ASM
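/* x86, s390 and sparc64 are strongly ordered and an aligned 32-bit load or
 * store is atomic there, so for those architectures a compiler barrier
 * around a plain access is assumed to be sufficient for our usage.
 */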
static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("" : : : "memory");
	ret = *v;
	asm volatile("" : : : "memory");

	return ret;
}

static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("" : : : "memory");
	*v = i;
	asm volatile("" : : : "memory");
}
#endif

static inline int tst_atomic_inc(int *v)
{
	return tst_atomic_add_return(1, v);
}

static inline int tst_atomic_dec(int *v)
{
	return tst_atomic_add_return(-1, v);
}

#endif	/* TST_ATOMIC_H__ */