===================================================
Adding reference counters (krefs) to kernel objects
===================================================

:Author: Corey Minyard <minyard@acm.org>
:Author: Thomas Hellstrom <thellstrom@vmware.com>

A lot of this was lifted from Greg Kroah-Hartman's 2004 OLS paper and
presentation on krefs, which can be found at:

  - http://www.kroah.com/linux/talks/ols_2004_kref_paper/Reprint-Kroah-Hartman-OLS2004.pdf
  - http://www.kroah.com/linux/talks/ols_2004_kref_talk/

Introduction
============

krefs allow you to add reference counters to your objects.  If you
have objects that are used in multiple places and passed around, and
you don't have refcounts, your code is almost certainly broken.  If
you want refcounts, krefs are the way to go.

To use a kref, add one to your data structures like::

    struct my_data
    {
	.
	.
	struct kref refcount;
	.
	.
    };

The kref can occur anywhere within the data structure.

Initialization
==============

You must initialize the kref after you allocate it.  To do this, call
kref_init as follows::

     struct my_data *data;

     data = kmalloc(sizeof(*data), GFP_KERNEL);
     if (!data)
            return -ENOMEM;
     kref_init(&data->refcount);

This sets the refcount in the kref to 1.

Kref rules
==========

Once you have an initialized kref, you must follow these rules:

1) If you make a non-temporary copy of a pointer, especially if
   it can be passed to another thread of execution, you must
   increment the refcount with kref_get() before passing it off::

       kref_get(&data->refcount);

   If you already have a valid pointer to a kref-ed structure (the
   refcount cannot go to zero) you may do this without a lock.

2) When you are done with a pointer, you must call kref_put()::

       kref_put(&data->refcount, data_release);

   If this is the last reference to the pointer, the release
   routine will be called.  If the code never tries to get
   a valid pointer to a kref-ed structure without already
   holding a valid pointer, it is safe to do this without
   a lock.

3) If the code attempts to gain a reference to a kref-ed structure
   without already holding a valid pointer, it must serialize access
   where a kref_put() cannot occur during the kref_get(), and the
   structure must remain valid during the kref_get().

For example, if you allocate some data and then pass it to another
thread to process::

    void data_release(struct kref *ref)
    {
	struct my_data *data = container_of(ref, struct my_data, refcount);
	kfree(data);
    }

    int more_data_handling(void *cb_data)
    {
	struct my_data *data = cb_data;
	.
	. do stuff with data here
	.
	kref_put(&data->refcount, data_release);
	return 0;
    }

    int my_data_handler(void)
    {
	int rv = 0;
	struct my_data *data;
	struct task_struct *task;
	data = kmalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;
	kref_init(&data->refcount);

	kref_get(&data->refcount);
	task = kthread_run(more_data_handling, data, "more_data_handling");
	if (IS_ERR(task)) {
		rv = PTR_ERR(task);
		kref_put(&data->refcount, data_release);
		goto out;
	}

	.
	. do stuff with data here
	.
    out:
	kref_put(&data->refcount, data_release);
	return rv;
    }

This way, it doesn't matter in which order the two threads handle the
data; the kref_put() knows when the data is no longer referenced and
releases it.  The kref_get() does not require a lock, since we already
have a valid pointer that we own a refcount for.  The put needs no lock
because nothing tries to get the data without already holding a
pointer.

Note that the "before" in rule 1 is very important.  You should never
do something like::

	task = kthread_run(more_data_handling, data, "more_data_handling");
	if (IS_ERR(task)) {
		rv = PTR_ERR(task);
		goto out;
	} else
		/* BAD BAD BAD - get is after the handoff */
		kref_get(&data->refcount);

Don't assume you know what you are doing and use the above construct.
First of all, you may not know what you are doing.  Second, you may
know what you are doing (there are some situations where locking is
involved where the above may be legal) but someone else who doesn't
know what they are doing may change the code or copy the code.  It's
bad style.  Don't do it.

There are some situations where you can optimize the gets and puts.
For instance, if you are done with an object and are enqueuing it for
something else or passing it off to something else, there is no reason
to do a get then a put::

	/* Silly extra get and put */
	kref_get(&obj->ref);
	enqueue(obj);
	kref_put(&obj->ref, obj_cleanup);

Just do the enqueue.  A comment about this is always welcome::

	enqueue(obj);
	/* We are done with obj, so we pass our refcount off
	   to the queue.  DON'T TOUCH obj AFTER HERE! */
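
The consuming side then owns that reference and must do the final
kref_put().  A minimal sketch of that side, assuming a hypothetical
dequeue() that hands back a previously queued pointer::

	obj = dequeue();
	if (obj) {
		.
		. do stuff with obj here
		.
		kref_put(&obj->ref, obj_cleanup);
	}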

The last rule (rule 3) is the nastiest one to handle.  Say, for
instance, you have a list of items that are each kref-ed, and you wish
to get the first one.  You can't just pull the first item off the list
and kref_get() it.  That violates rule 3 because you are not already
holding a valid pointer.  You must add a mutex (or some other lock).
For instance::

	static DEFINE_MUTEX(mutex);
	static LIST_HEAD(q);
	struct my_data
	{
		struct kref      refcount;
		struct list_head link;
	};

	static struct my_data *get_entry(void)
	{
		struct my_data *entry = NULL;
		mutex_lock(&mutex);
		if (!list_empty(&q)) {
			entry = container_of(q.next, struct my_data, link);
			kref_get(&entry->refcount);
		}
		mutex_unlock(&mutex);
		return entry;
	}

	static void release_entry(struct kref *ref)
	{
		struct my_data *entry = container_of(ref, struct my_data, refcount);

		list_del(&entry->link);
		kfree(entry);
	}

	static void put_entry(struct my_data *entry)
	{
		mutex_lock(&mutex);
		kref_put(&entry->refcount, release_entry);
		mutex_unlock(&mutex);
	}

The kref_put() return value is useful if you do not want to hold the
lock during the whole release operation.  Say you didn't want to call
kfree() with the lock held in the example above (since it is kind of
pointless to do so).  You could use kref_put() as follows::

	static void release_entry(struct kref *ref)
	{
		/* All work is done after the return from kref_put(). */
	}

	static void put_entry(struct my_data *entry)
	{
		mutex_lock(&mutex);
		if (kref_put(&entry->refcount, release_entry)) {
			list_del(&entry->link);
			mutex_unlock(&mutex);
			kfree(entry);
		} else
			mutex_unlock(&mutex);
	}

This is really more useful if you have to call other routines as part
of the free operations that could take a long time or might claim the
same lock.  Note that doing everything in the release routine is still
preferred as it is a little neater.

The above example could also be optimized using kref_get_unless_zero() in
the following way::

	static struct my_data *get_entry(void)
	{
		struct my_data *entry = NULL;
		mutex_lock(&mutex);
		if (!list_empty(&q)) {
			entry = container_of(q.next, struct my_data, link);
			if (!kref_get_unless_zero(&entry->refcount))
				entry = NULL;
		}
		mutex_unlock(&mutex);
		return entry;
	}

	static void release_entry(struct kref *ref)
	{
		struct my_data *entry = container_of(ref, struct my_data, refcount);

		mutex_lock(&mutex);
		list_del(&entry->link);
		mutex_unlock(&mutex);
		kfree(entry);
	}

	static void put_entry(struct my_data *entry)
	{
		kref_put(&entry->refcount, release_entry);
	}

This is useful to remove the mutex lock around kref_put() in put_entry(),
but it is important that kref_get_unless_zero() is enclosed in the same
critical section that finds the entry in the lookup table; otherwise
kref_get_unless_zero() may reference already freed memory.  Note that it
is illegal to use kref_get_unless_zero() without checking its return
value.  If you are sure (by already having a valid pointer) that
kref_get_unless_zero() will return true, then use kref_get() instead.
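
For illustration, the difference in a minimal form::

	/* BAD - return value ignored, the object may already be dead */
	kref_get_unless_zero(&entry->refcount);

	/* OK - we already hold a counted reference, so the refcount
	   cannot be zero and plain kref_get() is correct */
	kref_get(&entry->refcount);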

Krefs and RCU
=============

kref_get_unless_zero() also makes it possible to use RCU
locking for lookups in the above example::

	struct my_data
	{
		struct rcu_head rhead;
		.
		struct kref refcount;
		.
		.
	};

	static struct my_data *get_entry_rcu(void)
	{
		struct my_data *entry = NULL;
		rcu_read_lock();
		if (!list_empty(&q)) {
			entry = container_of(q.next, struct my_data, link);
			if (!kref_get_unless_zero(&entry->refcount))
				entry = NULL;
		}
		rcu_read_unlock();
		return entry;
	}

	static void release_entry_rcu(struct kref *ref)
	{
		struct my_data *entry = container_of(ref, struct my_data, refcount);

		mutex_lock(&mutex);
		list_del_rcu(&entry->link);
		mutex_unlock(&mutex);
		kfree_rcu(entry, rhead);
	}

	static void put_entry(struct my_data *entry)
	{
		kref_put(&entry->refcount, release_entry_rcu);
	}

But note that the struct kref member needs to remain in valid memory for an
RCU grace period after release_entry_rcu() was called.  That can be
accomplished by using kfree_rcu(entry, rhead) as done above, or by calling
synchronize_rcu() before using kfree(), but note that synchronize_rcu() may
sleep for a substantial amount of time.
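
For illustration, a sketch of the synchronize_rcu() variant of the
release routine, under the same assumptions as the example above::

	static void release_entry_rcu(struct kref *ref)
	{
		struct my_data *entry = container_of(ref, struct my_data, refcount);

		mutex_lock(&mutex);
		list_del_rcu(&entry->link);
		mutex_unlock(&mutex);
		/* Wait for all pre-existing RCU readers; this may sleep. */
		synchronize_rcu();
		kfree(entry);
	}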
/*
 * Kontron PLD watchdog driver
 *
 * Copyright (c) 2010-2013 Kontron Europe GmbH
 * Author: Michael Brunner <michael.brunner@kontron.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License 2 as published
 * by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Note: From the PLD watchdog point of view timeout and pretimeout are
 *       defined differently than in the kernel.
 *       First the pretimeout stage runs out before the timeout stage gets
 *       active.
 *
 * Kernel/API:                     P-----| pretimeout
 *               |-----------------------T timeout
 * Watchdog:     |-----------------P       pretimeout_stage
 *                                 |-----T timeout_stage
 */

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/uaccess.h>
#include <linux/watchdog.h>
#include <linux/platform_device.h>
#include <linux/mfd/kempld.h>

#define KEMPLD_WDT_STAGE_TIMEOUT(x)	(0x1b + (x) * 4)
#define KEMPLD_WDT_STAGE_CFG(x)		(0x18 + (x))
#define STAGE_CFG_GET_PRESCALER(x)	(((x) & 0x30) >> 4)
#define STAGE_CFG_SET_PRESCALER(x)	(((x) & 0x3) << 4)
#define STAGE_CFG_PRESCALER_MASK	0x30
#define STAGE_CFG_ACTION_MASK		0x7
#define STAGE_CFG_ASSERT		(1 << 3)

#define KEMPLD_WDT_MAX_STAGES		2
#define KEMPLD_WDT_KICK			0x16
#define KEMPLD_WDT_CFG			0x17
#define KEMPLD_WDT_CFG_ENABLE		0x10
#define KEMPLD_WDT_CFG_ENABLE_LOCK	0x8
#define KEMPLD_WDT_CFG_GLOBAL_LOCK	0x80

enum {
	ACTION_NONE = 0,
	ACTION_RESET,
	ACTION_NMI,
	ACTION_SMI,
	ACTION_SCI,
	ACTION_DELAY,
};

enum {
	STAGE_TIMEOUT = 0,
	STAGE_PRETIMEOUT,
};

enum {
	PRESCALER_21 = 0,
	PRESCALER_17,
	PRESCALER_12,
};

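/*
 * Prescaler values divide the PLD clock; stage timeouts are programmed
 * as timeout * pld_clock / prescaler, rounded up (see
 * kempld_wdt_set_stage_timeout()).
 */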
static const u32 kempld_prescaler[] = {
	[PRESCALER_21] = (1 << 21) - 1,
	[PRESCALER_17] = (1 << 17) - 1,
	[PRESCALER_12] = (1 << 12) - 1,
	0,
};

struct kempld_wdt_stage {
	unsigned int	id;
	u32		mask;
};

struct kempld_wdt_data {
	struct kempld_device_data	*pld;
	struct watchdog_device		wdd;
	unsigned int			pretimeout;
	struct kempld_wdt_stage		stage[KEMPLD_WDT_MAX_STAGES];
#ifdef CONFIG_PM
	u8				pm_status_store;
#endif
};

#define DEFAULT_TIMEOUT		30 /* seconds */
#define DEFAULT_PRETIMEOUT	0

static unsigned int timeout = DEFAULT_TIMEOUT;
module_param(timeout, uint, 0);
MODULE_PARM_DESC(timeout,
	"Watchdog timeout in seconds. (>=0, default="
	__MODULE_STRING(DEFAULT_TIMEOUT) ")");

static unsigned int pretimeout = DEFAULT_PRETIMEOUT;
module_param(pretimeout, uint, 0);
MODULE_PARM_DESC(pretimeout,
	"Watchdog pretimeout in seconds. (>=0, default="
	__MODULE_STRING(DEFAULT_PRETIMEOUT) ")");

static bool nowayout = WATCHDOG_NOWAYOUT;
module_param(nowayout, bool, 0);
MODULE_PARM_DESC(nowayout,
	"Watchdog cannot be stopped once started (default="
	__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");

static int kempld_wdt_set_stage_action(struct kempld_wdt_data *wdt_data,
					struct kempld_wdt_stage *stage,
					u8 action)
{
	struct kempld_device_data *pld = wdt_data->pld;
	u8 stage_cfg;

	if (!stage || !stage->mask)
		return -EINVAL;

	kempld_get_mutex(pld);
	stage_cfg = kempld_read8(pld, KEMPLD_WDT_STAGE_CFG(stage->id));
	stage_cfg &= ~STAGE_CFG_ACTION_MASK;
	stage_cfg |= (action & STAGE_CFG_ACTION_MASK);

	if (action == ACTION_RESET)
		stage_cfg |= STAGE_CFG_ASSERT;
	else
		stage_cfg &= ~STAGE_CFG_ASSERT;

	kempld_write8(pld, KEMPLD_WDT_STAGE_CFG(stage->id), stage_cfg);
	kempld_release_mutex(pld);

	return 0;
}

static int kempld_wdt_set_stage_timeout(struct kempld_wdt_data *wdt_data,
					struct kempld_wdt_stage *stage,
					unsigned int timeout)
{
	struct kempld_device_data *pld = wdt_data->pld;
	u32 prescaler;
	u64 stage_timeout64;
	u32 stage_timeout;
	u32 remainder;
	u8 stage_cfg;

#if GCC_VERSION < 40400
	/* work around a bug compiling do_div() */
	prescaler = READ_ONCE(kempld_prescaler[PRESCALER_21]);
#else
	prescaler = kempld_prescaler[PRESCALER_21];
#endif

	if (!stage)
		return -EINVAL;

	stage_timeout64 = (u64)timeout * pld->pld_clock;
	remainder = do_div(stage_timeout64, prescaler);
	if (remainder)
		stage_timeout64++;

	if (stage_timeout64 > stage->mask)
		return -EINVAL;

	stage_timeout = stage_timeout64 & stage->mask;

	kempld_get_mutex(pld);
	stage_cfg = kempld_read8(pld, KEMPLD_WDT_STAGE_CFG(stage->id));
	stage_cfg &= ~STAGE_CFG_PRESCALER_MASK;
	stage_cfg |= STAGE_CFG_SET_PRESCALER(PRESCALER_21);
	kempld_write8(pld, KEMPLD_WDT_STAGE_CFG(stage->id), stage_cfg);
	kempld_write32(pld, KEMPLD_WDT_STAGE_TIMEOUT(stage->id),
			stage_timeout);
	kempld_release_mutex(pld);

	return 0;
}

/*
 * kempld_get_mutex must be called prior to calling this function.
 */
static unsigned int kempld_wdt_get_timeout(struct kempld_wdt_data *wdt_data,
						struct kempld_wdt_stage *stage)
{
	struct kempld_device_data *pld = wdt_data->pld;
	unsigned int timeout;
	u64 stage_timeout;
	u32 prescaler;
	u32 remainder;
	u8 stage_cfg;

	if (!stage->mask)
		return 0;

	stage_cfg = kempld_read8(pld, KEMPLD_WDT_STAGE_CFG(stage->id));
	stage_timeout = kempld_read32(pld, KEMPLD_WDT_STAGE_TIMEOUT(stage->id));
	prescaler = kempld_prescaler[STAGE_CFG_GET_PRESCALER(stage_cfg)];

	stage_timeout = (stage_timeout & stage->mask) * prescaler;
	remainder = do_div(stage_timeout, pld->pld_clock);
	if (remainder)
		stage_timeout++;

	timeout = stage_timeout;
	WARN_ON_ONCE(timeout != stage_timeout);

	return timeout;
}

static int kempld_wdt_set_timeout(struct watchdog_device *wdd,
					unsigned int timeout)
{
	struct kempld_wdt_data *wdt_data = watchdog_get_drvdata(wdd);
	struct kempld_wdt_stage *pretimeout_stage;
	struct kempld_wdt_stage *timeout_stage;
	int ret;

	timeout_stage = &wdt_data->stage[STAGE_TIMEOUT];
	pretimeout_stage = &wdt_data->stage[STAGE_PRETIMEOUT];

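	/*
	 * With a pretimeout configured, the hardware timeout stage covers
	 * only the interval between pretimeout and timeout (see the diagram
	 * at the top of this file), so program it with the pretimeout value.
	 */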
	if (pretimeout_stage->mask && wdt_data->pretimeout > 0)
		timeout = wdt_data->pretimeout;

	ret = kempld_wdt_set_stage_action(wdt_data, timeout_stage,
						ACTION_RESET);
	if (ret)
		return ret;
	ret = kempld_wdt_set_stage_timeout(wdt_data, timeout_stage,
						timeout);
	if (ret)
		return ret;

	wdd->timeout = timeout;
	return 0;
}

static int kempld_wdt_set_pretimeout(struct watchdog_device *wdd,
					unsigned int pretimeout)
{
	struct kempld_wdt_data *wdt_data = watchdog_get_drvdata(wdd);
	struct kempld_wdt_stage *pretimeout_stage;
	u8 action = ACTION_NONE;
	int ret;

	pretimeout_stage = &wdt_data->stage[STAGE_PRETIMEOUT];

	if (!pretimeout_stage->mask)
		return -ENXIO;

	if (pretimeout > wdd->timeout)
		return -EINVAL;

	if (pretimeout > 0)
		action = ACTION_NMI;

	ret = kempld_wdt_set_stage_action(wdt_data, pretimeout_stage,
						action);
	if (ret)
		return ret;
	ret = kempld_wdt_set_stage_timeout(wdt_data, pretimeout_stage,
						wdd->timeout - pretimeout);
	if (ret)
		return ret;

	wdt_data->pretimeout = pretimeout;
	return 0;
}

static void kempld_wdt_update_timeouts(struct kempld_wdt_data *wdt_data)
{
	struct kempld_device_data *pld = wdt_data->pld;
	struct kempld_wdt_stage *pretimeout_stage;
	struct kempld_wdt_stage *timeout_stage;
	unsigned int pretimeout, timeout;

	pretimeout_stage = &wdt_data->stage[STAGE_PRETIMEOUT];
	timeout_stage = &wdt_data->stage[STAGE_TIMEOUT];

	kempld_get_mutex(pld);
	pretimeout = kempld_wdt_get_timeout(wdt_data, pretimeout_stage);
	timeout = kempld_wdt_get_timeout(wdt_data, timeout_stage);
	kempld_release_mutex(pld);

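	/*
	 * Translate the hardware stage lengths back to the kernel view:
	 * the timeout stage length is the kernel pretimeout interval, and
	 * the kernel timeout is the sum of both stage lengths (see the
	 * diagram at the top of this file).
	 */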
	if (pretimeout)
		wdt_data->pretimeout = timeout;
	else
		wdt_data->pretimeout = 0;

	wdt_data->wdd.timeout = pretimeout + timeout;
}

static int kempld_wdt_start(struct watchdog_device *wdd)
{
	struct kempld_wdt_data *wdt_data = watchdog_get_drvdata(wdd);
	struct kempld_device_data *pld = wdt_data->pld;
	u8 status;
	int ret;

	ret = kempld_wdt_set_timeout(wdd, wdd->timeout);
	if (ret)
		return ret;

	kempld_get_mutex(pld);
	status = kempld_read8(pld, KEMPLD_WDT_CFG);
	status |= KEMPLD_WDT_CFG_ENABLE;
	kempld_write8(pld, KEMPLD_WDT_CFG, status);
	status = kempld_read8(pld, KEMPLD_WDT_CFG);
	kempld_release_mutex(pld);

	/* Check if the watchdog was enabled */
	if (!(status & KEMPLD_WDT_CFG_ENABLE))
		return -EACCES;

	return 0;
}

static int kempld_wdt_stop(struct watchdog_device *wdd)
{
	struct kempld_wdt_data *wdt_data = watchdog_get_drvdata(wdd);
	struct kempld_device_data *pld = wdt_data->pld;
	u8 status;

	kempld_get_mutex(pld);
	status = kempld_read8(pld, KEMPLD_WDT_CFG);
	status &= ~KEMPLD_WDT_CFG_ENABLE;
	kempld_write8(pld, KEMPLD_WDT_CFG, status);
	status = kempld_read8(pld, KEMPLD_WDT_CFG);
	kempld_release_mutex(pld);

	/* Check if the watchdog was disabled */
	if (status & KEMPLD_WDT_CFG_ENABLE)
		return -EACCES;

	return 0;
}

static int kempld_wdt_keepalive(struct watchdog_device *wdd)
{
	struct kempld_wdt_data *wdt_data = watchdog_get_drvdata(wdd);
	struct kempld_device_data *pld = wdt_data->pld;

	kempld_get_mutex(pld);
	kempld_write8(pld, KEMPLD_WDT_KICK, 'K');
	kempld_release_mutex(pld);

	return 0;
}

static long kempld_wdt_ioctl(struct watchdog_device *wdd, unsigned int cmd,
				unsigned long arg)
{
	struct kempld_wdt_data *wdt_data = watchdog_get_drvdata(wdd);
	void __user *argp = (void __user *)arg;
	int ret = -ENOIOCTLCMD;
	int __user *p = argp;
	int new_value;

	switch (cmd) {
	case WDIOC_SETPRETIMEOUT:
		if (get_user(new_value, p))
			return -EFAULT;
		ret = kempld_wdt_set_pretimeout(wdd, new_value);
		if (ret)
			return ret;
		ret = kempld_wdt_keepalive(wdd);
		break;
	case WDIOC_GETPRETIMEOUT:
		ret = put_user(wdt_data->pretimeout, (int __user *)arg);
		break;
	}

	return ret;
}

static int kempld_wdt_probe_stages(struct watchdog_device *wdd)
{
	struct kempld_wdt_data *wdt_data = watchdog_get_drvdata(wdd);
	struct kempld_device_data *pld = wdt_data->pld;
	struct kempld_wdt_stage *pretimeout_stage;
	struct kempld_wdt_stage *timeout_stage;
	u8 index, data, data_orig;
	u32 mask;
	int i, j;

	pretimeout_stage = &wdt_data->stage[STAGE_PRETIMEOUT];
	timeout_stage = &wdt_data->stage[STAGE_TIMEOUT];

	pretimeout_stage->mask = 0;
	timeout_stage->mask = 0;

	for (i = 0; i < 3; i++) {
		index = KEMPLD_WDT_STAGE_TIMEOUT(i);
		mask = 0;

		kempld_get_mutex(pld);
		/* Probe each byte individually. */
		for (j = 0; j < 4; j++) {
			data_orig = kempld_read8(pld, index + j);
			kempld_write8(pld, index + j, 0x00);
			data = kempld_read8(pld, index + j);
			/* A failed write means this byte is reserved */
			if (data != 0x00)
				break;
			kempld_write8(pld, index + j, data_orig);
			mask |= 0xff << (j * 8);
		}
		kempld_release_mutex(pld);

		/* Assign available stages to timeout and pretimeout */
		if (!timeout_stage->mask) {
			timeout_stage->mask = mask;
			timeout_stage->id = i;
		} else {
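			/*
			 * A second usable stage was found; if the PLD can
			 * raise an NMI, use the earlier stage for the
			 * pretimeout and this one for the timeout.
			 */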
			if (pld->feature_mask & KEMPLD_FEATURE_BIT_NMI) {
				pretimeout_stage->mask = timeout_stage->mask;
				timeout_stage->mask = mask;
				pretimeout_stage->id = timeout_stage->id;
				timeout_stage->id = i;
			}
			break;
		}
	}

	if (!timeout_stage->mask)
		return -ENODEV;

	return 0;
}

static const struct watchdog_info kempld_wdt_info = {
	.identity	= "KEMPLD Watchdog",
	.options	= WDIOF_SETTIMEOUT |
			WDIOF_KEEPALIVEPING |
			WDIOF_MAGICCLOSE |
			WDIOF_PRETIMEOUT
};

static const struct watchdog_ops kempld_wdt_ops = {
	.owner		= THIS_MODULE,
	.start		= kempld_wdt_start,
	.stop		= kempld_wdt_stop,
	.ping		= kempld_wdt_keepalive,
	.set_timeout	= kempld_wdt_set_timeout,
	.ioctl		= kempld_wdt_ioctl,
};

static int kempld_wdt_probe(struct platform_device *pdev)
{
	struct kempld_device_data *pld = dev_get_drvdata(pdev->dev.parent);
	struct kempld_wdt_data *wdt_data;
	struct device *dev = &pdev->dev;
	struct watchdog_device *wdd;
	u8 status;
	int ret = 0;

	wdt_data = devm_kzalloc(dev, sizeof(*wdt_data), GFP_KERNEL);
	if (!wdt_data)
		return -ENOMEM;

	wdt_data->pld = pld;
	wdd = &wdt_data->wdd;
	wdd->parent = dev;

	kempld_get_mutex(pld);
	status = kempld_read8(pld, KEMPLD_WDT_CFG);
	kempld_release_mutex(pld);

	/* Enable nowayout if watchdog is already locked */
	if (status & (KEMPLD_WDT_CFG_ENABLE_LOCK |
			KEMPLD_WDT_CFG_GLOBAL_LOCK)) {
		if (!nowayout)
			dev_warn(dev,
				"Forcing nowayout - watchdog lock enabled!\n");
		nowayout = true;
	}

	wdd->info = &kempld_wdt_info;
	wdd->ops = &kempld_wdt_ops;

	watchdog_set_drvdata(wdd, wdt_data);
	watchdog_set_nowayout(wdd, nowayout);

	ret = kempld_wdt_probe_stages(wdd);
	if (ret)
		return ret;

	kempld_wdt_set_timeout(wdd, timeout);
	kempld_wdt_set_pretimeout(wdd, pretimeout);

	/* Check if watchdog is already enabled */
	if (status & KEMPLD_WDT_CFG_ENABLE) {
		/* Get current watchdog settings */
		kempld_wdt_update_timeouts(wdt_data);
		dev_info(dev, "Watchdog was already enabled\n");
	}

	platform_set_drvdata(pdev, wdt_data);
	ret = watchdog_register_device(wdd);
	if (ret)
		return ret;

	dev_info(dev, "Watchdog registered with %ds timeout\n", wdd->timeout);

	return 0;
}

static void kempld_wdt_shutdown(struct platform_device *pdev)
{
	struct kempld_wdt_data *wdt_data = platform_get_drvdata(pdev);

	kempld_wdt_stop(&wdt_data->wdd);
}

static int kempld_wdt_remove(struct platform_device *pdev)
{
	struct kempld_wdt_data *wdt_data = platform_get_drvdata(pdev);
	struct watchdog_device *wdd = &wdt_data->wdd;
	int ret = 0;

	if (!nowayout)
		ret = kempld_wdt_stop(wdd);
	watchdog_unregister_device(wdd);

	return ret;
}

#ifdef CONFIG_PM
/* Disable watchdog if it is active during suspend */
static int kempld_wdt_suspend(struct platform_device *pdev,
				pm_message_t message)
{
	struct kempld_wdt_data *wdt_data = platform_get_drvdata(pdev);
	struct kempld_device_data *pld = wdt_data->pld;
	struct watchdog_device *wdd = &wdt_data->wdd;

	kempld_get_mutex(pld);
	wdt_data->pm_status_store = kempld_read8(pld, KEMPLD_WDT_CFG);
	kempld_release_mutex(pld);

	kempld_wdt_update_timeouts(wdt_data);

	if (wdt_data->pm_status_store & KEMPLD_WDT_CFG_ENABLE)
		return kempld_wdt_stop(wdd);

	return 0;
}

/* Enable watchdog and configure it if necessary */
static int kempld_wdt_resume(struct platform_device *pdev)
{
	struct kempld_wdt_data *wdt_data = platform_get_drvdata(pdev);
	struct watchdog_device *wdd = &wdt_data->wdd;

	/*
	 * If the watchdog was stopped before suspend, make sure it gets
	 * disabled again, in case the BIOS has enabled it during resume.
	 */
	if (wdt_data->pm_status_store & KEMPLD_WDT_CFG_ENABLE)
		return kempld_wdt_start(wdd);
	else
		return kempld_wdt_stop(wdd);
}
#else
#define kempld_wdt_suspend	NULL
#define kempld_wdt_resume	NULL
#endif

static struct platform_driver kempld_wdt_driver = {
	.driver		= {
		.name	= "kempld-wdt",
	},
	.probe		= kempld_wdt_probe,
	.remove		= kempld_wdt_remove,
	.shutdown	= kempld_wdt_shutdown,
	.suspend	= kempld_wdt_suspend,
	.resume		= kempld_wdt_resume,
};

module_platform_driver(kempld_wdt_driver);

MODULE_DESCRIPTION("KEM PLD Watchdog Driver");
MODULE_AUTHOR("Michael Brunner <michael.brunner@kontron.com>");
MODULE_LICENSE("GPL");