From 265be2d09853d425ad14a61cda0ca63345613d0c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 31 May 2010 10:14:17 +0200 Subject: drbd: Finished the "on-no-data-accessible suspend-io;" functionality When no data is accessible (no connection to the peer, nor a local disk) allow the user to select to freeze all IO operations instead of getting IO errors. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd.h | 5 +++++ include/linux/drbd_limits.h | 1 + include/linux/drbd_nl.h | 1 + 3 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 479ee3a1d901..7be069fcca57 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -91,6 +91,11 @@ enum drbd_after_sb_p { ASB_VIOLENTLY }; +enum drbd_on_no_data { + OND_IO_ERROR, + OND_SUSPEND_IO +}; + /* KEEP the order, do not delete or insert. Only append. */ enum drbd_ret_codes { ERR_CODE_BASE = 100, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 440b42e38e89..7eb1e98009ec 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -128,6 +128,7 @@ #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT #define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT +#define DRBD_ON_NO_DATA_DEF OND_IO_ERROR #define DRBD_MAX_BIO_BVECS_MIN 0 #define DRBD_MAX_BIO_BVECS_MAX 128 diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index 5f042810a56c..9aebd0d80a5d 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -87,6 +87,7 @@ NL_PACKET(syncer_conf, 8, NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32) NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX) NL_BIT( 65, T_MAY_IGNORE, use_rle) + NL_INTEGER( 75, T_MAY_IGNORE, on_no_data) ) NL_PACKET(invalidate, 9, ) -- cgit v1.2.3-55-g7522 From 47ff2d0a8e7ce87fed180729e8341f650bf585c8 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 18 Jun 2010 13:56:57 +0200 Subject: drbd: Do not allow a fencing-policy of resource-and-stonith with protocol A Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 20 +++++++++++++++++++- drivers/block/drbd/drbd_req.c | 2 +- include/linux/drbd.h | 1 + 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 563a6ade0179..5288bd72cd27 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -806,6 +806,15 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp goto fail; } + if (get_net_conf(mdev)) { + int prot = mdev->net_conf->wire_protocol; + put_net_conf(mdev); + if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) { + retcode = ERR_STONITH_AND_PROT_A; + goto fail; + } + } + nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0); if (IS_ERR(nbc->lo_file)) { dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, @@ -1238,7 +1247,16 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, && (new_conf->wire_protocol != DRBD_PROT_C)) { retcode = ERR_NOT_PROTO_C; goto fail; - }; + } + + if (get_ldev(mdev)) { + enum drbd_fencing_p fp = mdev->ldev->dc.fencing; + put_ldev(mdev); + if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) { + retcode = ERR_STONITH_AND_PROT_A; + goto fail; + } + } if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { retcode = ERR_DISCARD; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 8259d4f77285..fbe027886bad 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -660,7 +660,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case resend: /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK - before the connection loss; only P_BARRIER_ACK was missing. + before the connection loss (B&C only); only P_BARRIER_ACK was missing. Trowing them out of the TL here by pretending we got a BARRIER_ACK TODO: Either resync them, or ensure peer was not rebooted. */ if (!(req->rq_state & RQ_NET_OK)) { diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 7be069fcca57..0b2bfb58d9c5 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -145,6 +145,7 @@ enum drbd_ret_codes { ERR_CONNECTED = 151, /* DRBD 8.3 only */ ERR_PERM = 152, ERR_NEED_APV_93 = 153, + ERR_STONITH_AND_PROT_A = 154, /* insert new ones above this line */ AFTER_LAST_ERR_CODE -- cgit v1.2.3-55-g7522 From 9a31d7164d409ca59cfadb7957ac7b0acf4545b8 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 5 Jul 2010 13:42:03 +0200 Subject: drbd: New sync parameters for the smart resync rate controller Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 6 +++++- drivers/block/drbd/drbd_nl.c | 4 ++++ include/linux/drbd_limits.h | 24 ++++++++++++------------ include/linux/drbd_nl.h | 4 ++++ 4 files changed, 25 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 410d3d4f361e..5a484c1f5ce7 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2690,7 +2690,11 @@ static void drbd_set_defaults(struct drbd_conf *mdev) /* .cpu_mask = */ {}, 0, /* .csums_alg = */ {}, 0, /* .use_rle = */ 0, - /* .on_no_data = */ DRBD_ON_NO_DATA_DEF + /* .on_no_data = */ DRBD_ON_NO_DATA_DEF, + /* .c_plan_ahead = */ DRBD_C_PLAN_AHEAD_DEF, + /* .c_delay_target = */ DRBD_C_DELAY_TARGET_DEF, + /* .c_fill_target = */ DRBD_C_FILL_TARGET_DEF, + /* .c_max_rate = */ DRBD_C_MAX_RATE_DEF }; /* Have to use that way, because the layout differs between diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 6c08e637e25c..7d384fd39c16 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1599,6 +1599,10 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n sc.after = DRBD_AFTER_DEF; sc.al_extents = DRBD_AL_EXTENTS_DEF; sc.on_no_data = DRBD_ON_NO_DATA_DEF; + sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF; + sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF; + sc.c_fill_target = DRBD_C_FILL_TARGET_DEF; + sc.c_max_rate = DRBD_C_MAX_RATE_DEF; } else memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 7eb1e98009ec..06dbba47a8ef 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -134,21 +134,21 @@ #define DRBD_MAX_BIO_BVECS_MAX 128 #define DRBD_MAX_BIO_BVECS_DEF 0 -#define DRBD_DP_VOLUME_MIN 4 -#define DRBD_DP_VOLUME_MAX 1048576 -#define DRBD_DP_VOLUME_DEF 16384 +#define DRBD_C_PLAN_AHEAD_MIN 0 +#define DRBD_C_PLAN_AHEAD_MAX 300 +#define DRBD_C_PLAN_AHEAD_DEF 0 /* RS rate controller disabled by default */ -#define DRBD_DP_INTERVAL_MIN 1 -#define DRBD_DP_INTERVAL_MAX 600 -#define DRBD_DP_INTERVAL_DEF 5 +#define DRBD_C_DELAY_TARGET_MIN 1 +#define DRBD_C_DELAY_TARGET_MAX 100 +#define DRBD_C_DELAY_TARGET_DEF 10 -#define DRBD_RS_THROTTLE_TH_MIN 1 -#define DRBD_RS_THROTTLE_TH_MAX 600 -#define DRBD_RS_THROTTLE_TH_DEF 20 +#define DRBD_C_FILL_TARGET_MIN 0 +#define DRBD_C_FILL_TARGET_MAX 100000 +#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */ -#define DRBD_RS_HOLD_OFF_TH_MIN 1 -#define DRBD_RS_HOLD_OFF_TH_MAX 6000 -#define DRBD_RS_HOLD_OFF_TH_DEF 100 +#define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */ +#define DRBD_C_MAX_RATE_MAX (4 << 20) +#define DRBD_C_MAX_RATE_DEF 102400 #undef RANGE #endif diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index 9aebd0d80a5d..e23683c87ca1 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -88,6 +88,10 @@ NL_PACKET(syncer_conf, 8, NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX) NL_BIT( 65, T_MAY_IGNORE, use_rle) NL_INTEGER( 75, T_MAY_IGNORE, on_no_data) + NL_INTEGER( 76, T_MAY_IGNORE, c_plan_ahead) + NL_INTEGER( 77, T_MAY_IGNORE, c_delay_target) + NL_INTEGER( 78, T_MAY_IGNORE, c_fill_target) + NL_INTEGER( 79, T_MAY_IGNORE, c_max_rate) ) NL_PACKET(invalidate, 9, ) -- cgit v1.2.3-55-g7522 From 0f0601f4ea2f53cfd8bcae060fb03d9bbde070ec Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 11 Aug 2010 23:40:24 +0200 Subject: drbd: new configuration parameter c-min-rate We now track the data rate of locally submitted resync related requests, and can thus detect non-resync activity on the lower level device. If the current sync rate is above c-min-rate, and the lower level device appears to be busy, we throttle the resyncer. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 7 ++- drivers/block/drbd/drbd_nl.c | 3 +- drivers/block/drbd/drbd_receiver.c | 88 +++++++++++++++++++++++++++++++++++--- drivers/block/drbd/drbd_worker.c | 29 ++++++++----- include/linux/drbd_limits.h | 4 ++ include/linux/drbd_nl.h | 1 + 7 files changed, 116 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 0fce3f36fc1c..0fedcc0b8dc9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1513,6 +1513,7 @@ extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int); extern void resync_timer_fn(unsigned long data); /* drbd_receiver.c */ +extern int drbd_rs_should_slow_down(struct drbd_conf *mdev); extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, const unsigned rw, const int fault_type); extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 1ff8418ae0fa..db93eee7e543 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1098,6 +1098,8 @@ int __drbd_set_state(struct drbd_conf *mdev, mdev->ov_left = mdev->rs_total - BM_SECT_TO_BIT(mdev->ov_position); mdev->rs_start = now; + mdev->rs_last_events = 0; + mdev->rs_last_sect_ev = 0; mdev->ov_last_oos_size = 0; mdev->ov_last_oos_start = 0; @@ -2706,7 +2708,8 @@ static void drbd_set_defaults(struct drbd_conf *mdev) /* .c_plan_ahead = */ DRBD_C_PLAN_AHEAD_DEF, /* .c_delay_target = */ DRBD_C_DELAY_TARGET_DEF, /* .c_fill_target = */ DRBD_C_FILL_TARGET_DEF, - /* .c_max_rate = */ DRBD_C_MAX_RATE_DEF + /* .c_max_rate = */ DRBD_C_MAX_RATE_DEF, + /* .c_min_rate = */ DRBD_C_MIN_RATE_DEF }; /* Have to use that way, because the layout differs between @@ -2742,6 +2745,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->packet_seq, 0); atomic_set(&mdev->pp_in_use, 0); atomic_set(&mdev->rs_sect_in, 0); + atomic_set(&mdev->rs_sect_ev, 0); mutex_init(&mdev->md_io_mutex); mutex_init(&mdev->data.mutex); @@ -2819,6 +2823,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) mdev->rs_total = mdev->rs_failed = 0; mdev->rs_last_events = 0; + mdev->rs_last_sect_ev = 0; for (i = 0; i < DRBD_SYNC_MARKS; i++) { mdev->rs_mark_left[i] = 0; mdev->rs_mark_time[i] = 0; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 295b8d593708..6b35d41706e4 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1604,7 +1604,8 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF; sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF; sc.c_fill_target = DRBD_C_FILL_TARGET_DEF; - sc.c_max_rate = DRBD_C_MAX_RATE_DEF; + sc.c_max_rate = DRBD_C_MAX_RATE_DEF; + sc.c_min_rate = DRBD_C_MIN_RATE_DEF; } else memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 346aed98027f..0d9967fef528 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1561,6 +1561,7 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si list_add(&e->w.list, &mdev->sync_ee); spin_unlock_irq(&mdev->req_lock); + atomic_add(data_size >> 9, &mdev->rs_sect_ev); if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) return TRUE; @@ -2017,17 +2018,66 @@ out_interrupted: return FALSE; } +/* We may throttle resync, if the lower device seems to be busy, + * and current sync rate is above c_min_rate. + * + * To decide whether or not the lower device is busy, we use a scheme similar + * to MD RAID is_mddev_idle(): if the partition stats reveal "significant" + * (more than 64 sectors) of activity we cannot account for with our own resync + * activity, it obviously is "busy". + * + * The current sync rate used here uses only the most recent two step marks, + * to have a short time average so we can react faster. + */ +int drbd_rs_should_slow_down(struct drbd_conf *mdev) +{ + struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk; + unsigned long db, dt, dbdt; + int curr_events; + int throttle = 0; + + /* feature disabled? */ + if (mdev->sync_conf.c_min_rate == 0) + return 0; + + curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + + (int)part_stat_read(&disk->part0, sectors[1]) - + atomic_read(&mdev->rs_sect_ev); + if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) { + unsigned long rs_left; + int i; + + mdev->rs_last_events = curr_events; + + /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP, + * approx. */ + i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-2) % DRBD_SYNC_MARKS; + rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; + + dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ; + if (!dt) + dt++; + db = mdev->rs_mark_left[i] - rs_left; + dbdt = Bit2KB(db/dt); + + if (dbdt > mdev->sync_conf.c_min_rate) + throttle = 1; + } + return throttle; +} + + static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) { sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); struct drbd_epoch_entry *e; struct digest_info *di = NULL; + struct p_block_req *p = (struct p_block_req *)h; + const int brps = sizeof(*p)-sizeof(*h); int size, digest_size; unsigned int fault_type; - struct p_block_req *p = - (struct p_block_req *)h; - const int brps = sizeof(*p)-sizeof(*h); + if (drbd_recv(mdev, h->payload, brps) != brps) return FALSE; @@ -2099,8 +2149,9 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) } else if (h->command == P_OV_REPLY) { e->w.cb = w_e_end_ov_reply; dec_rs_pending(mdev); - /* drbd_rs_begin_io done when we sent this request */ - goto submit; + /* drbd_rs_begin_io done when we sent this request, + * but accounting still needs to be done. */ + goto submit_for_resync; } break; @@ -2128,9 +2179,36 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) goto out_free_e; } + /* Throttle, drbd_rs_begin_io and submit should become asynchronous + * wrt the receiver, but it is not as straightforward as it may seem. + * Various places in the resync start and stop logic assume resync + * requests are processed in order, requeuing this on the worker thread + * introduces a bunch of new code for synchronization between threads. + * + * Unlimited throttling before drbd_rs_begin_io may stall the resync + * "forever", throttling after drbd_rs_begin_io will lock that extent + * for application writes for the same time. For now, just throttle + * here, where the rest of the code expects the receiver to sleep for + * a while, anyways. + */ + + /* Throttle before drbd_rs_begin_io, as that locks out application IO; + * this defers syncer requests for some time, before letting at least + * on request through. The resync controller on the receiving side + * will adapt to the incoming rate accordingly. + * + * We cannot throttle here if remote is Primary/SyncTarget: + * we would also throttle its application reads. + * In that case, throttling is done on the SyncTarget only. + */ + if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev)) + msleep(100); if (drbd_rs_begin_io(mdev, e->sector)) goto out_free_e; +submit_for_resync: + atomic_add(size >> 9, &mdev->rs_sect_ev); + submit: inc_unacked(mdev); spin_lock_irq(&mdev->req_lock); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index f5d779b4d685..99c937acb471 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -215,10 +215,8 @@ void drbd_endio_sec(struct bio *bio, int error) */ void drbd_endio_pri(struct bio *bio, int error) { - unsigned long flags; struct drbd_request *req = bio->bi_private; struct drbd_conf *mdev = req->mdev; - struct bio_and_error m; enum drbd_req_event what; int uptodate = bio_flagged(bio, BIO_UPTODATE); @@ -244,12 +242,7 @@ void drbd_endio_pri(struct bio *bio, int error) bio_put(req->private_bio); req->private_bio = ERR_PTR(error); - spin_lock_irqsave(&mdev->req_lock, flags); - __req_mod(req, what, &m); - spin_unlock_irqrestore(&mdev->req_lock, flags); - - if (m.bio) - complete_master_bio(mdev, &m); + req_mod(req, what); } int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) @@ -376,6 +369,9 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) if (!get_ldev(mdev)) return -EIO; + if (drbd_rs_should_slow_down(mdev)) + goto defer; + /* GFP_TRY, because if there is no memory available right now, this may * be rescheduled for later. It is "only" background resync, after all. */ e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY); @@ -387,6 +383,7 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) list_add(&e->w.list, &mdev->read_ee); spin_unlock_irq(&mdev->req_lock); + atomic_add(size >> 9, &mdev->rs_sect_ev); if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0) return 0; @@ -512,8 +509,9 @@ int w_make_resync_request(struct drbd_conf *mdev, sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); int max_segment_size; - int number, i, rollback_i, size, pe, mx; + int number, rollback_i, size, pe, mx; int align, queued, sndbuf; + int i = 0; if (unlikely(cancel)) return 1; @@ -549,7 +547,14 @@ int w_make_resync_request(struct drbd_conf *mdev, mdev->c_sync_rate = mdev->sync_conf.rate; number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); } - pe = atomic_read(&mdev->rs_pending_cnt); + + /* Throttle resync on lower level disk activity, which may also be + * caused by application IO on Primary/SyncTarget. + * Keep this after the call to drbd_rs_controller, as that assumes + * to be called as precisely as possible every SLEEP_TIME, + * and would be confused otherwise. */ + if (drbd_rs_should_slow_down(mdev)) + goto requeue; mutex_lock(&mdev->data.mutex); if (mdev->data.socket) @@ -563,6 +568,7 @@ int w_make_resync_request(struct drbd_conf *mdev, mx = number; /* Limit the number of pending RS requests to no more than the peer's receive buffer */ + pe = atomic_read(&mdev->rs_pending_cnt); if ((pe + number) > mx) { number = mx - pe; } @@ -1492,6 +1498,8 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) mdev->rs_failed = 0; mdev->rs_paused = 0; mdev->rs_same_csum = 0; + mdev->rs_last_events = 0; + mdev->rs_last_sect_ev = 0; mdev->rs_total = tw; mdev->rs_start = now; for (i = 0; i < DRBD_SYNC_MARKS; i++) { @@ -1516,6 +1524,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) } atomic_set(&mdev->rs_sect_in, 0); + atomic_set(&mdev->rs_sect_ev, 0); mdev->rs_in_flight = 0; mdev->rs_planed = 0; spin_lock(&mdev->peer_seq_lock); diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 06dbba47a8ef..0b24ded6fffd 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -150,5 +150,9 @@ #define DRBD_C_MAX_RATE_MAX (4 << 20) #define DRBD_C_MAX_RATE_DEF 102400 +#define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */ +#define DRBD_C_MIN_RATE_MAX (4 << 20) +#define DRBD_C_MIN_RATE_DEF 4096 + #undef RANGE #endif diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index e23683c87ca1..ade91107c9a5 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -92,6 +92,7 @@ NL_PACKET(syncer_conf, 8, NL_INTEGER( 77, T_MAY_IGNORE, c_delay_target) NL_INTEGER( 78, T_MAY_IGNORE, c_fill_target) NL_INTEGER( 79, T_MAY_IGNORE, c_max_rate) + NL_INTEGER( 80, T_MAY_IGNORE, c_min_rate) ) NL_PACKET(invalidate, 9, ) -- cgit v1.2.3-55-g7522 From 0b70a13dac014ec9274640b9e945bde493ba365e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 20 Aug 2010 13:36:10 +0200 Subject: drbd: Sending of big packets, for payloads from 64KByte to 4GByte Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 74 +++++++++++++++++------------- drivers/block/drbd/drbd_main.c | 79 +++++++++++++++++++------------- drivers/block/drbd/drbd_receiver.c | 94 +++++++++++++++++++------------------- drivers/block/drbd/drbd_worker.c | 2 +- include/linux/drbd.h | 2 + 5 files changed, 139 insertions(+), 112 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 0fedcc0b8dc9..3f10efc2ac14 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -337,13 +337,25 @@ static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c) * NOTE that the payload starts at a long aligned offset, * regardless of 32 or 64 bit arch! */ -struct p_header { +struct p_header80 { u32 magic; u16 command; u16 length; /* bytes of data after this header */ u8 payload[0]; } __packed; -/* 8 bytes. packet FIXED for the next century! */ + +/* Header for big packets, Used for data packets exceeding 64kB */ +struct p_header95 { + u16 magic; /* use DRBD_MAGIC_BIG here */ + u16 command; + u32 length; + u8 payload[0]; +} __packed; + +union p_header { + struct p_header80 h80; + struct p_header95 h95; +}; /* * short commands, packets without payload, plain p_header: @@ -367,7 +379,7 @@ struct p_header { #define DP_MAY_SET_IN_SYNC 4 struct p_data { - struct p_header head; + union p_header head; u64 sector; /* 64 bits sector number */ u64 block_id; /* to identify the request in protocol B&C */ u32 seq_num; @@ -383,7 +395,7 @@ struct p_data { * P_DATA_REQUEST, P_RS_DATA_REQUEST */ struct p_block_ack { - struct p_header head; + struct p_header80 head; u64 sector; u64 block_id; u32 blksize; @@ -392,7 +404,7 @@ struct p_block_ack { struct p_block_req { - struct p_header head; + struct p_header80 head; u64 sector; u64 block_id; u32 blksize; @@ -409,7 +421,7 @@ struct p_block_req { */ struct p_handshake { - struct p_header head; /* 8 bytes */ + struct p_header80 head; /* 8 bytes */ u32 protocol_min; u32 feature_flags; u32 protocol_max; @@ -424,19 +436,19 @@ struct p_handshake { /* 80 bytes, FIXED for the next century */ struct p_barrier { - struct p_header head; + struct p_header80 head; u32 barrier; /* barrier number _handle_ only */ u32 pad; /* to multiple of 8 Byte */ } __packed; struct p_barrier_ack { - struct p_header head; + struct p_header80 head; u32 barrier; u32 set_size; } __packed; struct p_rs_param { - struct p_header head; + struct p_header80 head; u32 rate; /* Since protocol version 88 and higher. */ @@ -444,7 +456,7 @@ struct p_rs_param { } __packed; struct p_rs_param_89 { - struct p_header head; + struct p_header80 head; u32 rate; /* protocol version 89: */ char verify_alg[SHARED_SECRET_MAX]; @@ -452,7 +464,7 @@ struct p_rs_param_89 { } __packed; struct p_rs_param_95 { - struct p_header head; + struct p_header80 head; u32 rate; char verify_alg[SHARED_SECRET_MAX]; char csums_alg[SHARED_SECRET_MAX]; @@ -468,7 +480,7 @@ enum drbd_conn_flags { }; struct p_protocol { - struct p_header head; + struct p_header80 head; u32 protocol; u32 after_sb_0p; u32 after_sb_1p; @@ -482,17 +494,17 @@ struct p_protocol { } __packed; struct p_uuids { - struct p_header head; + struct p_header80 head; u64 uuid[UI_EXTENDED_SIZE]; } __packed; struct p_rs_uuid { - struct p_header head; + struct p_header80 head; u64 uuid; } __packed; struct p_sizes { - struct p_header head; + struct p_header80 head; u64 d_size; /* size of disk */ u64 u_size; /* user requested size */ u64 c_size; /* current exported size */ @@ -502,18 +514,18 @@ struct p_sizes { } __packed; struct p_state { - struct p_header head; + struct p_header80 head; u32 state; } __packed; struct p_req_state { - struct p_header head; + struct p_header80 head; u32 mask; u32 val; } __packed; struct p_req_state_reply { - struct p_header head; + struct p_header80 head; u32 retcode; } __packed; @@ -528,7 +540,7 @@ struct p_drbd06_param { } __packed; struct p_discard { - struct p_header head; + struct p_header80 head; u64 block_id; u32 seq_num; u32 pad; @@ -544,7 +556,7 @@ enum drbd_bitmap_code { }; struct p_compressed_bm { - struct p_header head; + struct p_header80 head; /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code * (encoding & 0x80): polarity (set/unset) of first runlength * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits @@ -555,10 +567,10 @@ struct p_compressed_bm { u8 code[0]; } __packed; -struct p_delay_probe { - struct p_header head; - u32 seq_num; /* sequence number to match the two probe packets */ - u32 offset; /* usecs the probe got sent after the reference time point */ +struct p_delay_probe93 { + struct p_header80 head; + u32 seq_num; /* sequence number to match the two probe packets */ + u32 offset; /* usecs the probe got sent after the reference time point */ } __packed; /* DCBP: Drbd Compressed Bitmap Packet ... */ @@ -605,7 +617,7 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n) * so we need to use the fixed size 4KiB page size * most architechtures have used for a long time. */ -#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header)) +#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header80)) #define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long)) #define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm)) #if (PAGE_SIZE < 4096) @@ -614,7 +626,7 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n) #endif union p_polymorph { - struct p_header header; + struct p_header80 header; struct p_handshake handshake; struct p_data data; struct p_block_ack block_ack; @@ -1188,12 +1200,12 @@ extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_f extern int _drbd_send_state(struct drbd_conf *mdev); extern int drbd_send_state(struct drbd_conf *mdev); extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header *h, + enum drbd_packets cmd, struct p_header80 *h, size_t size, unsigned msg_flags); #define USE_DATA_SOCKET 1 #define USE_META_SOCKET 0 extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum drbd_packets cmd, struct p_header *h, + enum drbd_packets cmd, struct p_header80 *h, size_t size); extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, size_t size); @@ -1936,19 +1948,19 @@ static inline void request_ping(struct drbd_conf *mdev) static inline int drbd_send_short_cmd(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct p_header h; + struct p_header80 h; return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h)); } static inline int drbd_send_ping(struct drbd_conf *mdev) { - struct p_header h; + struct p_header80 h; return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h)); } static inline int drbd_send_ping_ack(struct drbd_conf *mdev) { - struct p_header h; + struct p_header80 h; return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index db93eee7e543..f3f4ea9c5eb9 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1647,7 +1647,7 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) /* the appropriate socket mutex must be held already */ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header *h, + enum drbd_packets cmd, struct p_header80 *h, size_t size, unsigned msg_flags) { int sent, ok; @@ -1657,7 +1657,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, h->magic = BE_DRBD_MAGIC; h->command = cpu_to_be16(cmd); - h->length = cpu_to_be16(size-sizeof(struct p_header)); + h->length = cpu_to_be16(size-sizeof(struct p_header80)); sent = drbd_send(mdev, sock, h, size, msg_flags); @@ -1672,7 +1672,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, * when we hold the appropriate socket mutex. */ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum drbd_packets cmd, struct p_header *h, size_t size) + enum drbd_packets cmd, struct p_header80 *h, size_t size) { int ok = 0; struct socket *sock; @@ -1700,7 +1700,7 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, size_t size) { - struct p_header h; + struct p_header80 h; int ok; h.magic = BE_DRBD_MAGIC; @@ -1807,7 +1807,7 @@ int drbd_send_protocol(struct drbd_conf *mdev) strcpy(p->integrity_alg, mdev->net_conf->integrity_alg); rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL, - (struct p_header *)p, size); + (struct p_header80 *)p, size); kfree(p); return rv; } @@ -1833,7 +1833,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) put_ldev(mdev); return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); } int drbd_send_uuids(struct drbd_conf *mdev) @@ -1854,7 +1854,7 @@ int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val) p.uuid = cpu_to_be64(val); return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); } int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags) @@ -1884,7 +1884,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl p.dds_flags = cpu_to_be16(flags); ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); return ok; } @@ -1909,7 +1909,7 @@ int drbd_send_state(struct drbd_conf *mdev) if (likely(sock != NULL)) { ok = _drbd_send_cmd(mdev, sock, P_STATE, - (struct p_header *)&p, sizeof(p), 0); + (struct p_header80 *)&p, sizeof(p), 0); } mutex_unlock(&mdev->data.mutex); @@ -1927,7 +1927,7 @@ int drbd_send_state_req(struct drbd_conf *mdev, p.val = cpu_to_be32(val.i); return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); } int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode) @@ -1937,7 +1937,7 @@ int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode) p.retcode = cpu_to_be32(retcode); return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); } int fill_bitmap_rle_bits(struct drbd_conf *mdev, @@ -2036,7 +2036,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, enum { OK, FAILED, DONE } send_bitmap_rle_or_plain(struct drbd_conf *mdev, - struct p_header *h, struct bm_xfer_ctx *c) + struct p_header80 *h, struct bm_xfer_ctx *c) { struct p_compressed_bm *p = (void*)h; unsigned long num_words; @@ -2066,12 +2066,12 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, if (len) drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload); ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP, - h, sizeof(struct p_header) + len, 0); + h, sizeof(struct p_header80) + len, 0); c->word_offset += num_words; c->bit_offset = c->word_offset * BITS_PER_LONG; c->packets[1]++; - c->bytes[1] += sizeof(struct p_header) + len; + c->bytes[1] += sizeof(struct p_header80) + len; if (c->bit_offset > c->bm_bits) c->bit_offset = c->bm_bits; @@ -2087,14 +2087,14 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, int _drbd_send_bitmap(struct drbd_conf *mdev) { struct bm_xfer_ctx c; - struct p_header *p; + struct p_header80 *p; int ret; ERR_IF(!mdev->bitmap) return FALSE; /* maybe we should use some per thread scratch page, * and allocate that during initial device creation? */ - p = (struct p_header *) __get_free_page(GFP_NOIO); + p = (struct p_header80 *) __get_free_page(GFP_NOIO); if (!p) { dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); return FALSE; @@ -2152,7 +2152,7 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) if (mdev->state.conn < C_CONNECTED) return FALSE; ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); return ok; } @@ -2180,7 +2180,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED) return FALSE; ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); return ok; } @@ -2188,8 +2188,8 @@ int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, struct p_data *dp) { const int header_size = sizeof(struct p_data) - - sizeof(struct p_header); - int data_size = ((struct p_header *)dp)->length - header_size; + - sizeof(struct p_header80); + int data_size = ((struct p_header80 *)dp)->length - header_size; return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), dp->block_id); @@ -2238,7 +2238,7 @@ int drbd_send_drequest(struct drbd_conf *mdev, int cmd, p.blksize = cpu_to_be32(size); ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); return ok; } @@ -2256,7 +2256,7 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, p.head.magic = BE_DRBD_MAGIC; p.head.command = cpu_to_be16(cmd); - p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header) + digest_size); + p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size); mutex_lock(&mdev->data.mutex); @@ -2278,7 +2278,7 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) p.blksize = cpu_to_be32(size); ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); return ok; } @@ -2447,10 +2447,17 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; - p.head.magic = BE_DRBD_MAGIC; - p.head.command = cpu_to_be16(P_DATA); - p.head.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->size); + if (req->size <= (1 << 15)) { + p.head.h80.magic = BE_DRBD_MAGIC; + p.head.h80.command = cpu_to_be16(P_DATA); + p.head.h80.length = + cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->size); + } else { + p.head.h95.magic = BE_DRBD_MAGIC_BIG; + p.head.h95.command = cpu_to_be16(P_DATA); + p.head.h95.length = + cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->size); + } p.sector = cpu_to_be64(req->sector); p.block_id = (unsigned long)req; @@ -2511,10 +2518,17 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; - p.head.magic = BE_DRBD_MAGIC; - p.head.command = cpu_to_be16(cmd); - p.head.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + e->size); + if (e->size <= (1 << 15)) { + p.head.h80.magic = BE_DRBD_MAGIC; + p.head.h80.command = cpu_to_be16(cmd); + p.head.h80.length = + cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->size); + } else { + p.head.h95.magic = BE_DRBD_MAGIC_BIG; + p.head.h95.command = cpu_to_be16(cmd); + p.head.h95.length = + cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->size); + } p.sector = cpu_to_be64(e->sector); p.block_id = e->block_id; @@ -2527,8 +2541,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, if (!drbd_get_data_sock(mdev)) return 0; - ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, - sizeof(p), dgs ? MSG_MORE : 0); + ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0); if (ok && dgs) { dgb = mdev->int_dig_out; drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 591a171291d9..9b3321e2c3cd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -720,14 +720,14 @@ out: static int drbd_send_fp(struct drbd_conf *mdev, struct socket *sock, enum drbd_packets cmd) { - struct p_header *h = (struct p_header *) &mdev->data.sbuf.header; + struct p_header80 *h = (struct p_header80 *) &mdev->data.sbuf.header; return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); } static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) { - struct p_header *h = (struct p_header *) &mdev->data.sbuf.header; + struct p_header80 *h = (struct p_header80 *) &mdev->data.sbuf.header; int rr; rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0); @@ -944,7 +944,7 @@ out_release_sockets: return -1; } -static int drbd_recv_header(struct drbd_conf *mdev, struct p_header *h) +static int drbd_recv_header(struct drbd_conf *mdev, struct p_header80 *h) { int r; @@ -1266,7 +1266,7 @@ int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __relea return 1; } -static int receive_Barrier(struct drbd_conf *mdev, struct p_header *h) +static int receive_Barrier(struct drbd_conf *mdev, struct p_header80 *h) { int rv, issue_flush; struct p_barrier *p = (struct p_barrier *)h; @@ -1570,7 +1570,7 @@ fail: return FALSE; } -static int receive_DataReply(struct drbd_conf *mdev, struct p_header *h) +static int receive_DataReply(struct drbd_conf *mdev, struct p_header80 *h) { struct drbd_request *req; sector_t sector; @@ -1610,7 +1610,7 @@ static int receive_DataReply(struct drbd_conf *mdev, struct p_header *h) return ok; } -static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h) +static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header80 *h) { sector_t sector; unsigned int header_size, data_size; @@ -1767,7 +1767,7 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) } /* mirrored write */ -static int receive_Data(struct drbd_conf *mdev, struct p_header *h) +static int receive_Data(struct drbd_conf *mdev, struct p_header80 *h) { sector_t sector; struct drbd_epoch_entry *e; @@ -2066,7 +2066,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev) } -static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) +static int receive_DataRequest(struct drbd_conf *mdev, struct p_header80 *h) { sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); @@ -2756,7 +2756,7 @@ static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) return 1; } -static int receive_protocol(struct drbd_conf *mdev, struct p_header *h) +static int receive_protocol(struct drbd_conf *mdev, struct p_header80 *h) { struct p_protocol *p = (struct p_protocol *)h; int header_size, data_size; @@ -2862,7 +2862,7 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, return tfm; } -static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) +static int receive_SyncParam(struct drbd_conf *mdev, struct p_header80 *h) { int ok = TRUE; struct p_rs_param_95 *p = (struct p_rs_param_95 *)h; @@ -3032,7 +3032,7 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev, (unsigned long long)a, (unsigned long long)b); } -static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) +static int receive_sizes(struct drbd_conf *mdev, struct p_header80 *h) { struct p_sizes *p = (struct p_sizes *)h; enum determine_dev_size dd = unchanged; @@ -3148,7 +3148,7 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int receive_uuids(struct drbd_conf *mdev, struct p_header *h) +static int receive_uuids(struct drbd_conf *mdev, struct p_header80 *h) { struct p_uuids *p = (struct p_uuids *)h; u64 *p_uuid; @@ -3241,7 +3241,7 @@ static union drbd_state convert_state(union drbd_state ps) return ms; } -static int receive_req_state(struct drbd_conf *mdev, struct p_header *h) +static int receive_req_state(struct drbd_conf *mdev, struct p_header80 *h) { struct p_req_state *p = (struct p_req_state *)h; union drbd_state mask, val; @@ -3271,7 +3271,7 @@ static int receive_req_state(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int receive_state(struct drbd_conf *mdev, struct p_header *h) +static int receive_state(struct drbd_conf *mdev, struct p_header80 *h) { struct p_state *p = (struct p_state *)h; enum drbd_conns nconn, oconn; @@ -3395,7 +3395,7 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h) +static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header80 *h) { struct p_rs_uuid *p = (struct p_rs_uuid *)h; @@ -3428,7 +3428,7 @@ static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h) enum receive_bitmap_ret { OK, DONE, FAILED }; static enum receive_bitmap_ret -receive_bitmap_plain(struct drbd_conf *mdev, struct p_header *h, +receive_bitmap_plain(struct drbd_conf *mdev, struct p_header80 *h, unsigned long *buffer, struct bm_xfer_ctx *c) { unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); @@ -3533,7 +3533,7 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, const char *direction, struct bm_xfer_ctx *c) { /* what would it take to transfer it "plaintext" */ - unsigned plain = sizeof(struct p_header) * + unsigned plain = sizeof(struct p_header80) * ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1) + c->bm_words * sizeof(long); unsigned total = c->bytes[0] + c->bytes[1]; @@ -3571,7 +3571,7 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, in order to be agnostic to the 32 vs 64 bits issue. returns 0 on failure, 1 if we successfully received it. */ -static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h) +static int receive_bitmap(struct drbd_conf *mdev, struct p_header80 *h) { struct bm_xfer_ctx c; void *buffer; @@ -3623,7 +3623,7 @@ static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h) } c.packets[h->command == P_BITMAP]++; - c.bytes[h->command == P_BITMAP] += sizeof(struct p_header) + h->length; + c.bytes[h->command == P_BITMAP] += sizeof(struct p_header80) + h->length; if (ret != OK) break; @@ -3659,7 +3659,7 @@ static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h) return ok; } -static int receive_skip_(struct drbd_conf *mdev, struct p_header *h, int silent) +static int receive_skip_(struct drbd_conf *mdev, struct p_header80 *h, int silent) { /* TODO zero copy sink :) */ static char sink[128]; @@ -3679,17 +3679,17 @@ static int receive_skip_(struct drbd_conf *mdev, struct p_header *h, int silent) return size == 0; } -static int receive_skip(struct drbd_conf *mdev, struct p_header *h) +static int receive_skip(struct drbd_conf *mdev, struct p_header80 *h) { return receive_skip_(mdev, h, 0); } -static int receive_skip_silent(struct drbd_conf *mdev, struct p_header *h) +static int receive_skip_silent(struct drbd_conf *mdev, struct p_header80 *h) { return receive_skip_(mdev, h, 1); } -static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h) +static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header80 *h) { if (mdev->state.disk >= D_INCONSISTENT) drbd_kick_lo(mdev); @@ -3701,7 +3701,7 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header *); +typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header80 *); static drbd_cmd_handler_f drbd_default_handler[] = { [P_DATA] = receive_Data, @@ -3736,7 +3736,7 @@ static drbd_cmd_handler_f *drbd_opt_cmd_handler; static void drbdd(struct drbd_conf *mdev) { drbd_cmd_handler_f handler; - struct p_header *header = &mdev->data.rbuf.header; + struct p_header80 *header = &mdev->data.rbuf.header; while (get_t_state(&mdev->receiver) == Running) { drbd_thread_current_set_cpu(mdev); @@ -3964,7 +3964,7 @@ static int drbd_send_handshake(struct drbd_conf *mdev) p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); ok = _drbd_send_cmd( mdev, mdev->data.socket, P_HAND_SHAKE, - (struct p_header *)p, sizeof(*p), 0 ); + (struct p_header80 *)p, sizeof(*p), 0 ); mutex_unlock(&mdev->data.mutex); return ok; } @@ -3981,7 +3981,7 @@ static int drbd_do_handshake(struct drbd_conf *mdev) /* ASSERT current == mdev->receiver ... */ struct p_handshake *p = &mdev->data.rbuf.handshake; const int expect = sizeof(struct p_handshake) - -sizeof(struct p_header); + -sizeof(struct p_header80); int rv; rv = drbd_send_handshake(mdev); @@ -4058,7 +4058,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) char *response = NULL; char *right_response = NULL; char *peers_ch = NULL; - struct p_header p; + struct p_header80 p; unsigned int key_len = strlen(mdev->net_conf->shared_secret); unsigned int resp_size; struct hash_desc desc; @@ -4231,7 +4231,7 @@ int drbdd_init(struct drbd_thread *thi) /* ********* acknowledge sender ******** */ -static int got_RqSReply(struct drbd_conf *mdev, struct p_header *h) +static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h) { struct p_req_state_reply *p = (struct p_req_state_reply *)h; @@ -4249,13 +4249,13 @@ static int got_RqSReply(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int got_Ping(struct drbd_conf *mdev, struct p_header *h) +static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h) { return drbd_send_ping_ack(mdev); } -static int got_PingAck(struct drbd_conf *mdev, struct p_header *h) +static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h) { /* restore idle timeout */ mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; @@ -4265,7 +4265,7 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int got_IsInSync(struct drbd_conf *mdev, struct p_header *h) +static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) { struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); @@ -4336,7 +4336,7 @@ static int validate_req_change_req_state(struct drbd_conf *mdev, return TRUE; } -static int got_BlockAck(struct drbd_conf *mdev, struct p_header *h) +static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) { struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); @@ -4376,7 +4376,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header *h) _ack_id_to_req, __func__ , what); } -static int got_NegAck(struct drbd_conf *mdev, struct p_header *h) +static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) { struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); @@ -4396,7 +4396,7 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header *h) _ack_id_to_req, __func__ , neg_acked); } -static int got_NegDReply(struct drbd_conf *mdev, struct p_header *h) +static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) { struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); @@ -4409,7 +4409,7 @@ static int got_NegDReply(struct drbd_conf *mdev, struct p_header *h) _ar_id_to_req, __func__ , neg_acked); } -static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h) +static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) { sector_t sector; int size; @@ -4431,7 +4431,7 @@ static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h) +static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) { struct p_barrier_ack *p = (struct p_barrier_ack *)h; @@ -4440,7 +4440,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int got_OVResult(struct drbd_conf *mdev, struct p_header *h) +static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) { struct p_block_ack *p = (struct p_block_ack *)h; struct drbd_work *w; @@ -4474,7 +4474,7 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int got_something_to_ignore_m(struct drbd_conf *mdev, struct p_header *h) +static int got_something_to_ignore_m(struct drbd_conf *mdev, struct p_header80 *h) { /* IGNORE */ return TRUE; @@ -4482,7 +4482,7 @@ static int got_something_to_ignore_m(struct drbd_conf *mdev, struct p_header *h) struct asender_cmd { size_t pkt_size; - int (*process)(struct drbd_conf *mdev, struct p_header *h); + int (*process)(struct drbd_conf *mdev, struct p_header80 *h); }; static struct asender_cmd *get_asender_cmd(int cmd) @@ -4491,8 +4491,8 @@ static struct asender_cmd *get_asender_cmd(int cmd) /* anything missing from this table is in * the drbd_cmd_handler (drbd_default_handler) table, * see the beginning of drbdd() */ - [P_PING] = { sizeof(struct p_header), got_Ping }, - [P_PING_ACK] = { sizeof(struct p_header), got_PingAck }, + [P_PING] = { sizeof(struct p_header80), got_Ping }, + [P_PING_ACK] = { sizeof(struct p_header80), got_PingAck }, [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, @@ -4504,7 +4504,7 @@ static struct asender_cmd *get_asender_cmd(int cmd) [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, - [P_DELAY_PROBE] = { sizeof(struct p_delay_probe), got_something_to_ignore_m }, + [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_something_to_ignore_m }, [P_MAX_CMD] = { 0, NULL }, }; if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) @@ -4515,13 +4515,13 @@ static struct asender_cmd *get_asender_cmd(int cmd) int drbd_asender(struct drbd_thread *thi) { struct drbd_conf *mdev = thi->mdev; - struct p_header *h = &mdev->meta.rbuf.header; + struct p_header80 *h = &mdev->meta.rbuf.header; struct asender_cmd *cmd = NULL; int rv, len; void *buf = h; int received = 0; - int expect = sizeof(struct p_header); + int expect = sizeof(struct p_header80); int empty; sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); @@ -4621,7 +4621,7 @@ int drbd_asender(struct drbd_thread *thi) goto disconnect; } expect = cmd->pkt_size; - ERR_IF(len != expect-sizeof(struct p_header)) + ERR_IF(len != expect-sizeof(struct p_header80)) goto reconnect; } if (received == expect) { @@ -4631,7 +4631,7 @@ int drbd_asender(struct drbd_thread *thi) buf = h; received = 0; - expect = sizeof(struct p_header); + expect = sizeof(struct p_header80); cmd = NULL; } } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 1eeb55423b3e..3d0e14e3ade3 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1204,7 +1204,7 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * dec_ap_pending will be done in got_BarrierAck * or (on connection loss) in w_clear_epoch. */ ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER, - (struct p_header *)p, sizeof(*p), 0); + (struct p_header80 *)p, sizeof(*p), 0); drbd_put_data_sock(mdev); return ok; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 0b2bfb58d9c5..89718a39791e 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -318,6 +318,8 @@ enum drbd_timeout_flag { #define DRBD_MAGIC 0x83740267 #define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC) +#define DRBD_MAGIC_BIG 0x835a +#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG) /* these are of type "int" */ #define DRBD_MD_INDEX_INTERNAL -1 -- cgit v1.2.3-55-g7522 From fb22c402ffdf61dd121795b5809de587185d5240 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 8 Sep 2010 23:20:21 +0200 Subject: drbd: Track the reasons to suspend IO in dedicated state bits There are three ways to get IO suspended: * Loss of any access to data * Fence-peer-handler running * User requested to suspend IO Track those in different bits, so that one condition clearing its state bit does not interfere with the other two conditions. Only when the user resumes IO he overrules all three bits. The fact is hidden from the user, he sees only a single suspend bit. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 9 ++++++++- drivers/block/drbd/drbd_main.c | 36 +++++++++++++++++++++++------------- drivers/block/drbd/drbd_nl.c | 20 ++++++++++++++------ drivers/block/drbd/drbd_proc.c | 2 +- drivers/block/drbd/drbd_receiver.c | 6 +++--- drivers/block/drbd/drbd_req.c | 6 +++--- include/linux/drbd.h | 10 +++++++--- 7 files changed, 59 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index eb1273d04caf..ff7fffa00dac 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1681,6 +1681,8 @@ void drbd_bcast_ee(struct drbd_conf *mdev, #define susp_MASK 1 #define user_isp_MASK 1 #define aftr_isp_MASK 1 +#define susp_nod_MASK 1 +#define susp_fen_MASK 1 #define NS(T, S) \ ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \ @@ -2254,11 +2256,16 @@ static inline int drbd_state_is_stable(union drbd_state s) return 1; } +static inline int is_susp(union drbd_state s) +{ + return s.susp || s.susp_nod || s.susp_fen; +} + static inline int __inc_ap_bio_cond(struct drbd_conf *mdev) { int mxb = drbd_get_max_buffers(mdev); - if (mdev->state.susp) + if (is_susp(mdev->state)) return 0; if (test_bit(SUSPEND_IO, &mdev->flags)) return 0; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 04c305d36f8e..4f33714fb3cd 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -654,7 +654,7 @@ static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) drbd_role_str(ns.peer), drbd_disk_str(ns.disk), drbd_disk_str(ns.pdsk), - ns.susp ? 's' : 'r', + is_susp(ns) ? 's' : 'r', ns.aftr_isp ? 'a' : '-', ns.peer_isp ? 'p' : '-', ns.user_isp ? 'u' : '-' @@ -925,12 +925,12 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state if (fp == FP_STONITH && (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) && !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)) - ns.susp = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ + ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO && (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) && !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE)) - ns.susp = 1; /* Suspend IO while no data available (no accessible data available) */ + ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { if (ns.conn == C_SYNC_SOURCE) @@ -1030,7 +1030,10 @@ int __drbd_set_state(struct drbd_conf *mdev, PSC(conn); PSC(disk); PSC(pdsk); - PSC(susp); + if (is_susp(ns) != is_susp(os)) + pbp += sprintf(pbp, "susp( %s -> %s ) ", + drbd_susp_str(is_susp(os)), + drbd_susp_str(is_susp(ns))); PSC(aftr_isp); PSC(peer_isp); PSC(user_isp); @@ -1218,6 +1221,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, { enum drbd_fencing_p fp; enum drbd_req_event what = nothing; + union drbd_state nsm = (union drbd_state){ .i = -1 }; if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { clear_bit(CRASHED_PRIMARY, &mdev->flags); @@ -1241,19 +1245,21 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Here we have the actions that are performed after a state change. This function might sleep */ - if (os.susp && ns.susp && mdev->sync_conf.on_no_data == OND_SUSPEND_IO) { + nsm.i = -1; + if (ns.susp_nod) { if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { if (ns.conn == C_CONNECTED) - what = resend; + what = resend, nsm.susp_nod = 0; else /* ns.conn > C_CONNECTED */ dev_err(DEV, "Unexpected Resynd going on!\n"); } if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) - what = restart_frozen_disk_io; + what = restart_frozen_disk_io, nsm.susp_nod = 0; + } - if (fp == FP_STONITH && ns.susp) { + if (ns.susp_fen) { /* case1: The outdate peer handler is successful: */ if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) { tl_clear(mdev); @@ -1263,20 +1269,22 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, drbd_md_sync(mdev); } spin_lock_irq(&mdev->req_lock); - _drbd_set_state(_NS(mdev, susp, 0), CS_VERBOSE, NULL); + _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL); spin_unlock_irq(&mdev->req_lock); } /* case2: The connection was established again: */ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { clear_bit(NEW_CUR_UUID, &mdev->flags); what = resend; + nsm.susp_fen = 0; } } if (what != nothing) { spin_lock_irq(&mdev->req_lock); _tl_restart(mdev, what); - _drbd_set_state(_NS(mdev, susp, 0), CS_VERBOSE, NULL); + nsm.i &= mdev->state.i; + _drbd_set_state(mdev, nsm, CS_VERBOSE, NULL); spin_unlock_irq(&mdev->req_lock); } @@ -1298,7 +1306,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (get_ldev(mdev)) { if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { - if (mdev->state.susp) { + if (is_susp(mdev->state)) { set_bit(NEW_CUR_UUID, &mdev->flags); } else { drbd_uuid_new_current(mdev); @@ -1417,7 +1425,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, resume_next_sg(mdev); /* free tl_hash if we Got thawed and are C_STANDALONE */ - if (ns.conn == C_STANDALONE && ns.susp == 0 && mdev->tl_hash) + if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash) drbd_free_tl_hash(mdev); /* Upon network connection, we need to start the receiver */ @@ -2732,7 +2740,9 @@ static void drbd_set_defaults(struct drbd_conf *mdev) .conn = C_STANDALONE, .disk = D_DISKLESS, .pdsk = D_UNKNOWN, - .susp = 0 + .susp = 0, + .susp_nod = 0, + .susp_fen = 0 } }; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 5b30f90cab3e..9ee44568dce3 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -209,7 +209,8 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev) put_ldev(mdev); } else { dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n"); - return mdev->state.pdsk; + nps = mdev->state.pdsk; + goto out; } r = drbd_khelper(mdev, "fence-peer"); @@ -256,6 +257,14 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev) dev_info(DEV, "fence-peer helper returned %d (%s)\n", (r>>8) & 0xff, ex_to_string); + +out: + if (mdev->state.susp_fen && nps >= D_UNKNOWN) { + /* The handler was not successful... unfreeze here, the + state engine can not unfreeze... */ + _drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE); + } + return nps; } @@ -550,7 +559,7 @@ char *ppsize(char *buf, unsigned long long size) void drbd_suspend_io(struct drbd_conf *mdev) { set_bit(SUSPEND_IO, &mdev->flags); - if (mdev->state.susp) + if (is_susp(mdev->state)) return; wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); } @@ -1016,7 +1025,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp drbd_suspend_io(mdev); /* also wait for the last barrier ack. */ - wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || mdev->state.susp); + wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state)); /* and for any other previously queued work */ drbd_flush_workqueue(mdev); @@ -1114,8 +1123,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp clear_bit(CRASHED_PRIMARY, &mdev->flags); if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) && - !(mdev->state.role == R_PRIMARY && mdev->state.susp && - mdev->sync_conf.on_no_data == OND_SUSPEND_IO)) { + !(mdev->state.role == R_PRIMARY && mdev->state.susp_nod)) { set_bit(CRASHED_PRIMARY, &mdev->flags); cp_discovered = 1; } @@ -1939,7 +1947,7 @@ static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp drbd_md_sync(mdev); } drbd_suspend_io(mdev); - reply->ret_code = drbd_request_state(mdev, NS(susp, 0)); + reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); if (reply->ret_code == SS_SUCCESS) { if (mdev->state.conn < C_CONNECTED) tl_clear(mdev); diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index a4a4a06908c5..aec8426c1bf6 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -213,7 +213,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) drbd_disk_str(mdev->state.pdsk), (mdev->net_conf == NULL ? ' ' : (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')), - mdev->state.susp ? 's' : 'r', + is_susp(mdev->state) ? 's' : 'r', mdev->state.aftr_isp ? 'a' : '-', mdev->state.peer_isp ? 'p' : '-', mdev->state.user_isp ? 'u' : '-', diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 983e49cbd233..6b69b2f734dc 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3315,7 +3315,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if ((nconn == C_CONNECTED || nconn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING) ns.disk = mdev->new_state_tmp.disk; cs_flags = CS_VERBOSE + (oconn < C_CONNECTED && nconn >= C_CONNECTED ? 0 : CS_HARD); - if (ns.pdsk == D_CONSISTENT && ns.susp && nconn == C_CONNECTED && oconn < C_CONNECTED && + if (ns.pdsk == D_CONSISTENT && is_susp(ns) && nconn == C_CONNECTED && oconn < C_CONNECTED && test_bit(NEW_CUR_UUID, &mdev->flags)) { /* Do not allow tl_restart(resend) for a rebooted peer. We can only allow this for temporal network outages! */ @@ -3829,7 +3829,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) kfree(mdev->p_uuid); mdev->p_uuid = NULL; - if (!mdev->state.susp) + if (!is_susp(mdev->state)) tl_clear(mdev); dev_info(DEV, "Connection closed\n"); @@ -3858,7 +3858,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) if (os.conn == C_DISCONNECTING) { wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0); - if (!mdev->state.susp) { + if (!is_susp(mdev->state)) { /* we must not free the tl_hash * while application io is still on the fly */ wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index af608b39c4e0..9e91a2545fc8 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -287,7 +287,7 @@ static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_e { struct drbd_conf *mdev = req->mdev; - if (!mdev->state.susp) + if (!is_susp(mdev->state)) _req_may_be_done(req, m); } @@ -812,7 +812,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) (mdev->state.pdsk == D_INCONSISTENT && mdev->state.conn >= C_CONNECTED)); - if (!(local || remote) && !mdev->state.susp) { + if (!(local || remote) && !is_susp(mdev->state)) { dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); goto fail_free_complete; } @@ -838,7 +838,7 @@ allocate_barrier: /* GOOD, everything prepared, grab the spin_lock */ spin_lock_irq(&mdev->req_lock); - if (mdev->state.susp) { + if (is_susp(mdev->state)) { /* If we got suspended, use the retry mechanism of generic_make_request() to restart processing of this bio. In the next call to drbd_make_request_26 diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 89718a39791e..5e72a5d3d48f 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -232,13 +232,17 @@ union drbd_state { unsigned conn:5 ; /* 17/32 cstates */ unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ - unsigned susp:1 ; /* 2/2 IO suspended no/yes */ + unsigned susp:1 ; /* 2/2 IO suspended no/yes (by user) */ unsigned aftr_isp:1 ; /* isp .. imposed sync pause */ unsigned peer_isp:1 ; unsigned user_isp:1 ; - unsigned _pad:11; /* 0 unused */ + unsigned susp_nod:1 ; /* IO suspended because no data */ + unsigned susp_fen:1 ; /* IO suspended because fence peer handler runs*/ + unsigned _pad:9; /* 0 unused */ #elif defined(__BIG_ENDIAN_BITFIELD) - unsigned _pad:11; /* 0 unused */ + unsigned _pad:9; + unsigned susp_fen:1 ; + unsigned susp_nod:1 ; unsigned user_isp:1 ; unsigned peer_isp:1 ; unsigned aftr_isp:1 ; /* isp .. imposed sync pause */ -- cgit v1.2.3-55-g7522 From 00b425377d60e67e86721d4ce6d7cbf131a5d0fd Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 5 Oct 2010 11:19:39 +0200 Subject: drbd: Allow larger values for c-fill-target. Connections through a compressing proxy might have more bits on the fly. 500MByte instead of 50MByte Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- include/linux/drbd_limits.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ff7fffa00dac..1680939de101 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -348,7 +348,7 @@ struct p_header80 { struct p_header95 { u16 magic; /* use DRBD_MAGIC_BIG here */ u16 command; - u32 length; + u32 length; /* Use only 24 bits of that. Ignore the highest 8 bit. */ u8 payload[0]; } __packed; diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 0b24ded6fffd..4ac33f34b77e 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -143,7 +143,7 @@ #define DRBD_C_DELAY_TARGET_DEF 10 #define DRBD_C_FILL_TARGET_MIN 0 -#define DRBD_C_FILL_TARGET_MAX 100000 +#define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */ #define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */ #define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */ -- cgit v1.2.3-55-g7522 From 22cc37a943832c948808884604ec6f5ff2594c1d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 14 Sep 2010 20:40:41 +0200 Subject: drbd: fix unlikely access after free and list corruption Various cleanup paths have been incomplete, for the very unlikely case that we cannot allocate enough bios from process context when submitting on behalf of the peer or resync process. Never observed. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 25 +++++++++++++++++++++++++ drivers/block/drbd/drbd_worker.c | 7 +++++++ include/linux/drbd.h | 4 ++-- 3 files changed, 34 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 990fe01afa50..71775a9de21d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1573,6 +1573,13 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) return TRUE; + /* drbd_submit_ee currently fails for one reason only: + * not being able to allocate enough bios. + * Is dropping the connection going to help? */ + spin_lock_irq(&mdev->req_lock); + list_del(&e->w.list); + spin_unlock_irq(&mdev->req_lock); + drbd_free_ee(mdev, e); fail: put_ldev(mdev); @@ -1998,6 +2005,16 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0) return TRUE; + /* drbd_submit_ee currently fails for one reason only: + * not being able to allocate enough bios. + * Is dropping the connection going to help? */ + spin_lock_irq(&mdev->req_lock); + list_del(&e->w.list); + hlist_del_init(&e->colision); + spin_unlock_irq(&mdev->req_lock); + if (e->flags & EE_CALL_AL_COMPLETE_IO) + drbd_al_complete_io(mdev, e->sector); + out_interrupted: /* yes, the epoch_size now is imbalanced. * but we drop the connection anyways, so we don't have a chance to @@ -2202,6 +2219,14 @@ submit: if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) return TRUE; + /* drbd_submit_ee currently fails for one reason only: + * not being able to allocate enough bios. + * Is dropping the connection going to help? */ + spin_lock_irq(&mdev->req_lock); + list_del(&e->w.list); + spin_unlock_irq(&mdev->req_lock); + /* no drbd_rs_complete_io(), we are dropping the connection anyways */ + out_free_e: put_ldev(mdev); drbd_free_ee(mdev, e); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 88be45ad84ed..f12822d53867 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -387,6 +387,13 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0) return 0; + /* drbd_submit_ee currently fails for one reason only: + * not being able to allocate enough bios. + * Is dropping the connection going to help? */ + spin_lock_irq(&mdev->req_lock); + list_del(&e->w.list); + spin_unlock_irq(&mdev->req_lock); + drbd_free_ee(mdev, e); defer: put_ldev(mdev); diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 5e72a5d3d48f..da7d9bd4f3f0 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,10 +53,10 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.8.1" +#define REL_VERSION "8.3.9rc1" #define API_VERSION 88 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 94 +#define PRO_VERSION_MAX 95 enum drbd_io_error_p { -- cgit v1.2.3-55-g7522 From be70e2671b95a8982ff133ebaafff6399ad393d4 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 14 Oct 2010 11:58:20 +0200 Subject: dynamic_debug.h: Fix dynamic_dev_dbg() macro if CONFIG_DYNAMIC_DEBUG not set Signed-off-by: Philipp Reisner --- include/linux/dynamic_debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 52c0da4bdd18..81bc20e36ec0 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -80,7 +80,7 @@ static inline int ddebug_remove_module(const char *mod) #define dynamic_pr_debug(fmt, ...) \ do { if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0) -#define dynamic_dev_dbg(dev, format, ...) \ +#define dynamic_dev_dbg(dev, fmt, ...) \ do { if (0) dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); } while (0) #endif -- cgit v1.2.3-55-g7522 From 5dbfe7aedf54aa7f62fd659e34371d4ea0e7bffe Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 15 Oct 2010 09:52:46 +0200 Subject: drbd: add race-breaker to drbd_go_diskless This adds a necessary race breaker to these commits: drbd: fix for possible deadlock on IO error during resync drbd: drop wrong debug asserts, fix recently introduced race What we do is get a refcount, check the state, then depending on the state and the requested minimum disk state, either hold it (success), or give it back immediately (failed "try lock"). Some code paths (flushing of drbd metadata) may still grab and hold a refcount even if we are D_FAILED (application IO won't). So even if we hit local_cnt == 0 once after being D_FAILED, we still need to wait for that again after we changed to D_DISKLESS. Once local_cnt reaches 0 while we are D_DISKLESS, we can be sure that no one will look at the protected members anymore, so only then is it safe to free them. We cannot easily convert to standard locking primitives here, as we want to be able to use it in atomic context (we always do a "try lock"), as well as hold references for a "long time" (from IO submission to completion callback). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 3 +++ include/linux/drbd.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index bbe3bff2cad6..8bfedc7164fa 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3763,6 +3763,9 @@ static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused * the protected members anymore, though, so in the after_state_ch work * it will be safe to free them. */ drbd_force_state(mdev, NS(disk, D_DISKLESS)); + /* We need to wait for return of references checked out while we still + * have been D_FAILED, though (drbd_md_sync, bitmap io). */ + wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); clear_bit(GO_DISKLESS, &mdev->flags); return 1; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index da7d9bd4f3f0..9b2a0158f399 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.9rc1" +#define REL_VERSION "8.3.9rc2" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 95 -- cgit v1.2.3-55-g7522 From 6362beea8914cbd4630ccde3617d944aeca2d48f Mon Sep 17 00:00:00 2001 From: Mike Miller Date: Tue, 19 Oct 2010 09:40:34 +0200 Subject: cciss: fix PCI IDs for new Smart Array controllers cciss: fix PCI IDs for new controllers This patch fixes the botched up PCI IDs of new controllers. Please consider this patch for inclusion. Signed-off-by: Mike Miller Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 22 ++++++++++++---------- include/linux/pci_ids.h | 1 + 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index df2ed4d6684d..41f9acb8ecd7 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -105,11 +105,12 @@ static const struct pci_device_id cciss_pci_device_id[] = { {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3249}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324A}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324B}, - {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3250}, - {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3251}, - {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3252}, - {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3253}, - {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3254}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3350}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3351}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3352}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3353}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3354}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3355}, {0,} }; @@ -149,11 +150,12 @@ static struct board_type products[] = { {0x3249103C, "Smart Array P812", &SA5_access}, {0x324A103C, "Smart Array P712m", &SA5_access}, {0x324B103C, "Smart Array P711m", &SA5_access}, - {0x3250103C, "Smart Array", &SA5_access}, - {0x3251103C, "Smart Array", &SA5_access}, - {0x3252103C, "Smart Array", &SA5_access}, - {0x3253103C, "Smart Array", &SA5_access}, - {0x3254103C, "Smart Array", &SA5_access}, + {0x3350103C, "Smart Array", &SA5_access}, + {0x3351103C, "Smart Array", &SA5_access}, + {0x3352103C, "Smart Array", &SA5_access}, + {0x3353103C, "Smart Array", &SA5_access}, + {0x3354103C, "Smart Array", &SA5_access}, + {0x3355103C, "Smart Array", &SA5_access}, }; /* How long to wait (in milliseconds) for board to go into simple mode */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6a3b2d36cad..fe7d72ce33af 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -739,6 +739,7 @@ #define PCI_DEVICE_ID_HP_CISSC 0x3230 #define PCI_DEVICE_ID_HP_CISSD 0x3238 #define PCI_DEVICE_ID_HP_CISSE 0x323a +#define PCI_DEVICE_ID_HP_CISSF 0x323b #define PCI_DEVICE_ID_HP_ZX2_IOC 0x4031 #define PCI_VENDOR_ID_PCTECH 0x1042 -- cgit v1.2.3-55-g7522