from:"Philipp Reisner"

[PATCH 04/18] drbd: use the cached meta_dev_idx

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

Now that we have the cached meta_dev_idx member,
we can get rid of a few rcu_read_lock() sections and rcu_dereference() calls.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_int.h |   32 +---
 drivers/block/drbd/drbd_nl.c  |7 +--
 2 files changed, 6 insertions(+), 33 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index ee19ba2..6eecdec 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1777,9 +1777,9 @@ static inline void drbd_chk_io_error_(struct drbd_conf 
*mdev,
  * BTW, for internal meta data, this happens to be the maximum capacity
  * we could agree upon with our peer node.
  */
-static inline sector_t _drbd_md_first_sector(int meta_dev_idx, struct 
drbd_backing_dev *bdev)
+static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
 {
-   switch (meta_dev_idx) {
+   switch (bdev->md.meta_dev_idx) {
case DRBD_MD_INDEX_INTERNAL:
case DRBD_MD_INDEX_FLEX_INT:
return bdev->md.md_offset + bdev->md.bm_offset;
@@ -1789,30 +1789,13 @@ static inline sector_t _drbd_md_first_sector(int 
meta_dev_idx, struct drbd_backi
}
 }
 
-static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
-{
-   int meta_dev_idx;
-
-   rcu_read_lock();
-   meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
-   rcu_read_unlock();
-
-   return _drbd_md_first_sector(meta_dev_idx, bdev);
-}
-
 /**
  * drbd_md_last_sector() - Return the last sector number of the meta data area
  * @bdev:  Meta data block device.
  */
 static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
 {
-   int meta_dev_idx;
-
-   rcu_read_lock();
-   meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
-   rcu_read_unlock();
-
-   switch (meta_dev_idx) {
+   switch (bdev->md.meta_dev_idx) {
case DRBD_MD_INDEX_INTERNAL:
case DRBD_MD_INDEX_FLEX_INT:
return bdev->md.md_offset + MD_4kB_SECT -1;
@@ -1840,18 +1823,13 @@ static inline sector_t drbd_get_capacity(struct 
block_device *bdev)
 static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
 {
sector_t s;
-   int meta_dev_idx;
-
-   rcu_read_lock();
-   meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
-   rcu_read_unlock();
 
-   switch (meta_dev_idx) {
+   switch (bdev->md.meta_dev_idx) {
case DRBD_MD_INDEX_INTERNAL:
case DRBD_MD_INDEX_FLEX_INT:
s = drbd_get_capacity(bdev->backing_bdev)
? min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
-   _drbd_md_first_sector(meta_dev_idx, bdev))
+   drbd_md_first_sector(bdev))
: 0;
break;
case DRBD_MD_INDEX_FLEX_EXT:
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 104b7ce..5621df8 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -722,14 +722,10 @@ static void drbd_md_set_sector_offsets(struct drbd_conf 
*mdev,
 {
sector_t md_size_sect = 0;
unsigned int al_size_sect = MD_32kB_SECT;
-   int meta_dev_idx;
-
-   rcu_read_lock();
-   meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
 
bdev->md.md_offset = drbd_md_ss(bdev);
 
-   switch (meta_dev_idx) {
+   switch (bdev->md.meta_dev_idx) {
default:
/* v07 style fixed size indexed meta data */
bdev->md.md_size_sect = MD_128MB_SECT;
@@ -761,7 +757,6 @@ static void drbd_md_set_sector_offsets(struct drbd_conf 
*mdev,
bdev->md.bm_offset   = -md_size_sect + MD_4kB_SECT;
break;
}
-   rcu_read_unlock();
 }
 
 /* input size is expected to be in KB */
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 00/18] RFC: Non blocking submit for activity log misses

2013-03-19 Thread Philipp Reisner

The Issues

Since the beginning, DRBD was written with the assumption that the write
pattern has spatial locality. (This assumption was driven by the fact
that rotating media perform better if you do not send the head too far too
often.)

Backed by this assumption, a caller that submits a request outside of
the current active set was blocked until the active set was changed.
(Changing the active set is a synchronous write operation to the meta-data
area on the backing storage = "an AL-update" in DRBD-speak.)

A second effect was that DRBD's meta-data was located in a very narrow
area. When DRBD is used on top of a RAID0 stripe set, this causes all
AL-updates to go to the same disk.


The Proposed Solution

This patch series improves DRBD's behavior. A submitter is no longer blocked
in the case of an AL-miss. For this, a dedicated submitter worker is introduced
(patch 13).

In order to better distribute the AL-updates to more disks in a stripe set
this patch series also introduces an optional striped layout of the part
of the meta-data that holds the AL-updates (patch 4).


The Results

This of course drastically improves DRBD's performance if the write pattern
does not have any spatial locality, e.g. random writes spread out over the
whole device.

In the test systems we have SSDs which are able to do up to 5 writes per
second. The test does randomly distributed writes over a working set size of
128GiB with IO depths from 1 to 1024.

At an IO depth of 64:
Without these patches we observed ~100 IOPs.
With these patches we observed about 2 IOPs.

Please find charts of the results here:
http://blogs.linbit.com/p/469/843-random-writes-faster/


Lars Ellenberg (18):
  drbd: cleanup bogus assert message
  drbd: cleanup ondisk meta data layout calculations and defines
  drbd: prepare for new striped layout of activity log
  drbd: use the cached meta_dev_idx
  drbd: mechanically rename la_size to la_size_sect
  drbd: read meta data early, base on-disk offsets on super block
  drbd: Clarify when activity log I/O is delegated to the worker thread
  drbd: drbd_al_being_io: short circuit to reduce latency
  drbd: split __drbd_make_request in before and after drbd_al_begin_io
  drbd: prepare to queue write requests on a submit worker
  drbd: split drbd_al_begin_io into fastpath, prepare, and commit
  drbd: split out some helper functions to drbd_al_begin_io
  drbd: queue writes on submitter thread, unless they pass the activity
log fastpath
  lru_cache: introduce lc_get_cumulative()
  drbd: consolidate as many updates as possible into one AL transaction
  drbd: move start io accounting before activity log transaction
  drbd: try hard to max out the updates per AL transaction
  drbd: adjust upper limit for activity log extents

 drivers/block/drbd/drbd_actlog.c   |  246 +++-
 drivers/block/drbd/drbd_bitmap.c   |   13 +-
 drivers/block/drbd/drbd_int.h  |  179 +-
 drivers/block/drbd/drbd_main.c |  243 +--
 drivers/block/drbd/drbd_nl.c   |  129 ---
 drivers/block/drbd/drbd_receiver.c |4 +-
 drivers/block/drbd/drbd_req.c  |  166 +---
 drivers/block/drbd/drbd_worker.c   |5 +-
 include/linux/drbd_limits.h|   11 +-
 include/linux/lru_cache.h  |1 +
 lib/lru_cache.c|   55 ++--
 11 files changed, 782 insertions(+), 270 deletions(-)

-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 03/18] drbd: prepare for new striped layout of activity log

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

Introduce two new on-disk meta data fields: al_stripes and al_stripe_size_4k
The intended use case is activity log on RAID 0 or similar.
Logically consecutive transactions will advance their on-disk position
by al_stripe_size_4k 4kB (transaction sized) blocks.

Right now, these are still asserted to be the backward compatible
values al_stripes = 1, al_stripe_size_4k = 8 (which amounts to 32kB).

Also introduce a caching member for meta_dev_idx in the in-core
structure: even though it is initially passed in in the rcu-protected
disk_conf structure, it cannot change without a detach/attach cycle.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_actlog.c |6 +--
 drivers/block/drbd/drbd_int.h|   46 ++-
 drivers/block/drbd/drbd_main.c   |   77 ++
 drivers/block/drbd/drbd_nl.c |5 +--
 4 files changed, 94 insertions(+), 40 deletions(-)

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index b230d91..7e7680e 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -353,11 +353,11 @@ static unsigned int rs_extent_to_bm_page(unsigned int 
rs_enr)
 
 static sector_t al_tr_number_to_on_disk_sector(struct drbd_conf *mdev)
 {
-   const unsigned int stripes = 1;
-   const unsigned int stripe_size_4kB = MD_32kB_SECT/MD_4kB_SECT;
+   const unsigned int stripes = mdev->ldev->md.al_stripes;
+   const unsigned int stripe_size_4kB = mdev->ldev->md.al_stripe_size_4k;
 
/* transaction number, modulo on-disk ring buffer wrap around */
-   unsigned int t = mdev->al_tr_number % (stripe_size_4kB * stripes);
+   unsigned int t = mdev->al_tr_number % (mdev->ldev->md.al_size_4k);
 
/* ... to aligned 4k on disk block */
t = ((t % stripes) * stripe_size_4kB) + t/stripes;
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 60c89e5..ee19ba2 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -755,6 +755,14 @@ struct drbd_md {
 
s32 al_offset;  /* signed relative sector offset to activity log */
s32 bm_offset;  /* signed relative sector offset to bitmap */
+
+   /* cached value of bdev->disk_conf->meta_dev_idx (see below) */
+   s32 meta_dev_idx;
+
+   /* see al_tr_number_to_on_disk_sector() */
+   u32 al_stripes;
+   u32 al_stripe_size_4k;
+   u32 al_size_4k; /* cached product of the above */
 };
 
 struct drbd_backing_dev {
@@ -1862,38 +1870,24 @@ static inline sector_t drbd_get_max_capacity(struct 
drbd_backing_dev *bdev)
 }
 
 /**
- * drbd_md_ss__() - Return the sector number of our meta data super block
- * @mdev:  DRBD device.
+ * drbd_md_ss() - Return the sector number of our meta data super block
  * @bdev:  Meta data block device.
  */
-static inline sector_t drbd_md_ss__(struct drbd_conf *mdev,
-   struct drbd_backing_dev *bdev)
+static inline sector_t drbd_md_ss(struct drbd_backing_dev *bdev)
 {
-   int meta_dev_idx;
+   const int meta_dev_idx = bdev->md.meta_dev_idx;
 
-   rcu_read_lock();
-   meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
-   rcu_read_unlock();
+   if (meta_dev_idx == DRBD_MD_INDEX_FLEX_EXT)
+   return 0;
 
-   switch (meta_dev_idx) {
-   default: /* external, some index; this is the old fixed size layout */
-   return MD_128MB_SECT * meta_dev_idx;
-   case DRBD_MD_INDEX_INTERNAL:
-   /* with drbd08, internal meta data is always "flexible" */
-   case DRBD_MD_INDEX_FLEX_INT:
-   if (!bdev->backing_bdev) {
-   if (__ratelimit(&drbd_ratelimit_state)) {
-   dev_err(DEV, "bdev->backing_bdev==NULL\n");
-   dump_stack();
-   }
-   return 0;
-   }
-   /* sizeof(struct md_on_disk_07) == 4k
-* position: last 4k aligned block of 4k size */
+   /* Since drbd08, internal meta data is always "flexible".
+* position: last 4k aligned block of 4k size */
+   if (meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
+   meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)
return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL) - 8;
-   case DRBD_MD_INDEX_FLEX_EXT:
-   return 0;
-   }
+
+   /* external, some index; this is the old fixed size layout */
+   return MD_128MB_SECT * bdev->md.meta_dev_idx;
 }
 
 static inline void
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 6c4f0ff..b9bfb10 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2853,7 +2853,11 @@ struct meta_data_on_disk {
u3

[PATCH 17/18] drbd: try hard to max out the updates per AL transaction

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

There may have been more incoming requests while we were preparing
the current transaction. Try to consolidate more updates into this
transaction until we make no more progress.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_req.c |   31 +++
 1 file changed, 31 insertions(+)

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index d72f2fe..9f7ff1c 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1226,6 +1226,37 @@ void do_submit(struct work_struct *ws)
break;
 
wait_event(mdev->al_wait, prepare_al_transaction_nonblock(mdev, 
&incoming, &pending));
+   /* Maybe more was queued, while we prepared the transaction?
+* Try to stuff them into this transaction as well.
+* Be strictly non-blocking here, no wait_event, we already
+* have something to commit.
+* Stop if we don't make any more progres.
+*/
+   for (;;) {
+   LIST_HEAD(more_pending);
+   LIST_HEAD(more_incoming);
+   bool made_progress;
+
+   /* It is ok to look outside the lock,
+* it's only an optimization anyways */
+   if (list_empty(&mdev->submit.writes))
+   break;
+
+   spin_lock(&mdev->submit.lock);
+   list_splice_tail_init(&mdev->submit.writes, 
&more_incoming);
+   spin_unlock(&mdev->submit.lock);
+
+   if (list_empty(&more_incoming))
+   break;
+
+   made_progress = prepare_al_transaction_nonblock(mdev, 
&more_incoming, &more_pending);
+
+   list_splice_tail_init(&more_pending, &pending);
+   list_splice_tail_init(&more_incoming, &incoming);
+
+   if (!made_progress)
+   break;
+   }
drbd_al_begin_io_commit(mdev, false);
 
list_for_each_entry_safe(req, tmp, &pending, tl_requests) {
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 18/18] drbd: adjust upper limit for activity log extents

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

Now that the on-disk activity-log ring buffer size is adjustable,
the maximum active set can become larger, and is now limited by
the use of 16bit "labels".

This increases the maximum working set from 6433 to 65534 extents,
each of which covers an area of 4MiB.
Which means that if you use the maximum, you'd have to resync
more than 250 GiB after an unclean Primary shutdown.
With capable backend storage and replication links,
this is entirely feasible.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_nl.c |   47 ++
 include/linux/drbd_limits.h  |   11 +-
 2 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index bcf900b..42fda4a 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1141,15 +1141,32 @@ static bool should_set_defaults(struct genl_info *info)
return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
 }
 
-static void enforce_disk_conf_limits(struct disk_conf *dc)
+static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
 {
-   if (dc->al_extents < DRBD_AL_EXTENTS_MIN)
-   dc->al_extents = DRBD_AL_EXTENTS_MIN;
-   if (dc->al_extents > DRBD_AL_EXTENTS_MAX)
-   dc->al_extents = DRBD_AL_EXTENTS_MAX;
+   /* This is limited by 16 bit "slot" numbers,
+* and by available on-disk context storage.
+*
+* Also (u16)~0 is special (denotes a "free" extent).
+*
+* One transaction occupies one 4kB on-disk block,
+* we have n such blocks in the on disk ring buffer,
+* the "current" transaction may fail (n-1),
+* and there is 919 slot numbers context information per transaction.
+*
+* 72 transaction blocks amounts to more than 2**16 context slots,
+* so cap there first.
+*/
+   const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
+   const unsigned int sufficient_on_disk =
+   (max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
+   /AL_CONTEXT_PER_TRANSACTION;
 
-   if (dc->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
-   dc->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
+   unsigned int al_size_4k = bdev->md.al_size_4k;
+
+   if (al_size_4k > sufficient_on_disk)
+   return max_al_nr;
+
+   return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
 }
 
 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
@@ -1196,7 +1213,13 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct 
genl_info *info)
if (!expect(new_disk_conf->resync_rate >= 1))
new_disk_conf->resync_rate = 1;
 
-   enforce_disk_conf_limits(new_disk_conf);
+   if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
+   new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
+   if (new_disk_conf->al_extents > drbd_al_extents_max(mdev->ldev))
+   new_disk_conf->al_extents = drbd_al_extents_max(mdev->ldev);
+
+   if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
+   new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
 
fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
if (fifo_size != mdev->rs_plan_s->size) {
@@ -1344,7 +1367,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info 
*info)
goto fail;
}
 
-   enforce_disk_conf_limits(new_disk_conf);
+   if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
+   new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
 
new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / 
HZ);
if (!new_plan) {
@@ -1419,6 +1443,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct 
genl_info *info)
if (retcode != NO_ERROR)
goto fail;
 
+   if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
+   new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
+   if (new_disk_conf->al_extents > drbd_al_extents_max(nbc))
+   new_disk_conf->al_extents = drbd_al_extents_max(nbc);
+
if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
(unsigned long long) drbd_get_max_capacity(nbc),
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 1fa19c5..1fedf2b 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -126,13 +126,12 @@
 #define DRBD_RESYNC_RATE_DEF 250
 #define DRBD_RESYNC_RATE_SCALE 'k'  /* kilobytes */
 
-  /* less than 7 would hit performance unnecessarily.
-   * 919 slots context information per transaction,
-   * 32k a

[PATCH 16/18] drbd: move start io accounting before activity log transaction

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

The IO accounting of the drbd "queue depth" was misleading.
We only started IO accounting once we already wrote the activity log.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_req.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index b923d41..d72f2fe 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1056,6 +1056,9 @@ drbd_request_prepare(struct drbd_conf *mdev, struct bio 
*bio, unsigned long star
req->private_bio = NULL;
}
 
+   /* Update disk stats */
+   _drbd_start_io_acct(mdev, req);
+
if (rw == WRITE && req->private_bio && req->i.size
&& !test_bit(AL_SUSPENDED, &mdev->flags)) {
if (!drbd_al_begin_io_fastpath(mdev, &req->i)) {
@@ -1095,9 +1098,6 @@ static void drbd_send_and_submit(struct drbd_conf *mdev, 
struct drbd_request *re
goto out;
}
 
-   /* Update disk stats */
-   _drbd_start_io_acct(mdev, req);
-
/* We fail READ/READA early, if we can not serve it.
 * We must do this before req is registered on any lists.
 * Otherwise, drbd_req_complete() will queue failed READ for retry. */
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 14/18] lru_cache: introduce lc_get_cumulative()

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

New helper to be able to consolidate more updates
into a single transaction.
Without this, we can only grab a single refcount
on an updated element while preparing a transaction.

lc_get_cumulative - like lc_get; also finds to-be-changed elements
  @lc: the lru cache to operate on
  @enr: the label to look up

  Unlike lc_get, this also returns the element for @enr if it belongs to
  a pending transaction, so the return values are like for lc_get(),
  plus:

  pointer to an element already on the "to_be_changed" list.
  In this case, the cache was already marked %LC_DIRTY.

  Caller needs to make sure that the pending transaction is completed,
  before proceeding to actually use this element.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 include/linux/lru_cache.h |1 +
 lib/lru_cache.c   |   55 -
 2 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
index 4019013..4626228 100644
--- a/include/linux/lru_cache.h
+++ b/include/linux/lru_cache.h
@@ -256,6 +256,7 @@ extern void lc_destroy(struct lru_cache *lc);
 extern void lc_set(struct lru_cache *lc, unsigned int enr, int index);
 extern void lc_del(struct lru_cache *lc, struct lc_element *element);
 
+extern struct lc_element *lc_get_cumulative(struct lru_cache *lc, unsigned int 
enr);
 extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr);
 extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr);
 extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr);
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index d71d894..e8d5003 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -366,7 +366,13 @@ static int lc_unused_element_available(struct lru_cache 
*lc)
return 0;
 }
 
-static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, 
bool may_change)
+/* used as internal flags to __lc_get */
+enum {
+   LC_GET_MAY_CHANGE = 1,
+   LC_GET_MAY_USE_UNCOMMITTED = 2,
+};
+
+static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, 
unsigned int flags)
 {
struct lc_element *e;
 
@@ -381,22 +387,31 @@ static struct lc_element *__lc_get(struct lru_cache *lc, 
unsigned int enr, bool
 * this enr is currently being pulled in already,
 * and will be available once the pending transaction
 * has been committed. */
-   if (e && e->lc_new_number == e->lc_number) {
+   if (e) {
+   if (e->lc_new_number != e->lc_number) {
+   /* It has been found above, but on the "to_be_changed"
+* list, not yet committed.  Don't pull it in twice,
+* wait for the transaction, then try again...
+*/
+   if (!(flags & LC_GET_MAY_USE_UNCOMMITTED))
+   RETURN(NULL);
+   /* ... unless the caller is aware of the implications,
+* probably preparing a cumulative transaction. */
+   ++e->refcnt;
+   ++lc->hits;
+   RETURN(e);
+   }
+   /* else: lc_new_number == lc_number; a real hit. */
++lc->hits;
if (e->refcnt++ == 0)
lc->used++;
list_move(&e->list, &lc->in_use); /* Not evictable... */
RETURN(e);
}
+   /* e == NULL */
 
++lc->misses;
-   if (!may_change)
-   RETURN(NULL);
-
-   /* It has been found above, but on the "to_be_changed" list, not yet
-* committed.  Don't pull it in twice, wait for the transaction, then
-* try again */
-   if (e)
+   if (!(flags & LC_GET_MAY_CHANGE))
RETURN(NULL);
 
/* To avoid races with lc_try_lock(), first, mark us dirty
@@ -478,7 +493,27 @@ static struct lc_element *__lc_get(struct lru_cache *lc, 
unsigned int enr, bool
  */
 struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
 {
-   return __lc_get(lc, enr, 1);
+   return __lc_get(lc, enr, LC_GET_MAY_CHANGE);
+}
+
+/**
+ * lc_get_cumulative - like lc_get; also finds to-be-changed elements
+ * @lc: the lru cache to operate on
+ * @enr: the label to look up
+ *
+ * Unlike lc_get this also returns the element for @enr, if it is belonging to
+ * a pending transaction, so the return values are like for lc_get(),
+ * plus:
+ *
+ * pointer to an element already on the "to_be_changed" list.
+ * In this case, the cache was already marked %LC_DIRTY.
+ *
+ * Caller needs to make sure that the pending transaction is completed,
+ * before proceeding to actually use this element.
+ */
+struct lc_element *lc_get_cumulative(struct lru

[PATCH 13/18] drbd: queue writes on submitter thread, unless they pass the activity log fastpath

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_req.c |   20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 4af709e..43bc1d0 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1020,6 +1020,14 @@ drbd_submit_req_private_bio(struct drbd_request *req)
bio_endio(bio, -EIO);
 }
 
+static void drbd_queue_write(struct drbd_conf *mdev, struct drbd_request *req)
+{
+   spin_lock(&mdev->submit.lock);
+   list_add_tail(&req->tl_requests, &mdev->submit.writes);
+   spin_unlock(&mdev->submit.lock);
+   queue_work(mdev->submit.wq, &mdev->submit.worker);
+}
+
 /* returns the new drbd_request pointer, if the caller is expected to
  * drbd_send_and_submit() it (to save latency), or NULL if we queued the
  * request on the submitter thread.
@@ -1048,17 +1056,13 @@ drbd_request_prepare(struct drbd_conf *mdev, struct bio 
*bio, unsigned long star
req->private_bio = NULL;
}
 
-   /* For WRITES going to the local disk, grab a reference on the target
-* extent.  This waits for any resync activity in the corresponding
-* resync extent to finish, and, if necessary, pulls in the target
-* extent into the activity log, which involves further disk io because
-* of transactional on-disk meta data updates.
-* Empty flushes don't need to go into the activity log, they can only
-* flush data for pending writes which are already in there. */
if (rw == WRITE && req->private_bio && req->i.size
&& !test_bit(AL_SUSPENDED, &mdev->flags)) {
+   if (!drbd_al_begin_io_fastpath(mdev, &req->i)) {
+   drbd_queue_write(mdev, req);
+   return NULL;
+   }
req->rq_state |= RQ_IN_ACT_LOG;
-   drbd_al_begin_io(mdev, &req->i, true);
}
 
return req;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 15/18] drbd: consolidate as many updates as possible into one AL transaction

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

Depending on current IO depth, try to consolidate as many updates
as possible into one activity log transaction.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_actlog.c |   49 ++
 drivers/block/drbd/drbd_int.h|2 ++
 drivers/block/drbd/drbd_req.c|   70 ++
 3 files changed, 107 insertions(+), 14 deletions(-)

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index ff03f90..6afe173 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -359,6 +359,55 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct 
drbd_interval *i, bool dele
drbd_al_begin_io_commit(mdev, delegate);
 }
 
+int drbd_al_begin_io_nonblock(struct drbd_conf *mdev, struct drbd_interval *i)
+{
+   struct lru_cache *al = mdev->act_log;
+   /* for bios crossing activity log extent boundaries,
+* we may need to activate two extents in one go */
+   unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
+   unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) 
>> (AL_EXTENT_SHIFT-9);
+   unsigned nr_al_extents;
+   unsigned available_update_slots;
+   unsigned enr;
+
+   D_ASSERT(first <= last);
+
+   nr_al_extents = 1 + last - first; /* worst case: all touched extends 
are cold. */
+   available_update_slots = min(al->nr_elements - al->used,
+   al->max_pending_changes - al->pending_changes);
+
+   /* We want all necessary updates for a given request within the same 
transaction
+* We could first check how many updates are *actually* needed,
+* and use that instead of the worst-case nr_al_extents */
+   if (available_update_slots < nr_al_extents)
+   return -EWOULDBLOCK;
+
+   /* Is resync active in this area? */
+   for (enr = first; enr <= last; enr++) {
+   struct lc_element *tmp;
+   tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
+   if (unlikely(tmp != NULL)) {
+   struct bm_extent  *bm_ext = lc_entry(tmp, struct 
bm_extent, lce);
+   if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
+   if (!test_and_set_bit(BME_PRIORITY, 
&bm_ext->flags));
+   return -EBUSY;
+   return -EWOULDBLOCK;
+   }
+   }
+   }
+
+   /* Checkout the refcounts.
+* Given that we checked for available elements and update slots above,
+* this has to be successful. */
+   for (enr = first; enr <= last; enr++) {
+   struct lc_element *al_ext;
+   al_ext = lc_get_cumulative(mdev->act_log, enr);
+   if (!al_ext)
+   dev_info(DEV, "LOGIC BUG for enr=%u\n", enr);
+   }
+   return 0;
+}
+
 void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i)
 {
/* for bios crossing activity log extent boundaries,
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index b7b52dd..f943aac 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1611,6 +1611,8 @@ extern const char *drbd_conn_str(enum drbd_conns s);
 extern const char *drbd_role_str(enum drbd_role s);
 
 /* drbd_actlog.c */
+extern int drbd_al_begin_io_nonblock(struct drbd_conf *mdev, struct 
drbd_interval *i);
+extern void drbd_al_begin_io_commit(struct drbd_conf *mdev, bool delegate);
 extern bool drbd_al_begin_io_fastpath(struct drbd_conf *mdev, struct 
drbd_interval *i);
 extern void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, 
bool delegate);
 extern void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval 
*i);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 43bc1d0..b923d41 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1164,32 +1164,74 @@ void __drbd_make_request(struct drbd_conf *mdev, struct 
bio *bio, unsigned long
drbd_send_and_submit(mdev, req);
 }
 
-void __drbd_make_request_from_worker(struct drbd_conf *mdev, struct 
drbd_request *req)
+static void submit_fast_path(struct drbd_conf *mdev, struct list_head 
*incoming)
 {
-   const int rw = bio_rw(req->master_bio);
+   struct drbd_request *req, *tmp;
+   list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
+   const int rw = bio_data_dir(req->master_bio);
 
-   if (rw == WRITE && req->private_bio && req->i.size
-   && !test_bit(AL_SUSPENDED, &mdev->flags)) {
-   drbd_al_begin_io(mdev, &req->i, false);
-   req->rq_state |= RQ_I

[PATCH 11/18] drbd: split drbd_al_begin_io into fastpath, prepare, and commit

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_actlog.c |  104 ++
 drivers/block/drbd/drbd_int.h|1 +
 2 files changed, 72 insertions(+), 33 deletions(-)

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 1d7244d..e4f1231 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -104,7 +104,6 @@ struct update_al_work {
int err;
 };
 
-static int al_write_transaction(struct drbd_conf *mdev, bool delegate);
 
 void *drbd_md_get_buffer(struct drbd_conf *mdev)
 {
@@ -246,30 +245,37 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, 
unsigned int enr)
return al_ext;
 }
 
-/*
- * @delegate:   delegate activity log I/O to the worker thread
- */
-void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, bool 
delegate)
+bool drbd_al_begin_io_fastpath(struct drbd_conf *mdev, struct drbd_interval *i)
 {
/* for bios crossing activity log extent boundaries,
 * we may need to activate two extents in one go */
unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) 
>> (AL_EXTENT_SHIFT-9);
-   unsigned enr;
-   bool need_transaction = false;
-   bool locked = false;
+   bool fastpath_ok = true;
 
-   /* When called through generic_make_request(), we must delegate
-* activity log I/O to the worker thread: a further request
-* submitted via generic_make_request() within the same task
-* would be queued on current->bio_list, and would only start
-* after this function returns (see generic_make_request()).
-*
-* However, if we *are* the worker, we must not delegate to ourselves.
-*/
+   D_ASSERT((unsigned)(last - first) <= 1);
+   D_ASSERT(atomic_read(&mdev->local_cnt) > 0);
+
+   /* FIXME figure out a fast path for bios crossing AL extent boundaries 
*/
+   if (first != last)
+   return false;
+
+   spin_lock_irq(&mdev->al_lock);
+   fastpath_ok =
+   lc_find(mdev->resync, first/AL_EXT_PER_BM_SECT) == NULL &&
+   lc_try_get(mdev->act_log, first) != NULL;
+   spin_unlock_irq(&mdev->al_lock);
+   return fastpath_ok;
+}
 
-   if (delegate)
-   BUG_ON(current == mdev->tconn->worker.task);
+bool drbd_al_begin_io_prepare(struct drbd_conf *mdev, struct drbd_interval *i)
+{
+   /* for bios crossing activity log extent boundaries,
+* we may need to activate two extents in one go */
+   unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
+   unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) 
>> (AL_EXTENT_SHIFT-9);
+   unsigned enr;
+   bool need_transaction = false;
 
D_ASSERT(first <= last);
D_ASSERT(atomic_read(&mdev->local_cnt) > 0);
@@ -280,11 +286,28 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct 
drbd_interval *i, bool dele
if (al_ext->lc_number != enr)
need_transaction = true;
}
+   return need_transaction;
+}
 
-   /* If *this* request was to an already active extent,
-* we're done, even if there are pending changes. */
-   if (!need_transaction)
-   return;
+static int al_write_transaction(struct drbd_conf *mdev, bool delegate);
+
+/* When called through generic_make_request(), we must delegate
+ * activity log I/O to the worker thread: a further request
+ * submitted via generic_make_request() within the same task
+ * would be queued on current->bio_list, and would only start
+ * after this function returns (see generic_make_request()).
+ *
+ * However, if we *are* the worker, we must not delegate to ourselves.
+ */
+
+/*
+ * @delegate:   delegate activity log I/O to the worker thread
+ */
+void drbd_al_begin_io_commit(struct drbd_conf *mdev, bool delegate)
+{
+   bool locked = false;
+
+   BUG_ON(delegate && current == mdev->tconn->worker.task);
 
/* Serialize multiple transactions.
 * This uses test_and_set_bit, memory barrier is implicit.
@@ -303,11 +326,8 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct 
drbd_interval *i, bool dele
write_al_updates = 
rcu_dereference(mdev->ldev->disk_conf)->al_updates;
rcu_read_unlock();
 
-   if (write_al_updates) {
+   if (write_al_updates)
al_write_transaction(mdev, delegate);
-   mdev->al_writ_cnt++;
-   }
-
spin_lock_irq(&mdev->al_lock);
/* FIXME

[PATCH 12/18] drbd: split out some helper functions to drbd_al_begin_io

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

To make the code easier to follow,
use an explicit find_active_resync_extent(),
and add a "nonblock" parameter to _al_get().

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_actlog.c |   49 ++
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index e4f1231..ff03f90 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -222,25 +222,37 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct 
drbd_backing_dev *bdev,
return err;
 }
 
-static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
+static struct bm_extent *find_active_resync_extent(struct drbd_conf *mdev, 
unsigned int enr)
 {
-   struct lc_element *al_ext;
struct lc_element *tmp;
-   int wake;
-
-   spin_lock_irq(&mdev->al_lock);
tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
if (unlikely(tmp != NULL)) {
struct bm_extent  *bm_ext = lc_entry(tmp, struct bm_extent, 
lce);
-   if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
-   wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
-   spin_unlock_irq(&mdev->al_lock);
-   if (wake)
-   wake_up(&mdev->al_wait);
-   return NULL;
-   }
+   if (test_bit(BME_NO_WRITES, &bm_ext->flags))
+   return bm_ext;
}
-   al_ext = lc_get(mdev->act_log, enr);
+   return NULL;
+}
+
+static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr, 
bool nonblock)
+{
+   struct lc_element *al_ext;
+   struct bm_extent *bm_ext;
+   int wake;
+
+   spin_lock_irq(&mdev->al_lock);
+   bm_ext = find_active_resync_extent(mdev, enr);
+   if (bm_ext) {
+   wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
+   spin_unlock_irq(&mdev->al_lock);
+   if (wake)
+   wake_up(&mdev->al_wait);
+   return NULL;
+   }
+   if (nonblock)
+   al_ext = lc_try_get(mdev->act_log, enr);
+   else
+   al_ext = lc_get(mdev->act_log, enr);
spin_unlock_irq(&mdev->al_lock);
return al_ext;
 }
@@ -251,7 +263,6 @@ bool drbd_al_begin_io_fastpath(struct drbd_conf *mdev, 
struct drbd_interval *i)
 * we may need to activate two extents in one go */
unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) 
>> (AL_EXTENT_SHIFT-9);
-   bool fastpath_ok = true;
 
D_ASSERT((unsigned)(last - first) <= 1);
D_ASSERT(atomic_read(&mdev->local_cnt) > 0);
@@ -260,12 +271,7 @@ bool drbd_al_begin_io_fastpath(struct drbd_conf *mdev, 
struct drbd_interval *i)
if (first != last)
return false;
 
-   spin_lock_irq(&mdev->al_lock);
-   fastpath_ok =
-   lc_find(mdev->resync, first/AL_EXT_PER_BM_SECT) == NULL &&
-   lc_try_get(mdev->act_log, first) != NULL;
-   spin_unlock_irq(&mdev->al_lock);
-   return fastpath_ok;
+   return _al_get(mdev, first, true);
 }
 
 bool drbd_al_begin_io_prepare(struct drbd_conf *mdev, struct drbd_interval *i)
@@ -282,7 +288,8 @@ bool drbd_al_begin_io_prepare(struct drbd_conf *mdev, 
struct drbd_interval *i)
 
for (enr = first; enr <= last; enr++) {
struct lc_element *al_ext;
-   wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr)) != 
NULL);
+   wait_event(mdev->al_wait,
+   (al_ext = _al_get(mdev, enr, false)) != NULL);
if (al_ext->lc_number != enr)
need_transaction = true;
}
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 09/18] drbd: split __drbd_make_request in before and after drbd_al_begin_io

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

This is in preparation to be able to defer requests that need to wait
for an activity log transaction to a submitter workqueue.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_req.c |   40 ++--
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 7d1ff1a..96d5968 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -34,14 +34,14 @@
 static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, 
int size);
 
 /* Update disk stats at start of I/O request */
-static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request 
*req, struct bio *bio)
+static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request 
*req)
 {
-   const int rw = bio_data_dir(bio);
+   const int rw = bio_data_dir(req->master_bio);
int cpu;
cpu = part_stat_lock();
part_round_stats(cpu, &mdev->vdisk->part0);
part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]);
-   part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio));
+   part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], req->i.size >> 9);
(void) cpu; /* The macro invocations above want the cpu argument, I do 
not like
   the compiler warning about cpu only assigned but never 
used... */
part_inc_in_flight(&mdev->vdisk->part0, rw);
@@ -1020,12 +1020,16 @@ drbd_submit_req_private_bio(struct drbd_request *req)
bio_endio(bio, -EIO);
 }
 
-void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned 
long start_time)
+/* returns the new drbd_request pointer, if the caller is expected to
+ * drbd_send_and_submit() it (to save latency), or NULL if we queued the
+ * request on the submitter thread.
+ * Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request.
+ */
+struct drbd_request *
+drbd_request_prepare(struct drbd_conf *mdev, struct bio *bio, unsigned long 
start_time)
 {
-   const int rw = bio_rw(bio);
-   struct bio_and_error m = { NULL, };
+   const int rw = bio_data_dir(bio);
struct drbd_request *req;
-   bool no_remote = false;
 
/* allocate outside of all locks; */
req = drbd_req_new(mdev, bio);
@@ -1035,7 +1039,7 @@ void __drbd_make_request(struct drbd_conf *mdev, struct 
bio *bio, unsigned long
 * if user cannot handle io errors, that's not our business. */
dev_err(DEV, "could not kmalloc() req\n");
bio_endio(bio, -ENOMEM);
-   return;
+   return ERR_PTR(-ENOMEM);
}
req->start_time = start_time;
 
@@ -1057,6 +1061,15 @@ void __drbd_make_request(struct drbd_conf *mdev, struct 
bio *bio, unsigned long
drbd_al_begin_io(mdev, &req->i, true);
}
 
+   return req;
+}
+
+static void drbd_send_and_submit(struct drbd_conf *mdev, struct drbd_request 
*req)
+{
+   const int rw = bio_rw(req->master_bio);
+   struct bio_and_error m = { NULL, };
+   bool no_remote = false;
+
spin_lock_irq(&mdev->tconn->req_lock);
if (rw == WRITE) {
/* This may temporarily give up the req_lock,
@@ -1079,7 +1092,7 @@ void __drbd_make_request(struct drbd_conf *mdev, struct 
bio *bio, unsigned long
}
 
/* Update disk stats */
-   _drbd_start_io_acct(mdev, req, bio);
+   _drbd_start_io_acct(mdev, req);
 
/* We fail READ/READA early, if we can not serve it.
 * We must do this before req is registered on any lists.
@@ -1137,7 +1150,14 @@ out:
 
if (m.bio)
complete_master_bio(mdev, &m);
-   return;
+}
+
+void __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned 
long start_time)
+{
+   struct drbd_request *req = drbd_request_prepare(mdev, bio, start_time);
+   if (IS_ERR_OR_NULL(req))
+   return;
+   drbd_send_and_submit(mdev, req);
 }
 
 void drbd_make_request(struct request_queue *q, struct bio *bio)
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 10/18] drbd: prepare to queue write requests on a submit worker

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_int.h  |   13 +
 drivers/block/drbd/drbd_main.c |   25 -
 drivers/block/drbd/drbd_nl.c   |1 +
 drivers/block/drbd/drbd_req.c  |   29 +
 4 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 453fccf..a6b71b6 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -894,6 +894,14 @@ struct drbd_tconn {/* is a 
resource from the config file */
} send;
 };
 
+struct submit_worker {
+   struct workqueue_struct *wq;
+   struct work_struct worker;
+
+   spinlock_t lock;
+   struct list_head writes;
+};
+
 struct drbd_conf {
struct drbd_tconn *tconn;
int vnr;/* volume number within the connection 
*/
@@ -1034,6 +1042,10 @@ struct drbd_conf {
atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */
unsigned int peer_max_bio_size;
unsigned int local_max_bio_size;
+
+   /* any requests that would block in drbd_make_request()
+* are deferred to this single-threaded work queue */
+   struct submit_worker submit;
 };
 
 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -1440,6 +1452,7 @@ extern void conn_free_crypto(struct drbd_tconn *tconn);
 extern int proc_details;
 
 /* drbd_req */
+extern void do_submit(struct work_struct *ws);
 extern void __drbd_make_request(struct drbd_conf *, struct bio *, unsigned 
long);
 extern void drbd_make_request(struct request_queue *q, struct bio *bio);
 extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 3d212b9..c84226e 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -45,7 +45,7 @@
 #include 
 #include 
 #include 
-
+#include 
 #define __KERNEL_SYSCALLS__
 #include 
 #include 
@@ -2300,6 +2300,7 @@ static void drbd_cleanup(void)
idr_for_each_entry(&minors, mdev, i) {
idr_remove(&minors, mdev_to_minor(mdev));
idr_remove(&mdev->tconn->volumes, mdev->vnr);
+   destroy_workqueue(mdev->submit.wq);
del_gendisk(mdev->vdisk);
/* synchronize_rcu(); No other threads running at this point */
kref_put(&mdev->kref, &drbd_minor_destroy);
@@ -2589,6 +2590,21 @@ void conn_destroy(struct kref *kref)
kfree(tconn);
 }
 
+int init_submitter(struct drbd_conf *mdev)
+{
+   /* opencoded create_singlethread_workqueue(),
+* to be able to say "drbd%d", ..., minor */
+   mdev->submit.wq = alloc_workqueue("drbd%u_submit",
+   WQ_UNBOUND | WQ_MEM_RECLAIM, 1, mdev->minor);
+   if (!mdev->submit.wq)
+   return -ENOMEM;
+
+   INIT_WORK(&mdev->submit.worker, do_submit);
+   spin_lock_init(&mdev->submit.lock);
+   INIT_LIST_HEAD(&mdev->submit.writes);
+   return 0;
+}
+
 enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int 
minor, int vnr)
 {
struct drbd_conf *mdev;
@@ -2679,6 +2695,13 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn 
*tconn, unsigned int minor,
drbd_msg_put_info("requested volume exists already");
goto out_idr_remove_vol;
}
+
+   if (init_submitter(mdev)) {
+   err = ERR_NOMEM;
+   drbd_msg_put_info("unable to create submit workqueue");
+   goto out_idr_remove_vol;
+   }
+
add_disk(disk);
kref_init(&mdev->kref); /* one ref for both idrs and the the add_disk */
 
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 974ea47..bcf900b 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -3173,6 +3173,7 @@ static enum drbd_ret_code adm_delete_minor(struct 
drbd_conf *mdev)
CS_VERBOSE + CS_WAIT_COMPLETE);
idr_remove(&mdev->tconn->volumes, mdev->vnr);
idr_remove(&minors, mdev_to_minor(mdev));
+   destroy_workqueue(mdev->submit.wq);
del_gendisk(mdev->vdisk);
synchronize_rcu();
kref_put(&mdev->kref, &drbd_minor_destroy);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 96d5968..4af709e 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1160,6 +1160,35 @@ void __drbd_make_request(struct drbd_conf *mdev, struct 
bio *bio, unsigned long
drbd_send_and_submit(mdev, req);
 }
 
+void __drbd_make_request_from_worker(struct drbd_c

[PATCH 07/18] drbd: Clarify when activity log I/O is delegated to the worker thread

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_actlog.c   |   49 
 drivers/block/drbd/drbd_int.h  |2 +-
 drivers/block/drbd/drbd_receiver.c |2 +-
 drivers/block/drbd/drbd_req.c  |2 +-
 drivers/block/drbd/drbd_worker.c   |2 +-
 5 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index c79625a..82199d9 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -104,7 +104,7 @@ struct update_al_work {
int err;
 };
 
-static int al_write_transaction(struct drbd_conf *mdev);
+static int al_write_transaction(struct drbd_conf *mdev, bool delegate);
 
 void *drbd_md_get_buffer(struct drbd_conf *mdev)
 {
@@ -246,7 +246,10 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, 
unsigned int enr)
return al_ext;
 }
 
-void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i)
+/*
+ * @delegate:   delegate activity log I/O to the worker thread
+ */
+void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, bool 
delegate)
 {
/* for bios crossing activity log extent boundaries,
 * we may need to activate two extents in one go */
@@ -255,6 +258,17 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct 
drbd_interval *i)
unsigned enr;
bool locked = false;
 
+   /* When called through generic_make_request(), we must delegate
+* activity log I/O to the worker thread: a further request
+* submitted via generic_make_request() within the same task
+* would be queued on current->bio_list, and would only start
+* after this function returns (see generic_make_request()).
+*
+* However, if we *are* the worker, we must not delegate to ourselves.
+*/
+
+   if (delegate)
+   BUG_ON(current == mdev->tconn->worker.task);
 
D_ASSERT(first <= last);
D_ASSERT(atomic_read(&mdev->local_cnt) > 0);
@@ -270,13 +284,6 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct 
drbd_interval *i)
(locked = lc_try_lock_for_transaction(mdev->act_log)));
 
if (locked) {
-   /* drbd_al_write_transaction(mdev,al_ext,enr);
-* recurses into generic_make_request(), which
-* disallows recursion, bios being serialized on the
-* current->bio_tail list now.
-* we have to delegate updates to the activity log
-* to the worker thread. */
-
/* Double check: it may have been committed by someone else,
 * while we have been waiting for the lock. */
if (mdev->act_log->pending_changes) {
@@ -287,7 +294,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct 
drbd_interval *i)
rcu_read_unlock();
 
if (write_al_updates) {
-   al_write_transaction(mdev);
+   al_write_transaction(mdev, delegate);
mdev->al_writ_cnt++;
}
 
@@ -495,20 +502,18 @@ static int w_al_write_transaction(struct drbd_work *w, 
int unused)
 /* Calls from worker context (see w_restart_disk_io()) need to write the
transaction directly. Others came through generic_make_request(),
those need to delegate it to the worker. */
-static int al_write_transaction(struct drbd_conf *mdev)
+static int al_write_transaction(struct drbd_conf *mdev, bool delegate)
 {
-   struct update_al_work al_work;
-
-   if (current == mdev->tconn->worker.task)
+   if (delegate) {
+   struct update_al_work al_work;
+   init_completion(&al_work.event);
+   al_work.w.cb = w_al_write_transaction;
+   al_work.w.mdev = mdev;
+   drbd_queue_work_front(&mdev->tconn->sender_work, &al_work.w);
+   wait_for_completion(&al_work.event);
+   return al_work.err;
+   } else
return _al_write_transaction(mdev);
-
-   init_completion(&al_work.event);
-   al_work.w.cb = w_al_write_transaction;
-   al_work.w.mdev = mdev;
-   drbd_queue_work_front(&mdev->tconn->sender_work, &al_work.w);
-   wait_for_completion(&al_work.event);
-
-   return al_work.err;
 }
 
 static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext)
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 6eecdec..453fccf 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1598,7 +1598,7 @@ extern const char *drbd_conn_str(enum drbd_conns s);
 extern const char *drbd_role_str(enum drbd_role s);
 
 /* drbd_actlog.c */
-extern void drbd_al_begin_io(struct dr

[PATCH 08/18] drbd: drbd_al_begin_io: short circuit to reduce latency

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

A request hitting an already "hot" extent should proceed right away,
even if some other requests need to wait for pending transactions.

Without that short-circuit, several simultaneous make_request contexts
race for committing the transaction, possibly penalizing the innocent.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_actlog.c |   14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 82199d9..1d7244d 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -256,6 +256,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct 
drbd_interval *i, bool dele
unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) 
>> (AL_EXTENT_SHIFT-9);
unsigned enr;
+   bool need_transaction = false;
bool locked = false;
 
/* When called through generic_make_request(), we must delegate
@@ -273,8 +274,17 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct 
drbd_interval *i, bool dele
D_ASSERT(first <= last);
D_ASSERT(atomic_read(&mdev->local_cnt) > 0);
 
-   for (enr = first; enr <= last; enr++)
-   wait_event(mdev->al_wait, _al_get(mdev, enr) != NULL);
+   for (enr = first; enr <= last; enr++) {
+   struct lc_element *al_ext;
+   wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr)) != 
NULL);
+   if (al_ext->lc_number != enr)
+   need_transaction = true;
+   }
+
+   /* If *this* request was to an already active extent,
+* we're done, even if there are pending changes. */
+   if (!need_transaction)
+   return;
 
/* Serialize multiple transactions.
 * This uses test_and_set_bit, memory barrier is implicit.
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 06/18] drbd: read meta data early, base on-disk offsets on super block

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

We used to calculate all on-disk meta data offsets, and then compare
the stored offsets, basically treating them as magic numbers.

Now with the activity log striping, the activity log size is no longer
fixed.  We need to first read the super block, then base the activity
log and bitmap offsets on the stored offsets/al stripe settings.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_actlog.c |   11 +++-
 drivers/block/drbd/drbd_main.c   |  131 +++---
 drivers/block/drbd/drbd_nl.c |   15 ++---
 drivers/block/drbd/drbd_worker.c |3 +-
 4 files changed, 123 insertions(+), 37 deletions(-)

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 7e7680e..c79625a 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -168,7 +168,11 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
bio->bi_end_io = drbd_md_io_complete;
bio->bi_rw = rw;
 
-   if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* Corresponding put_ldev 
in drbd_md_io_complete() */
+   if (!(rw & WRITE) && mdev->state.disk == D_DISKLESS && mdev->ldev == 
NULL)
+   /* special case, drbd_md_read() during drbd_adm_attach(): no 
get_ldev */
+   ;
+   else if (!get_ldev_if_state(mdev, D_ATTACHING)) {
+   /* Corresponding put_ldev in drbd_md_io_complete() */
dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in 
_drbd_md_sync_page_io()\n");
err = -ENODEV;
goto out;
@@ -199,9 +203,10 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct 
drbd_backing_dev *bdev,
 
BUG_ON(!bdev->md_bdev);
 
-   dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n",
+   dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
 current->comm, current->pid, __func__,
-(unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
+(unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ",
+(void*)_RET_IP_ );
 
if (sector < drbd_md_first_sector(bdev) ||
sector + 7 > drbd_md_last_sector(bdev))
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index d8bbb41..3d212b9 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2971,6 +2971,86 @@ err:
return -EINVAL;
 }
 
+static int check_offsets_and_sizes(struct drbd_conf *mdev, struct 
drbd_backing_dev *bdev)
+{
+   sector_t capacity = drbd_get_capacity(bdev->md_bdev);
+   struct drbd_md *in_core = &bdev->md;
+   s32 on_disk_al_sect;
+   s32 on_disk_bm_sect;
+
+   /* The on-disk size of the activity log, calculated from offsets, and
+* the size of the activity log calculated from the stripe settings,
+* should match.
+* Though we could relax this a bit: it is ok, if the striped activity 
log
+* fits in the available on-disk activity log size.
+* Right now, that would break how resize is implemented.
+* TODO: make drbd_determine_dev_size() (and the drbdmeta tool) aware
+* of possible unused padding space in the on disk layout. */
+   if (in_core->al_offset < 0) {
+   if (in_core->bm_offset > in_core->al_offset)
+   goto err;
+   on_disk_al_sect = -in_core->al_offset;
+   on_disk_bm_sect = in_core->al_offset - in_core->bm_offset;
+   } else {
+   if (in_core->al_offset != MD_4kB_SECT)
+   goto err;
+   if (in_core->bm_offset < in_core->al_offset + 
in_core->al_size_4k * MD_4kB_SECT)
+   goto err;
+
+   on_disk_al_sect = in_core->bm_offset - MD_4kB_SECT;
+   on_disk_bm_sect = in_core->md_size_sect - in_core->bm_offset;
+   }
+
+   /* old fixed size meta data is exactly that: fixed. */
+   if (in_core->meta_dev_idx >= 0) {
+   if (in_core->md_size_sect != MD_128MB_SECT
+   ||  in_core->al_offset != MD_4kB_SECT
+   ||  in_core->bm_offset != MD_4kB_SECT + MD_32kB_SECT
+   ||  in_core->al_stripes != 1
+   ||  in_core->al_stripe_size_4k != MD_32kB_SECT/8)
+   goto err;
+   }
+
+   if (capacity < in_core->md_size_sect)
+   goto err;
+   if (capacity - in_core->md_size_sect < drbd_md_first_sector(bdev))
+   goto err;
+
+   /* should be aligned, and at least 32k */
+   if ((on_disk_al_sect & 7) || (on_disk_al_sect < MD_32kB_SECT))
+   goto err;
+
+   /* should fit (for now: exactly) into the available on-disk space;

[PATCH 05/18] drbd: mechanically rename la_size to la_size_sect

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

Make it obvious that this value is in units of 512 Byte sectors.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_main.c |6 +++---
 drivers/block/drbd/drbd_nl.c   |   16 
 drivers/block/drbd/drbd_receiver.c |2 +-
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index b9bfb10..d8bbb41 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2839,7 +2839,7 @@ void conn_md_sync(struct drbd_tconn *tconn)
 
 /* aligned 4kByte */
 struct meta_data_on_disk {
-   u64 la_size;   /* last agreed size. */
+   u64 la_size_sect;  /* last agreed size. */
u64 uuid[UI_SIZE];   /* UUIDs. */
u64 device_uuid;
u64 reserved_u64_1;
@@ -2890,7 +2890,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
 
memset(buffer, 0, sizeof(*buffer));
 
-   buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
+   buffer->la_size_sect = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
for (i = UI_CURRENT; i < UI_SIZE; i++)
buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
@@ -3052,7 +3052,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct 
drbd_backing_dev *bdev)
 
rv = NO_ERROR;
 
-   bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
+   bdev->md.la_size_sect = be64_to_cpu(buffer->la_size_sect);
for (i = UI_CURRENT; i < UI_SIZE; i++)
bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
bdev->md.flags = be32_to_cpu(buffer->flags);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 5621df8..d5211b0 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -819,7 +819,7 @@ void drbd_resume_io(struct drbd_conf *mdev)
 enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum 
dds_flags flags) __must_hold(local)
 {
sector_t prev_first_sect, prev_size; /* previous meta location */
-   sector_t la_size, u_size;
+   sector_t la_size_sect, u_size;
sector_t size;
char ppb[10];
 
@@ -842,7 +842,7 @@ enum determine_dev_size drbd_determine_dev_size(struct 
drbd_conf *mdev, enum dds
 
prev_first_sect = drbd_md_first_sector(mdev->ldev);
prev_size = mdev->ldev->md.md_size_sect;
-   la_size = mdev->ldev->md.la_size_sect;
+   la_size_sect = mdev->ldev->md.la_size_sect;
 
/* TODO: should only be some assert here, not (re)init... */
drbd_md_set_sector_offsets(mdev, mdev->ldev);
@@ -878,7 +878,7 @@ enum determine_dev_size drbd_determine_dev_size(struct 
drbd_conf *mdev, enum dds
if (rv == dev_size_error)
goto out;
 
-   la_size_changed = (la_size != mdev->ldev->md.la_size_sect);
+   la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect);
 
md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
|| prev_size   != mdev->ldev->md.md_size_sect;
@@ -900,9 +900,9 @@ enum determine_dev_size drbd_determine_dev_size(struct 
drbd_conf *mdev, enum dds
drbd_md_mark_dirty(mdev);
}
 
-   if (size > la_size)
+   if (size > la_size_sect)
rv = grew;
-   if (size < la_size)
+   if (size < la_size_sect)
rv = shrunk;
 out:
lc_unlock(mdev->act_log);
@@ -917,7 +917,7 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct 
drbd_backing_dev *bdev,
  sector_t u_size, int assume_peer_has_space)
 {
sector_t p_size = mdev->p_size;   /* partner's disk size. */
-   sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */
+   sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
sector_t m_size; /* my size */
sector_t size = 0;
 
@@ -931,8 +931,8 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct 
drbd_backing_dev *bdev,
if (p_size && m_size) {
size = min_t(sector_t, p_size, m_size);
} else {
-   if (la_size) {
-   size = la_size;
+   if (la_size_sect) {
+   size = la_size_sect;
if (m_size && m_size < size)
size = m_size;
if (p_size && p_size < size)
diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index a9eccfc..8172a2c 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3992,7 +3992,7 @@ static int receive_state(struct drbd_tconn *tconn, struct 
packet_info *pi)
 
clear_bit(DISCARD_MY_DATA,

[PATCH 02/18] drbd: cleanup ondisk meta data layout calculations and defines

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

Add a comment about our meta data layout variants,
and rename a few defines (e.g. MD_RESERVED_SECT -> MD_128MB_SECT)
to make it clear that they are short hand for fixed constants,
and not arbitrarily to be redefined as one may see fit.

Properly pad struct meta_data_on_disk to 4kB,
and initialize to zero not only the first 512 Byte,
but all of it in drbd_md_sync().

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_actlog.c |   28 ++---
 drivers/block/drbd/drbd_bitmap.c |   13 +-
 drivers/block/drbd/drbd_int.h|   86 ++
 drivers/block/drbd/drbd_main.c   |   11 +++--
 drivers/block/drbd/drbd_nl.c |   42 ++-
 5 files changed, 123 insertions(+), 57 deletions(-)

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 92510f8..b230d91 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -209,7 +209,8 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct 
drbd_backing_dev *bdev,
 current->comm, current->pid, __func__,
 (unsigned long long)sector, (rw & WRITE) ? "WRITE" : 
"READ");
 
-   err = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE);
+   /* we do all our meta data IO in aligned 4k blocks. */
+   err = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, 4096);
if (err) {
dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed with error 
%d\n",
(unsigned long long)sector, (rw & WRITE) ? "WRITE" : 
"READ", err);
@@ -350,6 +351,24 @@ static unsigned int rs_extent_to_bm_page(unsigned int 
rs_enr)
 (BM_EXT_SHIFT - BM_BLOCK_SHIFT));
 }
 
+static sector_t al_tr_number_to_on_disk_sector(struct drbd_conf *mdev)
+{
+   const unsigned int stripes = 1;
+   const unsigned int stripe_size_4kB = MD_32kB_SECT/MD_4kB_SECT;
+
+   /* transaction number, modulo on-disk ring buffer wrap around */
+   unsigned int t = mdev->al_tr_number % (stripe_size_4kB * stripes);
+
+   /* ... to aligned 4k on disk block */
+   t = ((t % stripes) * stripe_size_4kB) + t/stripes;
+
+   /* ... to 512 byte sector in activity log */
+   t *= 8;
+
+   /* ... plus offset to the on disk position */
+   return mdev->ldev->md.md_offset + mdev->ldev->md.al_offset + t;
+}
+
 static int
 _al_write_transaction(struct drbd_conf *mdev)
 {
@@ -432,13 +451,12 @@ _al_write_transaction(struct drbd_conf *mdev)
if (mdev->al_tr_cycle >= mdev->act_log->nr_elements)
mdev->al_tr_cycle = 0;
 
-   sector =  mdev->ldev->md.md_offset
-   + mdev->ldev->md.al_offset
-   + mdev->al_tr_pos * (MD_BLOCK_SIZE>>9);
+   sector = al_tr_number_to_on_disk_sector(mdev);
 
crc = crc32c(0, buffer, 4096);
buffer->crc32c = cpu_to_be32(crc);
 
+   /* normal execution path goes through all three branches */
if (drbd_bm_write_hinted(mdev))
err = -EIO;
/* drbd_chk_io_error done already */
@@ -446,8 +464,6 @@ _al_write_transaction(struct drbd_conf *mdev)
err = -EIO;
drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
} else {
-   /* advance ringbuffer position and transaction counter */
-   mdev->al_tr_pos = (mdev->al_tr_pos + 1) % 
(MD_AL_SECTORS*512/MD_BLOCK_SIZE);
mdev->al_tr_number++;
}
 
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 8dc2950..64fbb83 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -612,6 +612,17 @@ static void bm_memset(struct drbd_bitmap *b, size_t 
offset, int c, size_t len)
}
 }
 
+/* For the layout, see comment above drbd_md_set_sector_offsets(). */
+static u64 drbd_md_on_disk_bits(struct drbd_backing_dev *ldev)
+{
+   u64 bitmap_sectors;
+   if (ldev->md.al_offset == 8)
+   bitmap_sectors = ldev->md.md_size_sect - ldev->md.bm_offset;
+   else
+   bitmap_sectors = ldev->md.al_offset - ldev->md.bm_offset;
+   return bitmap_sectors << (9 + 3);
+}
+
 /*
  * make sure the bitmap has enough room for the attached storage,
  * if necessary, resize.
@@ -668,7 +679,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t 
capacity, int set_new_bits)
words = ALIGN(bits, 64) >> LN2_BPL;
 
if (get_ldev(mdev)) {
-   u64 bits_on_disk = 
((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12;
+   u64 bits_on_disk = drbd_md_on_disk_bits(mdev->ldev);
put_ldev(mdev);
if (bits > bits_on_disk) {
dev_info(DEV, "bits =

[PATCH 01/18] drbd: cleanup bogus assert message

2013-03-19 Thread Philipp Reisner

From: Lars Ellenberg 

This fixes ASSERT( mdev->state.disk == D_FAILED ) in 
drivers/block/drbd/drbd_main.c

When we detach from local disk, we let the local refcount hit zero twice.

First, we transition to D_FAILED, so we won't give out new references
to incoming requests; we still may give out *internal* references, though.
Once the refcount hits zero [1] while in D_FAILED, we queue a transition
to D_DISKLESS to our worker.  We need to queue it, because we may be in
atomic context when putting the reference.
Once the transition to D_DISKLESS actually happened [2] from worker context,
we don't give out new internal references either.

Between hitting zero the first time [1] and actually transitioning to
D_DISKLESS [2], there may be a few very short lived internal get/put,
so we may hit zero more than once while being in D_FAILED, or even see a
race where an internal get_ldev() happened while D_FAILED, but the
corresponding put_ldev() happens just after the transition to D_DISKLESS.

That's why we have the additional test_and_set_bit(GO_DISKLESS,);
and that's why the assert was placed wrong.
Since there was exactly one code path left to drbd_go_diskless(),
and that checks already for D_FAILED, drop that assert,
and fold in the drbd_queue_work().

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_int.h  |7 ---
 drivers/block/drbd/drbd_main.c |7 ---
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 6b51afa..db504d0 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1148,7 +1148,6 @@ extern int drbd_bitmap_io_from_worker(struct drbd_conf 
*mdev,
char *why, enum bm_flag flags);
 extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
 extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
-extern void drbd_go_diskless(struct drbd_conf *mdev);
 extern void drbd_ldev_destroy(struct drbd_conf *mdev);
 
 /* Meta data layout
@@ -2053,9 +2052,11 @@ static inline void put_ldev(struct drbd_conf *mdev)
if (mdev->state.disk == D_DISKLESS)
/* even internal references gone, safe to destroy */
drbd_ldev_destroy(mdev);
-   if (mdev->state.disk == D_FAILED)
+   if (mdev->state.disk == D_FAILED) {
/* all application IO references gone. */
-   drbd_go_diskless(mdev);
+   if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
+   drbd_queue_work(&mdev->tconn->sender_work, 
&mdev->go_diskless);
+   }
wake_up(&mdev->misc_wait);
}
 }
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 8c13eeb..6224963 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3255,13 +3255,6 @@ static int w_go_diskless(struct drbd_work *w, int unused)
return 0;
 }
 
-void drbd_go_diskless(struct drbd_conf *mdev)
-{
-   D_ASSERT(mdev->state.disk == D_FAILED);
-   if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
-   drbd_queue_work(&mdev->tconn->sender_work, &mdev->go_diskless);
-}
-
 /**
  * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
  * @mdev:  DRBD device.
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[GIT PULL] drbd build fix in case CONFIG_CRYPTO_HMAC is not set

2013-02-04 Thread Philipp Reisner

The following changes since commit d88c3ab963d4cce09b25ef661b871bd7af6dad0d:

  drbd: only fail empty flushes if no good data is reachable (2013-01-30 
10:40:33 +0100)

are available in the git repository at:

  git://git.drbd.org/linux-drbd.git for-jens-3.8-fix

for you to fetch changes up to 78aa7987a223e8542f2735dace439690c6171ac5:

  drbd: Fix build error when CONFIG_CRYPTO_HMAC is not set (2013-02-04 18:14:03 
+0100)


Philipp Reisner (1):
  drbd: Fix build error when CONFIG_CRYPTO_HMAC is not set

 drivers/block/drbd/drbd_receiver.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index a9eccfc..6e27dde 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -4659,8 +4659,8 @@ static int drbd_do_features(struct drbd_tconn *tconn)
 #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
 static int drbd_do_auth(struct drbd_tconn *tconn)
 {
-   dev_err(DEV, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
-   dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
+   conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
+   conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
return -1;
 }
 #else

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [GIT PULL] drbd build fix in case CONFIG_CRYPTO_HMAC is not set

2013-02-05 Thread Philipp Reisner

> NOT pulled. Base the branch off my for-linus. When I pull the below into
> that, I don't get a small patch, I get all changes from 3.7 to 3.8-rc5.

Ok, now based on your for-linus.


The following changes since commit 1383923d1985cef2bceb8128094fbe5e05de7435:

  Merge branch 'for-jens' of git://git.drbd.org/linux-drbd into for-linus 
(2013-01-22 08:22:11 -0700)

are available in the git repository at:


  git://git.drbd.org/linux-drbd.git for-jens-3.8-fix

for you to fetch changes up to 23c275cba16615ac8cd3815cb1a86fd64cf12aa6:

  drbd: Fix build error when CONFIG_CRYPTO_HMAC is not set (2013-02-05 15:58:34 
+0100)

--------
Philipp Reisner (1):
  drbd: Fix build error when CONFIG_CRYPTO_HMAC is not set

 drivers/block/drbd/drbd_receiver.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index a9eccfc..6e27dde 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -4659,8 +4659,8 @@ static int drbd_do_features(struct drbd_tconn *tconn)
 #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
 static int drbd_do_auth(struct drbd_tconn *tconn)
 {
-   dev_err(DEV, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
-   dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
+   conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
+   conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
return -1;
 }
 #else


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[GIT PULL] drbd activity log improvements for the 3.9 merge window

2013-01-30 Thread Philipp Reisner

Hi Jens,

this is our contribution to the next merge-window:

With this release we have three important improvements to the activity log:

  1 Starting with 8.4.3 the activity log has a variable size on disk
(before it was fixed 32KiByte). 
-> The limitation of 6433 on the al-extents option no longer applies.

  2 The activity log can be organised as a stripe set in the meta-data.

  3 Never block a caller submitting a BIO, but hand off the writing
of AL-updates to a dedicated worker thread.
(In the past DRBD blocked the caller when a submitted BIO is not
 covered by the current active set, until the transaction was written
 into the AL.)

With these changes we see a substantial improvement in overall IOPs,
on workloads that are bigger than the active set (al-extents * 4MiB).



The following changes since commit 478c030eecbec927d62561c5f48a4515ea0fa21a:

  drivers/block/mtip32xx/mtip32xx.c:1726:5: sparse: symbol 'mtip_send_trim' was 
not declared. Should it be static? (2013-01-12 09:15:19 +0100)

are available in the git repository at:

  git://git.drbd.org/linux-drbd.git for-jens-3.9-drivers

for you to fetch changes up to 99cb6a9f0230ed31b712c48983a2ce9ca20a30e1:

  drbd: adjust upper limit for activity log extents (2013-01-30 10:19:58 +0100)


Lars Ellenberg (18):
  drbd: cleanup bogus assert message
  drbd: cleanup ondisk meta data layout calculations and defines
  drbd: prepare for new striped layout of activity log
  drbd: use the cached meta_dev_idx
  drbd: mechanically rename la_size to la_size_sect
  drbd: read meta data early, base on-disk offsets on super block
  drbd: Clarify when activity log I/O is delegated to the worker thread
  drbd: drbd_al_being_io: short circuit to reduce latency
  drbd: split __drbd_make_request in before and after drbd_al_begin_io
  drbd: prepare to queue write requests on a submit worker
  drbd: split drbd_al_begin_io into fastpath, prepare, and commit
  drbd: split out some helper functions to drbd_al_begin_io
  drbd: queue writes on submitter thread, unless they pass the activity log 
fastpath
  lru_cache: introduce lc_get_cumulative()
  drbd: consolidate as many updates as possible into one AL transaction
  drbd: move start io accounting before activity log transaction
  drbd: try hard to max out the updates per AL transaction
  drbd: adjust upper limit for activity log extents

Philipp Reisner (7):
  drbd: reset ap_in_flight counter for new connections
  drbd: abort start of resync early, if it raced with connection breakage
  drbd: move invalidating the whole bitmap out of after_state ch()
  drbd: fix effective error returned when refusing an invalidate
  drbd: drop now useless duplicate state request from invalidate
  drbd: fix spurious warning about bitmap being locked from detach
  drbd: Fix disconnect to keep the peer disk state if connection breaks 
during operation

 drivers/block/drbd/drbd_actlog.c   |  246 ++-
 drivers/block/drbd/drbd_bitmap.c   |   13 +-
 drivers/block/drbd/drbd_int.h  |  179 +-
 drivers/block/drbd/drbd_main.c |  249 ++--
 drivers/block/drbd/drbd_nl.c   |  194 +---
 drivers/block/drbd/drbd_receiver.c |5 +-
 drivers/block/drbd/drbd_req.c  |  166 +---
 drivers/block/drbd/drbd_state.c|   28 ++--
 drivers/block/drbd/drbd_strings.c  |1 +
 drivers/block/drbd/drbd_worker.c   |9 +-
 include/linux/drbd.h   |5 +-
 include/linux/drbd_limits.h|   11 +-
 include/linux/lru_cache.h  |1 +
 lib/lru_cache.c|   55 ++--
 14 files changed, 842 insertions(+), 320 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[GIT PULL] drbd: Fix flushes without data on a diskless primary node

2013-01-30 Thread Philipp Reisner

Hi Jens,

Please consider forwarding this fix for 3.8 to Linus.

The following changes since commit 949db153b6466c6f7cad5a427ecea94985927311:

  Linux 3.8-rc5 (2013-01-25 11:57:28 -0800)

are available in the git repository at:

  git://git.drbd.org/linux-drbd.git for-jens-3.8-fix

for you to fetch changes up to d88c3ab963d4cce09b25ef661b871bd7af6dad0d:

  drbd: only fail empty flushes if no good data is reachable (2013-01-30 
10:40:33 +0100)


Lars Ellenberg (1):
  drbd: only fail empty flushes if no good data is reachable

 drivers/block/drbd/drbd_req.c |   12 
 drivers/block/drbd/drbd_req.h |8 
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index f58a4a4..41bb058 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -263,8 +263,7 @@ void drbd_req_complete(struct drbd_request *req, struct 
bio_and_error *m)
else
root = &mdev->read_requests;
drbd_remove_request_interval(root, req);
-   } else if (!(s & RQ_POSTPONED))
-   D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0);
+   }
 
/* Before we can signal completion to the upper layers,
 * we may need to close the current transfer log epoch.
@@ -755,6 +754,11 @@ int __req_mod(struct drbd_request *req, enum 
drbd_req_event what,
D_ASSERT(req->rq_state & RQ_NET_PENDING);
mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK|RQ_NET_DONE);
break;
+
+   case QUEUE_AS_DRBD_BARRIER:
+   start_new_tl_epoch(mdev->tconn);
+   mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE);
+   break;
};
 
return rv;
@@ -975,8 +979,8 @@ static int drbd_process_write_request(struct drbd_request 
*req)
/* The only size==0 bios we expect are empty flushes. */
D_ASSERT(req->master_bio->bi_rw & REQ_FLUSH);
if (remote)
-   start_new_tl_epoch(mdev->tconn);
-   return 0;
+   _req_mod(req, QUEUE_AS_DRBD_BARRIER);
+   return remote;
}
 
if (!remote && !send_oos)
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 016de6b..c407f4a 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -88,6 +88,14 @@ enum drbd_req_event {
QUEUE_FOR_NET_READ,
QUEUE_FOR_SEND_OOS,
 
+   /* An empty flush is queued as P_BARRIER,
+* which will cause it to complete "successfully",
+* even if the local disk flush failed.
+*
+* Just like "real" requests, empty flushes (blkdev_issue_flush()) will
+* only see an error if neither local nor remote data is reachable. */
+   QUEUE_AS_DRBD_BARRIER,
+
SEND_CANCELED,
SEND_FAILED,
HANDED_OVER_TO_NETWORK,

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

FLUSH/FUA documentation & code discrepancy

2012-09-04 Thread Philipp Reisner

Hi,

I think commit 1e87901e18 was wrong. Starting with that commit the REQ_FLUSH 
and REQ_FUA bits get stripped away if the queue does not advertise REQ_FLUSH
or REQ_FUA support.

But the REQ_FLUSH bit is also tested for when not merging requests
(blk_queue_bio()) or when it comes to the elevator (blk_flush_plug_list()).

So, since this patch the elevator reorders write requests on queues that 
do not have REQ_FLUSH or REQ_FUA set.

While on queues that have REQ_FLUSH or REQ_FUA set, the elevator does
not reorder writes across FLUSHes.

The Documentation/block/writeback_cache_control.txt file says:
--snip--
Implementation details for filesystems
--

Filesystems can simply set the REQ_FLUSH and REQ_FUA bits and do not have to
worry if the underlying devices need any explicit cache flushing and how
the Forced Unit Access is implemented.  The REQ_FLUSH and REQ_FUA flags
may both be set on a single bio.
--snap--

I have the impression every file system lets IO drain, and issues a
flush afterwards with the blkdev_issue_flush() function. BTW that
function turns into a non-obvious no-op as soon as the queue does not
have the REQ_FUA or REQ_FLUSH bits set. It does not look like it is
a no-op by intention.

The file systems seem to be all fine; only in DRBD we have a mode where
we depend on REQ_FUA/REQ_FLUSH requests being real boundaries for reordering
of writes. This is broken since the mentioned commit as we recently found out.

I suggest that either this commit gets reverted, or the documentation
is updated. I am ready to prepare such a patch, but I need directions
how it should be fixed.

Best regards,
 Phil
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [Drbd-dev] FLUSH/FUA documentation & code discrepancy

2012-09-05 Thread Philipp Reisner

> Currently, FLUSH/FUA doesn't enforce any ordering requirement.  File
> systems are responsible for draining all writes which have to happen
> before and not issue further writes which should come after.

Ok. That is a clear statement. So we will do it that way.

The "Currently" in your statement suggests that there might be something
more mighty in the future. Is that true?

We are looking for a method that allows us to submit some writes, then
an IO-barrier, and then further writes. 

Best,
 Phil
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[GIT PULL] drbd-8.3 updates

2012-10-02 Thread Philipp Reisner

Hi Jens,

Please consider pulling these changes for the 3.7 merge window.

Best,
 Phil

The following changes since commit a0d271cbfed1dd50278c6b06bead3d00ba0a88f9:

  Linux 3.6 (2012-09-30 16:47:46 -0700)

are available in the git repository at:

  git://git.drbd.org/linux-drbd.git for-jens

for you to fetch changes up to a783d564a1badbb87b3f96aa8df581ed4167a9c9:

  drbd: log request sector offset and size for IO errors (2012-10-02 14:52:24 
+0200)


Lars Ellenberg (9):
  drbd: introduce stop-sector to online verify
  drbd: change error cleanup goto for failed kobject_init_and_add
  drbd: panic on delayed completion of aborted requests
  drbd: fix potential deadlock during bitmap (re-)allocation
  drbd: a few more GFP_KERNEL -> GFP_NOIO
  drbd: wait for meta data IO completion even with failed disk, unless 
force-detached
  drbd: prepare for more than 32 bit flags
  drbd: always write bitmap on detach
  drbd: log request sector offset and size for IO errors

Philipp Reisner (6):
  drbd: Add a drbd directory to sysfs
  drbd: expose the data generation identifiers via sysfs
  drbd: Protect accesses to the uuid set with a spinlock
  drbd: Fix a potential issue with the DISCARD_CONCURRENT flag
  drbd: Avoid NetworkFailure state during disconnect
  drbd: Remove dead code

 drivers/block/drbd/Makefile|1 +
 drivers/block/drbd/drbd_actlog.c   |   19 ++--
 drivers/block/drbd/drbd_bitmap.c   |   24 ++---
 drivers/block/drbd/drbd_int.h  |  112 +--
 drivers/block/drbd/drbd_main.c |  175 
 drivers/block/drbd/drbd_nl.c   |  109 +-
 drivers/block/drbd/drbd_proc.c |   14 ++-
 drivers/block/drbd/drbd_receiver.c |  147 --
 drivers/block/drbd/drbd_req.c  |   43 ++---
 drivers/block/drbd/drbd_sysfs.c|   86 ++
 drivers/block/drbd/drbd_worker.c   |   87 ++
 include/linux/drbd.h   |4 +-
 include/linux/drbd_nl.h|1 +
 13 files changed, 571 insertions(+), 251 deletions(-)
 create mode 100644 drivers/block/drbd/drbd_sysfs.c

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [GIT PULL] drbd-8.3 updates

2012-10-03 Thread Philipp Reisner

> Not pulled. Two reasons:
> 
> - It's late (in the merge window)
> - and it's not based off my for-3.7/drivers branch, hence I get a ton of
>   unrelated changes with a pull into that branch.

Hi Jens,

I can rebase it for you in a few hours. Would this influence your decision?

Best,
 Phil
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [GIT PULL] drbd-8.3 updates

2012-10-03 Thread Philipp Reisner

Am Mittwoch, 3. Oktober 2012, 11:24:09 schrieben Sie:
> > Not pulled. Two reasons:
> > 
> > - It's late (in the merge window)
> > - and it's not based off my for-3.7/drivers branch, hence I get a ton of
> > 
> >   unrelated changes with a pull into that branch.
> 
> Hi Jens,
> 
> I can rebase it for you in a few hours. Would this influence your decision?
> 

Hi Jens,

Is there a convenient way for me to find out when it is the right time
to send pull requests your way? (i.e. a notification when you create your
for-3.x/drivers branch)

Rebasing it on your drivers tree was trivial, here is the updated pull
request:

The following changes since commit fab74e7a8f0f8d0af2356c28aa60d55f9e6f5f8b:

  loop: Make explicit loop device destruction lazy (2012-09-28 10:42:23 +0200)

are available in the git repository at:

  git://git.drbd.org/linux-drbd.git for-jens

for you to fetch changes up to 61e8114a682b0e868696f8363ed03e5fd4c750d1:

  drbd: log request sector offset and size for IO errors (2012-10-03 11:54:45 
+0200)


Lars Ellenberg (9):
  drbd: introduce stop-sector to online verify
  drbd: change error cleanup goto for failed kobject_init_and_add
  drbd: panic on delayed completion of aborted requests
  drbd: fix potential deadlock during bitmap (re-)allocation
  drbd: a few more GFP_KERNEL -> GFP_NOIO
  drbd: wait for meta data IO completion even with failed disk, unless 
force-detached
  drbd: prepare for more than 32 bit flags
  drbd: always write bitmap on detach
  drbd: log request sector offset and size for IO errors

Philipp Reisner (6):
  drbd: Add a drbd directory to sysfs
  drbd: expose the data generation identifiers via sysfs
  drbd: Protect accesses to the uuid set with a spinlock
  drbd: Fix a potential issue with the DISCARD_CONCURRENT flag
  drbd: Avoid NetworkFailure state during disconnect
  drbd: Remove dead code

 drivers/block/drbd/Makefile|1 +
 drivers/block/drbd/drbd_actlog.c   |   19 ++--
 drivers/block/drbd/drbd_bitmap.c   |   24 ++---
 drivers/block/drbd/drbd_int.h  |  112 +--
 drivers/block/drbd/drbd_main.c |  175 
 drivers/block/drbd/drbd_nl.c   |  109 +-
 drivers/block/drbd/drbd_proc.c |   14 ++-
 drivers/block/drbd/drbd_receiver.c |  147 --
 drivers/block/drbd/drbd_req.c  |   43 ++---
 drivers/block/drbd/drbd_sysfs.c|   86 ++
 drivers/block/drbd/drbd_worker.c   |   87 ++
 include/linux/drbd.h   |4 +-
 include/linux/drbd_nl.h|1 +
 13 files changed, 571 insertions(+), 251 deletions(-)
 create mode 100644 drivers/block/drbd/drbd_sysfs.c

Best,
 Phil
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [GIT PULL] drbd-8.3 updates

2012-10-03 Thread Philipp Reisner

> Thanks, one question before this is pulled in:
> > Philipp Reisner (6):
> >   drbd: Add a drbd directory to sysfs
> >   drbd: expose the data generation identifiers via sysfs
> 
> What are these? It's sitting in /sys/block//drbd/, I don't see any
> documentation or justification for that.
> 
> Why isn't it off in debugfs or similar instead?

The long-term goal is to get rid of the /proc/drbd virtual file, and
present the information that was there in a more structured way in /sys.

This patch is a very first step in that direction. Later we intend to
expose things like the connection state, device roles, and statistics
counters there.

When coming up with the layout we used the sysfs presence of software raid
as an example.

I have removed it from this pull-request, so that there is more time for
consideration before the next merge window.

The following changes since commit fab74e7a8f0f8d0af2356c28aa60d55f9e6f5f8b:

  loop: Make explicit loop device destruction lazy (2012-09-28 10:42:23 +0200)

are available in the git repository at:

  git://git.drbd.org/linux-drbd.git for-jens

for you to fetch changes up to 56a2dabd4d953b70c4630387368d79d6d1625c1d:

  drbd: log request sector offset and size for IO errors (2012-10-03 15:46:04 
+0200)


Lars Ellenberg (8):
  drbd: introduce stop-sector to online verify
  drbd: panic on delayed completion of aborted requests
  drbd: fix potential deadlock during bitmap (re-)allocation
  drbd: a few more GFP_KERNEL -> GFP_NOIO
  drbd: wait for meta data IO completion even with failed disk, unless 
force-detached
  drbd: prepare for more than 32 bit flags
  drbd: always write bitmap on detach
  drbd: log request sector offset and size for IO errors

Philipp Reisner (4):
  drbd: Protect accesses to the uuid set with a spinlock
  drbd: Fix a potential issue with the DISCARD_CONCURRENT flag
  drbd: Avoid NetworkFailure state during disconnect
  drbd: Remove dead code

 drivers/block/drbd/drbd_actlog.c   |   19 ++--
 drivers/block/drbd/drbd_bitmap.c   |   24 ++---
 drivers/block/drbd/drbd_int.h  |  108 +--
 drivers/block/drbd/drbd_main.c |  170 
 drivers/block/drbd/drbd_nl.c   |   74 +---
 drivers/block/drbd/drbd_proc.c |   14 ++-
 drivers/block/drbd/drbd_receiver.c |  147 ---
 drivers/block/drbd/drbd_req.c  |   43 ++---
 drivers/block/drbd/drbd_worker.c   |   87 ++
 include/linux/drbd.h   |4 +-
 include/linux/drbd_nl.h|1 +
 11 files changed, 451 insertions(+), 240 deletions(-)

Best,
 Phil
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [GIT PULL] 4 fixes for drbd

2012-11-05 Thread Philipp Reisner

> 
> Can you rebase this against for-3.8/drivers? Thanks.

Hi Jens,

One of these changes fixes a regression that was introduced in the last
merge window. Please consider pulling this single commit.
(I will rebase the other 3 commits on for-3.8/drivers)


The following changes since commit a13c29ddf73d3be4fbb2b1bbced64014986cd87a:
  Jens Axboe (1):
Merge branch 'for-jens' of 
git://git.kernel.org/pub/scm/linux/kernel/git/jikos/linux-block into 
for-3.7/drivers

are available in the git repository at:

  git://git.drbd.org/linux-drbd.git for-jens

Lars Ellenberg (1):
  drbd: fix regression: potential NULL pointer dereference

 drivers/block/drbd/drbd_int.h  |5 +
 drivers/block/drbd/drbd_main.c |8 ++--
 2 files changed, 11 insertions(+), 2 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [GIT PULL] 4 fixes for drbd

2012-11-05 Thread Philipp Reisner

Am Montag, 5. November 2012, 15:43:28 schrieb Jens Axboe:
> On 2012-11-05 15:35, Philipp Reisner wrote:
> >> Can you rebase this against for-3.8/drivers? Thanks.
> > 
> > Hi Jens,
> > 
> > One of these changes fixes a regression that was introduced in the last
> > merge window. Please consider pulling this single commit.
> > (I will rebase the other 3 commits on for-3.8/drivers)
> > 
> > The following changes since commit a13c29ddf73d3be4fbb2b1bbced64014986cd87a:
> >   Jens Axboe (1):
> > Merge branch 'for-jens' of
> > git://git.kernel.org/pub/scm/linux/kernel/git/jikos/linux-block
> > into for-3.7/drivers> 
> > are available in the git repository at:
> >   git://git.drbd.org/linux-drbd.git for-jens
> > 
> > Lars Ellenberg (1):
> >   drbd: fix regression: potential NULL pointer dereference
> >  
> >  drivers/block/drbd/drbd_int.h  |5 +
> >  drivers/block/drbd/drbd_main.c |8 ++--
> >  2 files changed, 11 insertions(+), 2 deletions(-)
> 
> Sure, of course I'll take a regression fix. But the fix does not apply
> to current tree:
> 
> axboe@nelson:/src/linux-block $ patch -p1 --dry-run < ~/1.txt
> patching file drivers/block/drbd/drbd_int.h
> Hunk #1 FAILED at 2545.
> 1 out of 1 hunk FAILED -- saving rejects to file
> drivers/block/drbd/drbd_int.h.rej patching file
> drivers/block/drbd/drbd_main.c
> Hunk #1 FAILED at 4232.
> 1 out of 1 hunk FAILED -- saving rejects to file
> drivers/block/drbd/drbd_main
> 
> Regression fixes for the current tree should be based on the current
> tree, looks like you used an outdated for-3.7/drivers branch and this
> fix depends on other fixes.

Hmm, apparently the changes I submitted to you on October 2 and 3
(which were pulled by you into the for-3.7/drivers tree) never found 
their way into Linus' tree. (I was not aware of that)

The mentioned patch fixes a regression there. 

Ok, then please ignore this pull request. I will send a new one for
for-3.8/drivers in a few days.

Best,
 Phil
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[GIT PULL] 4 fixes for drbd

2012-10-29 Thread Philipp Reisner

Hi Jens,

please consider pulling these fixes in. They are based on your
for-3.7/drivers branch.

Best regards,
 Phil


The following changes since commit 34a73dd594699dc3834167297a74c43948bb6e41:

  Revert "memstick: add support for legacy memorysticks" (2012-10-10 16:13:26 
-0600)

are available in the git repository at:

  git://git.drbd.org/linux-drbd.git for-jens

for you to fetch changes up to 731d4596a6f3ba41418a0b11018c453456c51d92:

  drbd: check return of kmalloc in receive_uuids (2012-10-29 13:18:13 +0100)


Jing Wang (1):
  drbd: check return of kmalloc in receive_uuids

Lars Ellenberg (2):
  drbd: fix regression: potential NULL pointer dereference
  drbd: don't try to clear bits once the disk has failed

Philipp Reisner (1):
  drbd: Broadcast sync progress no more often than once per second

 drivers/block/drbd/drbd_actlog.c   |   19 +--
 drivers/block/drbd/drbd_int.h  |6 ++
 drivers/block/drbd/drbd_main.c |8 ++--
 drivers/block/drbd/drbd_nl.c   |5 +
 drivers/block/drbd/drbd_receiver.c |4 
 drivers/block/drbd/drbd_worker.c   |4 
 6 files changed, 38 insertions(+), 8 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [Drbd-dev] [PATCH] drbd: use list_move_tail instead of list_del/list_add_tail

2012-09-05 Thread Philipp Reisner


Thanks, applied.

Best regards,
 Phil
> From: Wei Yongjun 
> 
> Using list_move_tail() instead of list_del() + list_add_tail().
> 
> spatch with a semantic match was used to find this problem.
> (http://coccinelle.lip6.fr/)
> 
> Signed-off-by: Wei Yongjun 
> ---
>  drivers/block/drbd/drbd_worker.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/drivers/block/drbd/drbd_worker.c
> b/drivers/block/drbd/drbd_worker.c index 6bce2cc..a196281 100644
> --- a/drivers/block/drbd/drbd_worker.c
> +++ b/drivers/block/drbd/drbd_worker.c
> @@ -141,8 +141,7 @@ static void drbd_endio_write_sec_final(struct
> drbd_epoch_entry *e) __releases(lo
> 
>   spin_lock_irqsave(&mdev->req_lock, flags);
>   mdev->writ_cnt += e->size >> 9;
> - list_del(&e->w.list); /* has been on active_ee or sync_ee */
> - list_add_tail(&e->w.list, &mdev->done_ee);
> + list_move_tail(&e->w.list, &mdev->done_ee);
> 
>   /* No hlist_del_init(&e->collision) here, we did not send the Ack yet,
>* neither did we wake possibly waiting conflicting requests.
> 
> 
> ___
> drbd-dev mailing list
> drbd-...@lists.linbit.com
> http://lists.linbit.com/mailman/listinfo/drbd-dev
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[GIT PULL] drbd Kconfig update and minor fixes

2012-12-06 Thread Philipp Reisner

The following changes since commit 84ad6845fbb1248228d3beab8084e4b5f6f82b1d:

  Merge branch 'stable/for-jens-3.8' of 
git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen into for-3.8/drivers 
(2012-12-01 09:42:41 +0100)

are available in the git repository at:


  git://git.drbd.org/linux-drbd.git for-jens

for you to fetch changes up to d2ec180c23a5a1bfe34d8638b0342a47c00cf70f:

  drbd: update Kconfig to match current dependencies (2012-12-06 13:08:29 +0100)


Lars Ellenberg (2):
  drbd: respect no-md-barriers setting also when changed online via 
disk-options
  drbd: update Kconfig to match current dependencies

Philipp Reisner (3):
  drbd: Remove obsolete check
  drbd: close race between drbd_set_role and drbd_connect
  drbd: Fix drbdsetup wait-connect, wait-sync etc... commands

 drivers/block/drbd/Kconfig |   10 ++
 drivers/block/drbd/drbd_main.c |2 +-
 drivers/block/drbd/drbd_nl.c   |   16 +++-
 drivers/block/drbd/drbd_receiver.c |   10 ++
 4 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig
index df09837..7845bd6 100644
--- a/drivers/block/drbd/Kconfig
+++ b/drivers/block/drbd/Kconfig
@@ -2,13 +2,14 @@
 # DRBD device driver configuration
 #
 
-comment "DRBD disabled because PROC_FS, INET or CONNECTOR not selected"
-   depends on PROC_FS='n' || INET='n' || CONNECTOR='n'
+comment "DRBD disabled because PROC_FS or INET not selected"
+   depends on PROC_FS='n' || INET='n'
 
 config BLK_DEV_DRBD
tristate "DRBD Distributed Replicated Block Device support"
-   depends on PROC_FS && INET && CONNECTOR
+   depends on PROC_FS && INET
select LRU_CACHE
+   select LIBCRC32C
default n
help
 
@@ -58,7 +59,8 @@ config DRBD_FAULT_INJECTION
  32data read
  64read ahead
  128   kmalloc of bitmap
- 256   allocation of EE (epoch_entries)
+ 256   allocation of peer_requests
+ 512   insert data corruption on receiving side
 
  fault_devs: bitmask of minor numbers
  fault_rate: frequency in percent
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 52de26d..8c13eeb 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -840,7 +840,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
}
spin_lock_irq(&mdev->ldev->md.uuid_lock);
for (i = UI_CURRENT; i < UI_SIZE; i++)
-   p->uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 
0;
+   p->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
spin_unlock_irq(&mdev->ldev->md.uuid_lock);
 
mdev->comm_bm_set = drbd_bm_total_weight(mdev);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 76bb3a6..2af26fc 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1230,6 +1230,11 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct 
genl_info *info)
else
mdev->ldev->md.flags |= MDF_AL_DISABLED;
 
+   if (new_disk_conf->md_flushes)
+   clear_bit(MD_NO_FUA, &mdev->flags);
+   else
+   set_bit(MD_NO_FUA, &mdev->flags);
+
drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush);
 
drbd_md_sync(mdev);
@@ -3292,11 +3297,12 @@ void drbd_bcast_event(struct drbd_conf *mdev, const 
struct sib_info *sib)
unsigned seq;
int err = -ENOMEM;
 
-   if (sib->sib_reason == SIB_SYNC_PROGRESS &&
-   time_after(jiffies, mdev->rs_last_bcast + HZ))
-   mdev->rs_last_bcast = jiffies;
-   else
-   return;
+   if (sib->sib_reason == SIB_SYNC_PROGRESS) {
+   if (time_after(jiffies, mdev->rs_last_bcast + HZ))
+   mdev->rs_last_bcast = jiffies;
+   else
+   return;
+   }
 
seq = atomic_inc_return(&drbd_genl_seq);
msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index 1599a1a..a9eccfc 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1037,6 +1037,16 @@ randomize:
rcu_read_lock();
idr_for_each_entry(&tconn->volumes, mdev, vnr) {
kref_get(&mdev->kref);
+   /* Prevent a race between resync-handshake and
+* being promoted to Primary.
+*
+* Grab and release the state mutex, so we know that any current
+* drbd_set_role() is fin

[GIT PULL] drbd-8.4.2 for the linux-3.8 merge window

2012-11-09 Thread Philipp Reisner

Hi Jens,

Please pull drbd-8.4.2 into your for-3.8/drivers branch.

The most noticeable change is the support for multiple replicated
volumes in a single DRBD connection.

This release requires new drbd userland tools >= 8.4.0 (available
since July 2011).
8.4.x is network protocol compatible with all previous releases.
This release brings a new meta-data format. Forward (8.3 -> 8.4)
conversion happens completely seamlessly. Backward conversion
is done by a single command (drbdadm apply-al res).

The "recent changes" chapter of our user's guide describes all
changes in detail:
http://www.drbd.org/users-guide/ap-recent-changes.html

Changelog

8.4.2 (api:genl1/proto:86-100)

 * Go into inconsistent disk state with on-io-error=pass-on policy
 * Timeouts for requests processing on the peer (previously that
   worked only if the data socket was congested)
 * Conflicting write detection is now based on an interval tree,
   removed the hash-tables (necessary for the unlimited BIO sizes)
 * Support for multiple volumes (minors, block devices) per connection;
   up to 65536 volumes per connection supported
 * Reduced IO latencies during some state changes (esp. start resync)
 * New on disk format for the AL: double capacity; 4k aligned IO; same space
 * Multiple AL changes in a single transaction (precondition for
   unlimited BIO sizes)
 * DRBD no longer imposes any limit on BIO sizes
 * Removed DRBD's limits on the number of minor devices
 * DRBD's minors can now be removed (not only unconfigured)
 * Switched the user space interface from connector to generic netlink
 * The wire-protocol is now a regular connection option, which can be
   changed while the device is online
 * IO freezing/thawing is done on connection (all volumes) level
 * fencing is done on connection (all volumes) level
 * Enforce application of activity log after primary crash in user space
 * New default values (compared to drbd-8.3) for: minor-count, ko-count, 
al-extents,
   c-plan-ahead, c-fill-target, c-min-rate, use-rle, on-io-error
 * Optional load balancing for read requests: new keyword "read-balance"
 * New option 'al-updates no' to disable writing transactions into the
   activity log. It is useful if you prefer a full sync after a primary
   crash, for improved performance of a spread out random write work load
 * Expose the data generation identifiers via sysfs


Jens, regarding the code:

It has the sysfs bits in again. The reason for that is that we want to
expose more information by that, and remove the /proc/drbd with the
next evolutionary step. -- In case this is a show stopper, let me
remove the sysfs bits.


Here is the git-pull-request text: 
(The patch subjects removed to make the mail more digestible)

The following changes since commit ccae7868b0c5697508a541c531cf96b361d62c1c:

  drbd: log request sector offset and size for IO errors (2012-10-30 08:39:18 
+0100)

are available in the git repository at:

  git://git.drbd.org/linux-drbd.git for-jens_drbd-8.4.2

for you to fetch changes up to e877f7fdc0b1052b0e881e61f9290268eb21aa2f:

  drbd: use copy_highpage (2012-11-09 12:06:44 +0100)


Akinobu Mita (1):
  drbd: use copy_highpage

Andreas Gruenbacher (210):
  drbd: Get rid of req_validator_fn typedef
[...]

David Howells (1):
  DRBD: Fix comparison always false warning due to long/long long compare

Jing Wang (1):
  drbd: check return of kmalloc in receive_uuids

Lars Ellenberg (138):
  drbd: simplify condition in drbd_may_do_local_read()
[...]

Philipp Marek (1):
      drbd: pass some more information to userspace.

Philipp Reisner (234):
  idr: idr_for_each_entry() macro
[...]

 drivers/block/drbd/Makefile|2 +
 drivers/block/drbd/drbd_actlog.c   |  689 +++
 drivers/block/drbd/drbd_bitmap.c   |  227 ++-
 drivers/block/drbd/drbd_int.h  | 1399 ++---
 drivers/block/drbd/drbd_interval.c |  207 ++
 drivers/block/drbd/drbd_interval.h |   40 +
 drivers/block/drbd/drbd_main.c | 3918 ++--
 drivers/block/drbd/drbd_nl.c   | 3391 ++-
 drivers/block/drbd/drbd_nla.c  |   55 +
 drivers/block/drbd/drbd_nla.h  |8 +
 drivers/block/drbd/drbd_proc.c |   33 +-
 drivers/block/drbd/drbd_receiver.c | 3883 ---
 drivers/block/drbd/drbd_req.c  | 1569 +++
 drivers/block/drbd/drbd_req.h  |  187 +-
 drivers/block/drbd/drbd_state.c| 1857 +
 drivers/block/drbd/drbd_state.h|  161 ++
 drivers/block/drbd/drbd_strings.c  |1 +
 drivers/block/drbd/drbd_sysfs.c|   86 +
 drivers/block/drbd/drbd_worker.c   | 1168 ++-
 drivers/block/drbd/drbd_wrappers.h |   11 +-
 include/linux/drbd.h   |   81 +-
 include/linux/drbd_genl.h  |  378 
 include/linux/drbd_genl_api.h

Re: [GIT PULL] drbd-8.4.2 for the linux-3.8 merge window

2012-11-09 Thread Philipp Reisner

[...]
> > It has the sysfs bits in again. The reason for that is that we want to
> > expose more information by that, and remove the /proc/drbd with the
> > next evolutionary step. -- In case this is a show stopper, let me
> > remove the sysfs bits.
> 
> The exact same sysfs bits I complained about last time? If yes, then I
> don't understand why you haven't changed yet. Or why you are trying to
> push the same bits again that got rejected last time.
> 

I had the impression it was rejected because I submitted the pull request
too late to you. In the sense of, it might go in, if it gets submitted
for inclusion before the merge window opens...
Apparently my impression was wrong. You will get an updated pull-request 
with the sysfs bits removed.

> > Here is the git-pull-request test:
> > (The patch subjects removed to make the mail more digestible)
> 
> Please don't do that, it basically makes the pull request useless! A few
> hundred extra lines is not an issue.

Ok.

I intend to send the updated pull-request later today.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [GIT PULL] drbd-8.4.2 for the linux-3.8 merge window

2012-11-09 Thread Philipp Reisner

o: properly initialize page->private
  drbd: grammar fix in log message
  drbd: fix access of unallocated pages and kernel panic
  drbd: fix local read error hung forever
  drbd: fix wrong assert in completion/retry path of failed local reads
  drbd: rename drbd_restart_write to drbd_restart_request
  drbd: transfer log epoch numbers are now per resource
  drbd: allow to dequeue batches of work at a time
  drbd: move the drbd_work_queue from drbd_socket to drbd_connection
  drbd: remove struct drbd_tl_epoch objects (barrier works)
  drbd: better separate WRITE and READ code paths in drbd_make_request
  drbd: __drbd_make_request() is now void
  drbd: introduce completion_ref and kref to struct drbd_request
  drbd: base completion and destruction of requests on ref counts
  drbd: __req_mod: make DISCARD_WRITE and independend case
  drbd: allow read requests to be retried after force-detach
  drbd: take error path in drbd_adm_down if interrupted by signal
  drbd: fix null pointer dereference with on-congestion policy when diskless
  drbd: cleanup, remove two unused global flags
  drbd: differentiate between normal and forced detach
  drbd: report congestion if we are waiting for some userland callback
  drbd: reset congestion information before reporting it in /proc/drbd
  drbd: do not reset rs_pending_cnt too early
  drbd: call local-io-error handler early
  drbd: flush drbd work queue before invalidate/invalidate remote
  drbd: introduce stop-sector to online verify
  drbd: announce FLUSH/FUA capability to upper layers
  drbd: fix drbd wire compatibility for empty flushes
  drbd: Fix a potential issue with the DISCARD_CONCURRENT flag
  drbd: only start a new epoch, if the current epoch contains writes
  drbd: NEG_ACK does not imply a barrier-ack
  drbd: cleanup, drop unused struct
  drbd: disambiguation, s/P_DISCARD_WRITE/P_SUPERSEDED/
  drbd: disambiguation, s/DISCARD_CONCURRENT/RESOLVE_CONFLICTS/
  drbd: disambiguation, s/ERR_DISCARD/ERR_DISCARD_IMPOSSIBLE/
  drbd: properly call drbd_rs_cancel_all() in drbd_disconnected()
  drbd: don't send out P_BARRIER with stale information
  drbd: temporarily suspend io in drbd_adm_disk_opts
  drbd: mutex_unlock "... must no be used in interrupt context"
  drbd: dequeue single work items in wait_for_work()
  drbd: fix potential list_add corruption
  drbd: differentiate early and later "postponing" of requests
  drbd: use list_move_tail instead of list_del/list_add_tail
  drbd: fix potential deadlock during bitmap (re-)allocation
  drbd: a few more GFP_KERNEL -> GFP_NOIO
  drbd: wait for meta data IO completion even with failed disk, unless 
force-detached
  drbd: always write bitmap on detach
  drbd: log request sector offset and size for IO errors
  drbd: if the replication link breaks during handshake, keep retrying

Philipp Marek (1):
  drbd: pass some more information to userspace.

Philipp Reisner (232):
  idr: idr_for_each_entry() macro
  drbd: Minimal struct drbd_tconn
  drbd: moved net_conf from mdev to tconn
  drbd: moved net_cont and net_cnt_wait from mdev to tconn
  drbd: moved data and meta from mdev to tconn
  drbd: moved receiver, worker and asender from mdev to tconn
  drbd: moved agreed_pro_version, last_received and ko_count to tconn
  drbd: moved req_lock and transfer log from mdev to tconn
  drbd: moved crypto transformations and friends from mdev to tconn
  drbd: Made drbd_flush_workqueue() to take a tconn instead of an mdev
  drbd: Preparing to use p_header96 for all packets
  drbd: Replaced all p_header80 with a generic p_header
  drbd: Use new header layout
  drbd: Implemented receiving of new style packets on meta socket
  drbd: Do not access tconn after it was freed
  drbd: Moved the state functions into its own source file
  drbd: Moved the thread name into the data structure
  drbd: Eliminated the user of drbd_task_to_thread()
  drbd: Moved code
  drbd: Do no sleep long in drbd_start_resync
  drbd: Revert "Make sure we dont send state if a cluster wide state change 
is in progress"
  drbd: Moving state related macros to drbd_state.h
  drbd: conn_printk() a dev_printk() alike for drbd's connections
  drbd: Converted drbd_try_connect() from mdev to tconn
  drbd: Converted drbd_wait_for_connect() from mdev to tconn
  drbd: Started to separated connection flags (tconn) from block device 
flags (mdev)
  drbd: Moved DISCARD_CONCURRENT to the per connection (tconn) flags
  drbd: Moved SEND_PING to the per connection (tconn) flags
  drbd: Moved SIGNAL_ASENDER to the per connection (tconn) flags
  drbd: Converted wake_asender() and request_ping() from mdev to tconn
  drbd: Converted hel

Re: [GIT PULL] drbd-8.4.2 for the linux-3.8 merge window

2012-11-09 Thread Philipp Reisner

Am Freitag, 9. November 2012, 15:50:24 schrieb Jens Axboe:
> On 2012-11-09 15:18, Jens Axboe wrote:
> > On 2012-11-09 14:33, Philipp Reisner wrote:
> >> Jens, here it is without the sysfs stuff
> > 
> > Thanks, pulled into for-3.8/drivers
> 
> I didn't say anything, but I've been fuming a bit the last few series of
> merge windows. You need to stop these insanely massive pull requests.
> I've been large since this is "just a driver", but it can't continue. We
> should have reached stability a long time ago. Your pull requests
> contain a shit load of items, are you guys paying per commit? Look at
> these:
> 
>   drbd: Request lookup code cleanup (1)
>   drbd: Request lookup code cleanup (2)
>   drbd: Request lookup code cleanup (3)
>   drbd: Request lookup code cleanup (4)
> 

We are living in the belief that we should break up big
changes into reviewable chunks.

> or
> 
>   drbd: conn_send_cmd2(): Return 0 upon success and an error code
> otherwise drbd: _conn_send_cmd(): Return 0 upon success and an error code
> otherwise drbd: _drbd_send_cmd(): Return 0 upon success and an error code
> otherwise drbd: conn_send_cmd(): Return 0 upon success and an error code
> otherwise
> 

Function by function gets converted to the "return 0 upon success" call
semantics.
Do you prefer that all of that should be done in a single commit?

> along with FIFTY or so more of these. WTF is this?
> 
>   drbd: Converted helper functions for drbd_send() to tconn
>   drbd: Converted drbd_send() from mdev to tconn
>   drbd: Converted drbd_send_fp() from mdev to tconn
> 

Should it instead be in a single commit?

> 
> I don't think I need to go on. So from now on, to get items into the
> kernel, what you will do is:
> 
> - Stop doing insane commits like the above. It just doesn't make sense.
> 
> - Send pull requests in a timely fashion. No more of this "lets collect
>   ALL the things" then send it off. Collect small bug fixes, send those
>   off. Develop some feature or make some changes, send that off. Etc.

That works well for individual features, and we have been doing that
for the last two years.

But at this time we changed the object model. In the old code we had
a single kind of DRBD-in-kernel-object: a resource
Now we have two kinds: resources and volumes.

8.3: a resource had a single implicit volume

8.4: a resource might contain multiple volumes, each volume belongs to
 a single resource.

In the next ~12 months you will get only small features/updates etc... for
the 8.4 code base.

> The fact that your initial pull request had to MASK all these commits
> should have rung big bells in your head. It's a clear sign of a huge
> problem in your development model. If you can't clean this up, then
> it's not going in.

Fundamental changes in our object model require such huge change sets.
Jens, we will not stop where we are today. We plan to introduce a new
object: a connection. (The ability to mirror from one machine to *multiple*
receivers.)

Is it a better fit to introduce it then as a new driver? 
E.g. call it "drbd9". 
Should it use a new major number?

Best,
 Phil
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 5/6] drbd: Constants should be UPPERCASE

2013-06-14 Thread Philipp Reisner

Signed-off-by: Andreas Gruenbacher 
Signed-off-by: Philipp Reisner 
---
 drivers/block/drbd/drbd_int.h  |7 ++-
 drivers/block/drbd/drbd_nl.c   |   20 ++--
 drivers/block/drbd/drbd_receiver.c |6 +++---
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index f104328..4519d63 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1467,7 +1467,12 @@ extern void drbd_suspend_io(struct drbd_conf *mdev);
 extern void drbd_resume_io(struct drbd_conf *mdev);
 extern char *ppsize(char *buf, unsigned long long size);
 extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev 
*, sector_t, int);
-enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew 
= 2 };
+enum determine_dev_size {
+   DS_ERROR = -1,
+   DS_UNCHANGED = 0,
+   DS_SHRUNK = 1,
+   DS_GREW = 2
+};
 extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, 
enum dds_flags) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index e25803b..45d1275 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -835,7 +835,7 @@ enum determine_dev_size drbd_determine_dev_size(struct 
drbd_conf *mdev, enum dds
char ppb[10];
 
int md_moved, la_size_changed;
-   enum determine_dev_size rv = unchanged;
+   enum determine_dev_size rv = DS_UNCHANGED;
 
/* race:
 * application request passes inc_ap_bio,
@@ -878,7 +878,7 @@ enum determine_dev_size drbd_determine_dev_size(struct 
drbd_conf *mdev, enum dds
"Leaving size unchanged at size = %lu KB\n",
(unsigned long)size);
}
-   rv = dev_size_error;
+   rv = DS_ERROR;
}
/* racy, see comments above. */
drbd_set_my_capacity(mdev, size);
@@ -886,7 +886,7 @@ enum determine_dev_size drbd_determine_dev_size(struct 
drbd_conf *mdev, enum dds
dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
 (unsigned long long)size>>1);
}
-   if (rv == dev_size_error)
+   if (rv == DS_ERROR)
goto out;
 
la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect);
@@ -905,16 +905,16 @@ enum determine_dev_size drbd_determine_dev_size(struct 
drbd_conf *mdev, enum dds
err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : 
&drbd_bm_write,
 "size changed", BM_LOCKED_MASK);
if (err) {
-   rv = dev_size_error;
+   rv = DS_ERROR;
goto out;
}
drbd_md_mark_dirty(mdev);
}
 
if (size > la_size_sect)
-   rv = grew;
+   rv = DS_GREW;
if (size < la_size_sect)
-   rv = shrunk;
+   rv = DS_SHRUNK;
 out:
lc_unlock(mdev->act_log);
wake_up(&mdev->al_wait);
@@ -1619,10 +1619,10 @@ int drbd_adm_attach(struct sk_buff *skb, struct 
genl_info *info)
set_bit(USE_DEGR_WFC_T, &mdev->flags);
 
dd = drbd_determine_dev_size(mdev, 0);
-   if (dd == dev_size_error) {
+   if (dd == DS_ERROR) {
retcode = ERR_NOMEM_BITMAP;
goto force_diskless_dec;
-   } else if (dd == grew)
+   } else if (dd == DS_GREW)
set_bit(RESYNC_AFTER_NEG, &mdev->flags);
 
if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) ||
@@ -2387,13 +2387,13 @@ int drbd_adm_resize(struct sk_buff *skb, struct 
genl_info *info)
dd = drbd_determine_dev_size(mdev, ddsf);
drbd_md_sync(mdev);
put_ldev(mdev);
-   if (dd == dev_size_error) {
+   if (dd == DS_ERROR) {
retcode = ERR_NOMEM_BITMAP;
goto fail;
}
 
if (mdev->state.conn == C_CONNECTED) {
-   if (dd == grew)
+   if (dd == DS_GREW)
set_bit(RESIZE_PENDING, &mdev->flags);
 
drbd_send_uuids(mdev);
diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index adee58e..26852b8 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3545,7 +3545,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct 
packet_info *pi)
 {
struct drbd_conf *mdev;
struct p_sizes *p = pi->data;
-   enum determine_dev_size dd = unchanged;
+   enum determine_dev_size dd = DS_UNCHANGED;
sector_t p_si

[PATCH 4/6] drbd: Ignore the exit code of a fence-peer handler if it returns too late

2013-06-14 Thread Philipp Reisner

In case the connection was established and lost again before
the fence-peer handler returns, ignore the exit code of this
instance. (And use the exit code of the later started instance)

Signed-off-by: Andreas Gruenbacher 
Signed-off-by: Philipp Reisner 
---
 drivers/block/drbd/drbd_int.h   |1 +
 drivers/block/drbd/drbd_nl.c|   15 +--
 drivers/block/drbd/drbd_state.c |4 +++-
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index f943aac..f104328 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -832,6 +832,7 @@ struct drbd_tconn { /* is a resource from 
the config file */
unsigned susp_nod:1;/* IO suspended because no data */
unsigned susp_fen:1;/* IO suspended because fence peer 
handler runs */
struct mutex cstate_mutex;  /* Protects graceful disconnects */
+   unsigned int connect_cnt;   /* Inc each time a connection is 
established */
 
unsigned long flags;
struct net_conf *net_conf;  /* content protected by rcu */
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 0936d6a..e25803b 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -417,6 +417,7 @@ static enum drbd_fencing_p highest_fencing_policy(struct 
drbd_tconn *tconn)
 
 bool conn_try_outdate_peer(struct drbd_tconn *tconn)
 {
+   unsigned int connect_cnt;
union drbd_state mask = { };
union drbd_state val = { };
enum drbd_fencing_p fp;
@@ -428,6 +429,10 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
return false;
}
 
+   spin_lock_irq(&tconn->req_lock);
+   connect_cnt = tconn->connect_cnt;
+   spin_unlock_irq(&tconn->req_lock);
+
fp = highest_fencing_policy(tconn);
switch (fp) {
case FP_NOT_AVAIL:
@@ -492,8 +497,14 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
   here, because we might were able to re-establish the connection in 
the
   meantime. */
spin_lock_irq(&tconn->req_lock);
-   if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, 
&tconn->flags))
-   _conn_request_state(tconn, mask, val, CS_VERBOSE);
+   if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, 
&tconn->flags)) {
+   if (tconn->connect_cnt != connect_cnt)
+   /* In case the connection was established and droped
+  while the fence-peer handler was running, ignore it 
*/
+   conn_info(tconn, "Ignoring fence-peer exit code\n");
+   else
+   _conn_request_state(tconn, mask, val, CS_VERBOSE);
+   }
spin_unlock_irq(&tconn->req_lock);
 
return conn_highest_pdsk(tconn) <= D_OUTDATED;
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 90c5be2..216d47b 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1115,8 +1115,10 @@ __drbd_set_state(struct drbd_conf *mdev, union 
drbd_state ns,
drbd_thread_restart_nowait(&mdev->tconn->receiver);
 
/* Resume AL writing if we get a connection */
-   if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+   if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
drbd_resume_al(mdev);
+   mdev->tconn->connect_cnt++;
+   }
 
/* remember last attach time so request_timer_fn() won't
 * kill newly established sessions while we are still trying to thaw
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3/6] drbd: Fix rcu_read_lock balance on error path

2013-06-14 Thread Philipp Reisner

From: Andreas Gruenbacher 

Signed-off-by: Andreas Gruenbacher 
Signed-off-by: Philipp Reisner 
---
 drivers/block/drbd/drbd_nl.c |   19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 9e3f441..0936d6a 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2658,7 +2658,6 @@ int nla_put_status_info(struct sk_buff *skb, struct 
drbd_conf *mdev,
const struct sib_info *sib)
 {
struct state_info *si = NULL; /* for sizeof(si->member); */
-   struct net_conf *nc;
struct nlattr *nla;
int got_ldev;
int err = 0;
@@ -2688,13 +2687,19 @@ int nla_put_status_info(struct sk_buff *skb, struct 
drbd_conf *mdev,
goto nla_put_failure;
 
rcu_read_lock();
-   if (got_ldev)
-   if (disk_conf_to_skb(skb, 
rcu_dereference(mdev->ldev->disk_conf), exclude_sensitive))
-   goto nla_put_failure;
+   if (got_ldev) {
+   struct disk_conf *disk_conf;
 
-   nc = rcu_dereference(mdev->tconn->net_conf);
-   if (nc)
-   err = net_conf_to_skb(skb, nc, exclude_sensitive);
+   disk_conf = rcu_dereference(mdev->ldev->disk_conf);
+   err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
+   }
+   if (!err) {
+   struct net_conf *nc;
+
+   nc = rcu_dereference(mdev->tconn->net_conf);
+   if (nc)
+   err = net_conf_to_skb(skb, nc, exclude_sensitive);
+   }
rcu_read_unlock();
if (err)
goto nla_put_failure;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/6] drbd: fix error return code in drbd_init()

2013-06-14 Thread Philipp Reisner

From: Wei Yongjun 

Fix to return a negative error code from the error handling
case instead of 0, as returned elsewhere in this function.

Signed-off-by: Wei Yongjun 
Signed-off-by: Lars Ellenberg 
Signed-off-by: Andreas Gruenbacher 
Signed-off-by: Philipp Reisner 
---
 drivers/block/drbd/drbd_main.c |4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index a5dca6a..49040a3 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2762,8 +2762,6 @@ int __init drbd_init(void)
/*
 * allocate all necessary structs
 */
-   err = -ENOMEM;
-
init_waitqueue_head(&drbd_pp_wait);
 
drbd_proc = NULL; /* play safe for drbd_cleanup */
@@ -2773,6 +2771,7 @@ int __init drbd_init(void)
if (err)
goto fail;
 
+   err = -ENOMEM;
drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, 
&drbd_proc_fops, NULL);
if (!drbd_proc) {
printk(KERN_ERR "drbd: unable to register proc file\n");
@@ -2803,7 +2802,6 @@ int __init drbd_init(void)
 fail:
drbd_cleanup();
if (err == -ENOMEM)
-   /* currently always the case */
printk(KERN_ERR "drbd: ran out of memory\n");
else
printk(KERN_ERR "drbd: initialization failure\n");
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 6/6] drbd: Allow online change of al-stripes and al-stripe-size

2013-06-14 Thread Philipp Reisner

Allow changing the AL layout with a resize operation. For that,
the resize command gets two new fields: al_stripes and al_stripe_size.

In order to make the operation crash safe:
1) Lock out all IO and MD-IO
2) Write the super block with MDF_PRIMARY_IND clear
3) write the bitmap to the new location (all zeros, since
   we allow only while connected)
4) Initialize the new AL-area
5) Write the super block with the restored MDF_PRIMARY_IND.
6) Unfreeze all IO

Since the AL-layout has no influence on the protocol, this operation
needs to be performed on both sides of a resource (if intended).

Signed-off-by: Andreas Gruenbacher 
Signed-off-by: Philipp Reisner 
---
 drivers/block/drbd/drbd_actlog.c   |   21 ++
 drivers/block/drbd/drbd_int.h  |7 +-
 drivers/block/drbd/drbd_main.c |   57 ---
 drivers/block/drbd/drbd_nl.c   |  137 +---
 drivers/block/drbd/drbd_receiver.c |2 +-
 include/linux/drbd.h   |6 +-
 include/linux/drbd_genl.h  |2 +
 include/linux/drbd_limits.h|9 +++
 8 files changed, 188 insertions(+), 53 deletions(-)

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 6608076..28c73ca 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -659,6 +659,27 @@ void drbd_al_shrink(struct drbd_conf *mdev)
wake_up(&mdev->al_wait);
 }
 
+int drbd_initialize_al(struct drbd_conf *mdev, void *buffer)
+{
+   struct al_transaction_on_disk *al = buffer;
+   struct drbd_md *md = &mdev->ldev->md;
+   sector_t al_base = md->md_offset + md->al_offset;
+   int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
+   int i;
+
+   memset(al, 0, 4096);
+   al->magic = cpu_to_be32(DRBD_AL_MAGIC);
+   al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
+   al->crc32c = cpu_to_be32(crc32c(0, al, 4096));
+
+   for (i = 0; i < al_size_4k; i++) {
+   int err = drbd_md_sync_page_io(mdev, mdev->ldev, al_base + i * 
8, WRITE);
+   if (err)
+   return err;
+   }
+   return 0;
+}
+
 static int w_update_odbm(struct drbd_work *w, int unused)
 {
struct update_odbm_work *udw = container_of(w, struct update_odbm_work, 
w);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 4519d63..2d7f608 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1133,6 +1133,7 @@ extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
 void drbd_print_uuids(struct drbd_conf *mdev, const char *text);
 
 extern void conn_md_sync(struct drbd_tconn *tconn);
+extern void drbd_md_write(struct drbd_conf *mdev, void *buffer);
 extern void drbd_md_sync(struct drbd_conf *mdev);
 extern int  drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev 
*bdev);
 extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) 
__must_hold(local);
@@ -1468,12 +1469,15 @@ extern void drbd_resume_io(struct drbd_conf *mdev);
 extern char *ppsize(char *buf, unsigned long long size);
 extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev 
*, sector_t, int);
 enum determine_dev_size {
+   DS_ERROR_SHRINK = -3,
+   DS_ERROR_SPACE_MD = -2,
DS_ERROR = -1,
DS_UNCHANGED = 0,
DS_SHRUNK = 1,
DS_GREW = 2
 };
-extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, 
enum dds_flags) __must_hold(local);
+extern enum determine_dev_size
+drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct 
resize_parms *) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
 extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
@@ -1639,6 +1643,7 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, 
sector_t sector,
 #define drbd_set_out_of_sync(mdev, sector, size) \
__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
 extern void drbd_al_shrink(struct drbd_conf *mdev);
+extern int drbd_initialize_al(struct drbd_conf *, void *);
 
 /* drbd_nl.c */
 /* state info broadcast */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 49040a3..55635ed 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2879,34 +2879,14 @@ struct meta_data_on_disk {
u8 reserved_u8[4096 - (7*8 + 10*4)];
 } __packed;
 
-/**
- * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit 
is set
- * @mdev:  DRBD device.
- */
-void drbd_md_sync(struct drbd_conf *mdev)
+
+
+void drbd_md_write(struct drbd_conf *mdev, void *b)
 {
-   struct meta_data_on_disk *buffer;
+   struct meta_data_on_disk *buffer = b;
sector_t sector;
int i;
 
-   /* Don't accidentally change the DRBD meta data layout. */
-   BUILD_

[PATCH 0/6] RFC: DRBD fixes and one functional enhancement

2013-06-14 Thread Philipp Reisner

The first 4 patches are obvious fixes. The last patch adds
the capability to change the activity log layout online.
(al-stripes and al-stripe-size)

Andreas Gruenbacher (2):
  drbd: Do not sleep inside rcu
  drbd: Fix rcu_read_lock balance on error path

Philipp Reisner (3):
  drbd: Ignore the exit code of a fence-peer handler if it returns too late
  drbd: Constants should be UPPERCASE
  drbd: Allow online change of al-stripes and al-stripe-size

Wei Yongjun (1):
  drbd: fix error return code in drbd_init()

 drivers/block/drbd/drbd_actlog.c   |   21 
 drivers/block/drbd/drbd_int.h  |   15 ++-
 drivers/block/drbd/drbd_main.c |   61 ++--
 drivers/block/drbd/drbd_nl.c   |  185 
 drivers/block/drbd/drbd_receiver.c |   12 +--
 drivers/block/drbd/drbd_state.c|4 +-
 include/linux/drbd.h   |6 +-
 include/linux/drbd_genl.h  |2 +
 include/linux/drbd_limits.h|9 ++
 9 files changed, 236 insertions(+), 79 deletions(-)

-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/6] drbd: Do not sleep inside rcu

2013-06-14 Thread Philipp Reisner

From: Andreas Gruenbacher 

Signed-off-by: Andreas Gruenbacher 
Signed-off-by: Philipp Reisner 
---
 drivers/block/drbd/drbd_receiver.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index 4222aff..adee58e 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1039,6 +1039,8 @@ randomize:
rcu_read_lock();
idr_for_each_entry(&tconn->volumes, mdev, vnr) {
kref_get(&mdev->kref);
+   rcu_read_unlock();
+
/* Prevent a race between resync-handshake and
 * being promoted to Primary.
 *
@@ -1049,8 +1051,6 @@ randomize:
mutex_lock(mdev->state_mutex);
mutex_unlock(mdev->state_mutex);
 
-   rcu_read_unlock();
-
if (discard_my_data)
set_bit(DISCARD_MY_DATA, &mdev->flags);
else
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/6] drbd: fix error return code in drbd_init()

2013-06-25 Thread Philipp Reisner

From: Wei Yongjun 

Fix to return a negative error code from the error handling
case instead of 0, as returned elsewhere in this function.

Signed-off-by: Wei Yongjun 
Signed-off-by: Lars Ellenberg 
Signed-off-by: Andreas Gruenbacher 
Signed-off-by: Philipp Reisner 
---
 drivers/block/drbd/drbd_main.c |4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index a5dca6a..49040a3 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2762,8 +2762,6 @@ int __init drbd_init(void)
/*
 * allocate all necessary structs
 */
-   err = -ENOMEM;
-
init_waitqueue_head(&drbd_pp_wait);
 
drbd_proc = NULL; /* play safe for drbd_cleanup */
@@ -2773,6 +2771,7 @@ int __init drbd_init(void)
if (err)
goto fail;
 
+   err = -ENOMEM;
drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, 
&drbd_proc_fops, NULL);
if (!drbd_proc) {
printk(KERN_ERR "drbd: unable to register proc file\n");
@@ -2803,7 +2802,6 @@ int __init drbd_init(void)
 fail:
drbd_cleanup();
if (err == -ENOMEM)
-   /* currently always the case */
printk(KERN_ERR "drbd: ran out of memory\n");
else
printk(KERN_ERR "drbd: initialization failure\n");
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/6] drbd: Do not sleep inside rcu

2013-06-25 Thread Philipp Reisner

From: Andreas Gruenbacher 

Signed-off-by: Andreas Gruenbacher 
---
 drivers/block/drbd/drbd_receiver.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index 4222aff..adee58e 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1039,6 +1039,8 @@ randomize:
rcu_read_lock();
idr_for_each_entry(&tconn->volumes, mdev, vnr) {
kref_get(&mdev->kref);
+   rcu_read_unlock();
+
/* Prevent a race between resync-handshake and
 * being promoted to Primary.
 *
@@ -1049,8 +1051,6 @@ randomize:
mutex_lock(mdev->state_mutex);
mutex_unlock(mdev->state_mutex);
 
-   rcu_read_unlock();
-
if (discard_my_data)
set_bit(DISCARD_MY_DATA, &mdev->flags);
else
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/6] DRBD fixes and one functional enhancement

2013-06-25 Thread Philipp Reisner

Hi Jens,

Please consider applying these patches to your for-3.11/drivers branch.

The first 4 patches are obvious fixes. The last patch adds
the capability to change the activity log layout online.
(al-stripes and al-stripe-size)

I posted these patches on the 14th for review.


Andreas Gruenbacher (2):
  drbd: Do not sleep inside rcu
  drbd: Fix rcu_read_lock balance on error path

Philipp Reisner (3):
  drbd: Ignore the exit code of a fence-peer handler if it returns too
late
  drbd: Constants should be UPPERCASE
  drbd: Allow online change of al-stripes and al-stripe-size

Wei Yongjun (1):
  drbd: fix error return code in drbd_init()

 drivers/block/drbd/drbd_actlog.c   |   21 
 drivers/block/drbd/drbd_int.h  |   15 ++-
 drivers/block/drbd/drbd_main.c |   61 ++--
 drivers/block/drbd/drbd_nl.c   |  185 
 drivers/block/drbd/drbd_receiver.c |   12 +--
 drivers/block/drbd/drbd_state.c|4 +-
 include/linux/drbd.h   |6 +-
 include/linux/drbd_genl.h  |2 +
 include/linux/drbd_limits.h|9 ++
 9 files changed, 236 insertions(+), 79 deletions(-)

-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3/6] drbd: Fix rcu_read_lock balance on error path

2013-06-25 Thread Philipp Reisner

From: Andreas Gruenbacher 

Signed-off-by: Andreas Gruenbacher 
Signed-off-by: Philipp Reisner 
---
 drivers/block/drbd/drbd_nl.c |   19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 9e3f441..0936d6a 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2658,7 +2658,6 @@ int nla_put_status_info(struct sk_buff *skb, struct 
drbd_conf *mdev,
const struct sib_info *sib)
 {
struct state_info *si = NULL; /* for sizeof(si->member); */
-   struct net_conf *nc;
struct nlattr *nla;
int got_ldev;
int err = 0;
@@ -2688,13 +2687,19 @@ int nla_put_status_info(struct sk_buff *skb, struct 
drbd_conf *mdev,
goto nla_put_failure;
 
rcu_read_lock();
-   if (got_ldev)
-   if (disk_conf_to_skb(skb, 
rcu_dereference(mdev->ldev->disk_conf), exclude_sensitive))
-   goto nla_put_failure;
+   if (got_ldev) {
+   struct disk_conf *disk_conf;
 
-   nc = rcu_dereference(mdev->tconn->net_conf);
-   if (nc)
-   err = net_conf_to_skb(skb, nc, exclude_sensitive);
+   disk_conf = rcu_dereference(mdev->ldev->disk_conf);
+   err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
+   }
+   if (!err) {
+   struct net_conf *nc;
+
+   nc = rcu_dereference(mdev->tconn->net_conf);
+   if (nc)
+   err = net_conf_to_skb(skb, nc, exclude_sensitive);
+   }
rcu_read_unlock();
if (err)
goto nla_put_failure;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 6/6] drbd: Allow online change of al-stripes and al-stripe-size

2013-06-25 Thread Philipp Reisner

Allow changing the AL layout with a resize operation. For that,
the resize command gets two new fields: al_stripes and al_stripe_size.

In order to make the operation crash safe:
1) Lock out all IO and MD-IO
2) Write the super block with MDF_PRIMARY_IND clear
3) Write the bitmap to the new location (all zeros, since
   we allow this only while connected)
4) Initialize the new AL-area
5) Write the super block with the restored MDF_PRIMARY_IND.
6) Unfreeze all IO

Since the AL-layout has no influence on the protocol, this operation
needs to be performed on both sides of a resource (if intended).

Signed-off-by: Andreas Gruenbacher 
Signed-off-by: Philipp Reisner 
---
 drivers/block/drbd/drbd_actlog.c   |   21 ++
 drivers/block/drbd/drbd_int.h  |7 +-
 drivers/block/drbd/drbd_main.c |   57 ---
 drivers/block/drbd/drbd_nl.c   |  137 +---
 drivers/block/drbd/drbd_receiver.c |2 +-
 include/linux/drbd.h   |6 +-
 include/linux/drbd_genl.h  |2 +
 include/linux/drbd_limits.h|9 +++
 8 files changed, 188 insertions(+), 53 deletions(-)

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 6608076..28c73ca 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -659,6 +659,27 @@ void drbd_al_shrink(struct drbd_conf *mdev)
wake_up(&mdev->al_wait);
 }
 
+int drbd_initialize_al(struct drbd_conf *mdev, void *buffer)
+{
+   struct al_transaction_on_disk *al = buffer;
+   struct drbd_md *md = &mdev->ldev->md;
+   sector_t al_base = md->md_offset + md->al_offset;
+   int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
+   int i;
+
+   memset(al, 0, 4096);
+   al->magic = cpu_to_be32(DRBD_AL_MAGIC);
+   al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
+   al->crc32c = cpu_to_be32(crc32c(0, al, 4096));
+
+   for (i = 0; i < al_size_4k; i++) {
+   int err = drbd_md_sync_page_io(mdev, mdev->ldev, al_base + i * 
8, WRITE);
+   if (err)
+   return err;
+   }
+   return 0;
+}
+
 static int w_update_odbm(struct drbd_work *w, int unused)
 {
struct update_odbm_work *udw = container_of(w, struct update_odbm_work, 
w);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 4519d63..2d7f608 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1133,6 +1133,7 @@ extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
 void drbd_print_uuids(struct drbd_conf *mdev, const char *text);
 
 extern void conn_md_sync(struct drbd_tconn *tconn);
+extern void drbd_md_write(struct drbd_conf *mdev, void *buffer);
 extern void drbd_md_sync(struct drbd_conf *mdev);
 extern int  drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev 
*bdev);
 extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) 
__must_hold(local);
@@ -1468,12 +1469,15 @@ extern void drbd_resume_io(struct drbd_conf *mdev);
 extern char *ppsize(char *buf, unsigned long long size);
 extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev 
*, sector_t, int);
 enum determine_dev_size {
+   DS_ERROR_SHRINK = -3,
+   DS_ERROR_SPACE_MD = -2,
DS_ERROR = -1,
DS_UNCHANGED = 0,
DS_SHRUNK = 1,
DS_GREW = 2
 };
-extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, 
enum dds_flags) __must_hold(local);
+extern enum determine_dev_size
+drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct 
resize_parms *) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
 extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
@@ -1639,6 +1643,7 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, 
sector_t sector,
 #define drbd_set_out_of_sync(mdev, sector, size) \
__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
 extern void drbd_al_shrink(struct drbd_conf *mdev);
+extern int drbd_initialize_al(struct drbd_conf *, void *);
 
 /* drbd_nl.c */
 /* state info broadcast */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 49040a3..55635ed 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2879,34 +2879,14 @@ struct meta_data_on_disk {
u8 reserved_u8[4096 - (7*8 + 10*4)];
 } __packed;
 
-/**
- * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit 
is set
- * @mdev:  DRBD device.
- */
-void drbd_md_sync(struct drbd_conf *mdev)
+
+
+void drbd_md_write(struct drbd_conf *mdev, void *b)
 {
-   struct meta_data_on_disk *buffer;
+   struct meta_data_on_disk *buffer = b;
sector_t sector;
int i;
 
-   /* Don't accidentally change the DRBD meta data layout. */
-   BUILD_

[PATCH 5/6] drbd: Constants should be UPPERCASE

2013-06-25 Thread Philipp Reisner

Signed-off-by: Andreas Gruenbacher 
Signed-off-by: Philipp Reisner 
---
 drivers/block/drbd/drbd_int.h  |7 ++-
 drivers/block/drbd/drbd_nl.c   |   20 ++--
 drivers/block/drbd/drbd_receiver.c |6 +++---
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index f104328..4519d63 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1467,7 +1467,12 @@ extern void drbd_suspend_io(struct drbd_conf *mdev);
 extern void drbd_resume_io(struct drbd_conf *mdev);
 extern char *ppsize(char *buf, unsigned long long size);
 extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev 
*, sector_t, int);
-enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew 
= 2 };
+enum determine_dev_size {
+   DS_ERROR = -1,
+   DS_UNCHANGED = 0,
+   DS_SHRUNK = 1,
+   DS_GREW = 2
+};
 extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, 
enum dds_flags) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index e25803b..45d1275 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -835,7 +835,7 @@ enum determine_dev_size drbd_determine_dev_size(struct 
drbd_conf *mdev, enum dds
char ppb[10];
 
int md_moved, la_size_changed;
-   enum determine_dev_size rv = unchanged;
+   enum determine_dev_size rv = DS_UNCHANGED;
 
/* race:
 * application request passes inc_ap_bio,
@@ -878,7 +878,7 @@ enum determine_dev_size drbd_determine_dev_size(struct 
drbd_conf *mdev, enum dds
"Leaving size unchanged at size = %lu KB\n",
(unsigned long)size);
}
-   rv = dev_size_error;
+   rv = DS_ERROR;
}
/* racy, see comments above. */
drbd_set_my_capacity(mdev, size);
@@ -886,7 +886,7 @@ enum determine_dev_size drbd_determine_dev_size(struct 
drbd_conf *mdev, enum dds
dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
 (unsigned long long)size>>1);
}
-   if (rv == dev_size_error)
+   if (rv == DS_ERROR)
goto out;
 
la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect);
@@ -905,16 +905,16 @@ enum determine_dev_size drbd_determine_dev_size(struct 
drbd_conf *mdev, enum dds
err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : 
&drbd_bm_write,
 "size changed", BM_LOCKED_MASK);
if (err) {
-   rv = dev_size_error;
+   rv = DS_ERROR;
goto out;
}
drbd_md_mark_dirty(mdev);
}
 
if (size > la_size_sect)
-   rv = grew;
+   rv = DS_GREW;
if (size < la_size_sect)
-   rv = shrunk;
+   rv = DS_SHRUNK;
 out:
lc_unlock(mdev->act_log);
wake_up(&mdev->al_wait);
@@ -1619,10 +1619,10 @@ int drbd_adm_attach(struct sk_buff *skb, struct 
genl_info *info)
set_bit(USE_DEGR_WFC_T, &mdev->flags);
 
dd = drbd_determine_dev_size(mdev, 0);
-   if (dd == dev_size_error) {
+   if (dd == DS_ERROR) {
retcode = ERR_NOMEM_BITMAP;
goto force_diskless_dec;
-   } else if (dd == grew)
+   } else if (dd == DS_GREW)
set_bit(RESYNC_AFTER_NEG, &mdev->flags);
 
if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) ||
@@ -2387,13 +2387,13 @@ int drbd_adm_resize(struct sk_buff *skb, struct 
genl_info *info)
dd = drbd_determine_dev_size(mdev, ddsf);
drbd_md_sync(mdev);
put_ldev(mdev);
-   if (dd == dev_size_error) {
+   if (dd == DS_ERROR) {
retcode = ERR_NOMEM_BITMAP;
goto fail;
}
 
if (mdev->state.conn == C_CONNECTED) {
-   if (dd == grew)
+   if (dd == DS_GREW)
set_bit(RESIZE_PENDING, &mdev->flags);
 
drbd_send_uuids(mdev);
diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index adee58e..26852b8 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3545,7 +3545,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct 
packet_info *pi)
 {
struct drbd_conf *mdev;
struct p_sizes *p = pi->data;
-   enum determine_dev_size dd = unchanged;
+   enum determine_dev_size dd = DS_UNCHANGED;
sector_t p_si

[PATCH 4/6] drbd: Ignore the exit code of a fence-peer handler if it returns too late

2013-06-25 Thread Philipp Reisner

In case the connection was established and lost again before
the fence-peer handler returns, ignore the exit code of this
instance. (And use the exit code of the later started instance.)

Signed-off-by: Andreas Gruenbacher 
Signed-off-by: Philipp Reisner 
---
 drivers/block/drbd/drbd_int.h   |1 +
 drivers/block/drbd/drbd_nl.c|   15 +--
 drivers/block/drbd/drbd_state.c |4 +++-
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index f943aac..f104328 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -832,6 +832,7 @@ struct drbd_tconn { /* is a resource from 
the config file */
unsigned susp_nod:1;/* IO suspended because no data */
unsigned susp_fen:1;/* IO suspended because fence peer 
handler runs */
struct mutex cstate_mutex;  /* Protects graceful disconnects */
+   unsigned int connect_cnt;   /* Inc each time a connection is 
established */
 
unsigned long flags;
struct net_conf *net_conf;  /* content protected by rcu */
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 0936d6a..e25803b 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -417,6 +417,7 @@ static enum drbd_fencing_p highest_fencing_policy(struct 
drbd_tconn *tconn)
 
 bool conn_try_outdate_peer(struct drbd_tconn *tconn)
 {
+   unsigned int connect_cnt;
union drbd_state mask = { };
union drbd_state val = { };
enum drbd_fencing_p fp;
@@ -428,6 +429,10 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
return false;
}
 
+   spin_lock_irq(&tconn->req_lock);
+   connect_cnt = tconn->connect_cnt;
+   spin_unlock_irq(&tconn->req_lock);
+
fp = highest_fencing_policy(tconn);
switch (fp) {
case FP_NOT_AVAIL:
@@ -492,8 +497,14 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
   here, because we might were able to re-establish the connection in 
the
   meantime. */
spin_lock_irq(&tconn->req_lock);
-   if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, 
&tconn->flags))
-   _conn_request_state(tconn, mask, val, CS_VERBOSE);
+   if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, 
&tconn->flags)) {
+   if (tconn->connect_cnt != connect_cnt)
+   /* In case the connection was established and droped
+  while the fence-peer handler was running, ignore it 
*/
+   conn_info(tconn, "Ignoring fence-peer exit code\n");
+   else
+   _conn_request_state(tconn, mask, val, CS_VERBOSE);
+   }
spin_unlock_irq(&tconn->req_lock);
 
return conn_highest_pdsk(tconn) <= D_OUTDATED;
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 90c5be2..216d47b 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1115,8 +1115,10 @@ __drbd_set_state(struct drbd_conf *mdev, union 
drbd_state ns,
drbd_thread_restart_nowait(&mdev->tconn->receiver);
 
/* Resume AL writing if we get a connection */
-   if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+   if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
drbd_resume_al(mdev);
+   mdev->tconn->connect_cnt++;
+   }
 
/* remember last attach time so request_timer_fn() won't
 * kill newly established sessions while we are still trying to thaw
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 5/5] drbd: avoid to shrink max_bio_size due to peer re-configuration

2013-07-31 Thread Philipp Reisner

From: Lars Ellenberg 

For a long time, the receiving side has spread "too large" incoming
requests over multiple bios.  No need to shrink our max_bio_size
(max_hw_sectors) if the peer is reconfigured to use a different storage.

The problem manifests itself if we are not the top of the device stack
(DRBD is used as an LVM PV).

A hardware reconfiguration on the peer may cause the supported
max_bio_size to shrink, and the connection handshake would now
unnecessarily shrink the max_bio_size on the active node.

There is no way to notify upper layers that they have to "re-stack"
their limits. So they won't notice at all, and may keep submitting bios
that are suddenly considered "too large for device".

We already check for compatibility and ignore changes on the peer,
the code only was masked out unless we have a fully established connection.
We just need to allow it a bit earlier during the handshake.

Also consider max_hw_sectors in our merge bvec function, just in case.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_nl.c  |4 ++--
 drivers/block/drbd/drbd_req.c |3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 37dad18..c706d50 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1132,9 +1132,9 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
/* We may ignore peer limits if the peer is modern enough.
   Because new from 8.3.8 onwards the peer can use multiple
   BIOs for a single peer_request */
-   if (mdev->state.conn >= C_CONNECTED) {
+   if (mdev->state.conn >= C_WF_REPORT_PARAMS) {
if (mdev->tconn->agreed_pro_version < 94)
-   peer = min( mdev->peer_max_bio_size, 
DRBD_MAX_SIZE_H80_PACKET);
+   peer = min(mdev->peer_max_bio_size, 
DRBD_MAX_SIZE_H80_PACKET);
/* Correct old drbd (up to 8.3.7) if it believes it can 
do more than 32KiB */
else if (mdev->tconn->agreed_pro_version == 94)
peer = DRBD_MAX_SIZE_H80_PACKET;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index c24379f..fec7bef 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1306,6 +1306,7 @@ int drbd_merge_bvec(struct request_queue *q, struct 
bvec_merge_data *bvm, struct
int backing_limit;
 
if (bio_size && get_ldev(mdev)) {
+   unsigned int max_hw_sectors = queue_max_hw_sectors(q);
struct request_queue * const b =
mdev->ldev->backing_bdev->bd_disk->queue;
if (b->merge_bvec_fn) {
@@ -1313,6 +1314,8 @@ int drbd_merge_bvec(struct request_queue *q, struct 
bvec_merge_data *bvm, struct
limit = min(limit, backing_limit);
}
put_ldev(mdev);
+   if ((limit >> 9) > max_hw_sectors)
+   limit = max_hw_sectors << 9;
}
return limit;
 }
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/5] drbd: fix NULL pointer deref in module init error path

2013-07-31 Thread Philipp Reisner

From: Lars Ellenberg 

If we want to iterate over the (as of yet still empty) list in the
cleanup path, we need to initialize the list before the first goto fail.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_main.c |   19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 55635ed..9e3818b 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2750,13 +2750,6 @@ int __init drbd_init(void)
return err;
}
 
-   err = drbd_genl_register();
-   if (err) {
-   printk(KERN_ERR "drbd: unable to register generic netlink 
family\n");
-   goto fail;
-   }
-
-
register_reboot_notifier(&drbd_notifier);
 
/*
@@ -2767,6 +2760,15 @@ int __init drbd_init(void)
drbd_proc = NULL; /* play safe for drbd_cleanup */
idr_init(&minors);
 
+   rwlock_init(&global_state_lock);
+   INIT_LIST_HEAD(&drbd_tconns);
+
+   err = drbd_genl_register();
+   if (err) {
+   printk(KERN_ERR "drbd: unable to register generic netlink 
family\n");
+   goto fail;
+   }
+
err = drbd_create_mempools();
if (err)
goto fail;
@@ -2778,9 +2780,6 @@ int __init drbd_init(void)
goto fail;
}
 
-   rwlock_init(&global_state_lock);
-   INIT_LIST_HEAD(&drbd_tconns);
-
retry.wq = create_singlethread_workqueue("drbd-reissue");
if (!retry.wq) {
printk(KERN_ERR "drbd: unable to create retry workqueue\n");
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/5] drbd: Fix a connection drop issue after enabling allow-two-primaries

2013-07-31 Thread Philipp Reisner

Since drbd-8.4.0 it is possible to change the allow-two-primaries
network option while the connection is established.

The sequence code used to partially order packets from the
data socket with packets from the meta-data socket still assumed
that the allow-two-primaries option is constant while the
connection is established.

I.e.
On a node that has the RESOLVE_CONFLICTS bits set, after enabling
allow-two-primaries, when receiving the next data packet it timed out
while waiting for the necessary packets on the data socket to arrive
(wait_for_and_update_peer_seq() function).

Fixed that by always tracking the sequence number, but only waiting
for it if allow-two-primaries is set.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_receiver.c |   39 +++-
 1 file changed, 16 insertions(+), 23 deletions(-)

diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index cc29cd3..12c59eb 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1890,29 +1890,11 @@ static u32 seq_max(u32 a, u32 b)
return seq_greater(a, b) ? a : b;
 }
 
-static bool need_peer_seq(struct drbd_conf *mdev)
-{
-   struct drbd_tconn *tconn = mdev->tconn;
-   int tp;
-
-   /*
-* We only need to keep track of the last packet_seq number of our peer
-* if we are in dual-primary mode and we have the resolve-conflicts 
flag set; see
-* handle_write_conflicts().
-*/
-
-   rcu_read_lock();
-   tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
-   rcu_read_unlock();
-
-   return tp && test_bit(RESOLVE_CONFLICTS, &tconn->flags);
-}
-
 static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
 {
unsigned int newest_peer_seq;
 
-   if (need_peer_seq(mdev)) {
+   if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)) {
spin_lock(&mdev->peer_seq_lock);
newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
mdev->peer_seq = newest_peer_seq;
@@ -1972,22 +1954,31 @@ static int wait_for_and_update_peer_seq(struct 
drbd_conf *mdev, const u32 peer_s
 {
DEFINE_WAIT(wait);
long timeout;
-   int ret;
+   int ret = 0, tp;
 
-   if (!need_peer_seq(mdev))
+   if (!test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags))
return 0;
 
spin_lock(&mdev->peer_seq_lock);
for (;;) {
if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
-   ret = 0;
break;
}
+
if (signal_pending(current)) {
ret = -ERESTARTSYS;
break;
}
+
+   rcu_read_lock();
+   tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
+   rcu_read_unlock();
+
+   if (!tp)
+   break;
+
+   /* Only need to wait if two_primaries is enabled */
prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
spin_unlock(&mdev->peer_seq_lock);
rcu_read_lock();
@@ -2228,8 +2219,10 @@ static int receive_Data(struct drbd_tconn *tconn, struct 
packet_info *pi)
}
goto out_interrupted;
}
-   } else
+   } else {
+   update_peer_seq(mdev, peer_seq);
spin_lock_irq(&mdev->tconn->req_lock);
+   }
list_add(&peer_req->w.list, &mdev->active_ee);
spin_unlock_irq(&mdev->tconn->req_lock);
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3/5] drbd: Fix adding of new minors with freshly created meta data

2013-07-31 Thread Philipp Reisner

Online adding of new minors with freshly created meta data
to a resource with an established connection failed, with a
wrong state transition on one side of the new minor.

Freshly created meta-data has a la_size (last agreed size) of 0.
When we online add such devices, the code wrongly got into
the code path for resyncing new storage that was added while
the disk was detached.

Fixed that by making the GREW from ZERO a special case.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_int.h |3 ++-
 drivers/block/drbd/drbd_nl.c  |2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 2d7f608..0e06f0c 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1474,7 +1474,8 @@ enum determine_dev_size {
DS_ERROR = -1,
DS_UNCHANGED = 0,
DS_SHRUNK = 1,
-   DS_GREW = 2
+   DS_GREW = 2,
+   DS_GREW_FROM_ZERO = 3,
 };
 extern enum determine_dev_size
 drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct 
resize_parms *) __must_hold(local);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 8cc1e64..37dad18 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -955,7 +955,7 @@ drbd_determine_dev_size(struct drbd_conf *mdev, enum 
dds_flags flags, struct res
}
 
if (size > la_size_sect)
-   rv = DS_GREW;
+   rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
if (size < la_size_sect)
rv = DS_SHRUNK;
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 4/5] drbd: fix decoding of bitmap vli rle for device sizes > 64 TB

2013-07-31 Thread Philipp Reisner

From: Lars Ellenberg 

Symptoms: disconnect after bitmap exchange due to
bitmap overflow (e:49731075554) while decoding bm RLE packet

In the decoding step of the variable length integer run length encoding
there was potentially an uncaught bit shift by word size (variable >> 64).

The result of which is "undefined" :(
(only "sometimes" the result is the desired 0)

Fix: don't do any bit shift magic for shift == 64, just assign.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_receiver.c |6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index 12c59eb..6fa6673 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -4125,7 +4125,11 @@ recv_bm_rle_bits(struct drbd_conf *mdev,
(unsigned int)bs.buf_len);
return -EIO;
}
-   look_ahead >>= bits;
+   /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; 
*/
+   if (likely(bits < 64))
+   look_ahead >>= bits;
+   else
+   look_ahead = 0;
have -= bits;
 
bits = bitstream_get_bits(&bs, &tmp, 64 - have);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/5] RFC: DRBD Fixes

2013-07-31 Thread Philipp Reisner

These are five independent fixes. Please see the commit messages
for descriptions.

The series is based on v3.11-rc3, and intended for the 3.12 merge
window.

I will post a second series targeted at the 3.12 merge window
in the next few days. It will contain only renames (no functional changes).


Lars Ellenberg (3):
  drbd: fix NULL pointer deref in module init error path
  drbd: fix decoding of bitmap vli rle for device sizes > 64 TB
  drbd: avoid to shrink max_bio_size due to peer re-configuration

Philipp Reisner (2):
  drbd: Fix an connection drop issue after enabling allow-two-primaries
  drbd: Fix adding of new minors with freshly created meta data

 drivers/block/drbd/drbd_int.h  |3 ++-
 drivers/block/drbd/drbd_main.c |   19 ---
 drivers/block/drbd/drbd_nl.c   |6 ++---
 drivers/block/drbd/drbd_receiver.c |   45 +---
 drivers/block/drbd/drbd_req.c  |3 +++
 5 files changed, 38 insertions(+), 38 deletions(-)

-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] drbd: add module_put() on error path in drbd_proc_open()

2013-03-04 Thread Philipp Reisner

Hi Alexey,

Thanks a lot! I have put it into our tree.
http://git.drbd.org/gitweb.cgi?p=drbd-8.4.git

I intend to get it into the 3.10 merge window.

Best,
 Phil

> If single_open() fails in drbd_proc_open(), module refcount is left
> incremented. The patch adds module_put() on the error path.
> 
> Found by Linux Driver Verification project (linuxtesting.org).
> 
> Signed-off-by: Alexey Khoroshilov 
> ---
>  drivers/block/drbd/drbd_proc.c |   10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
> index 56672a6..30fe0a5 100644
> --- a/drivers/block/drbd/drbd_proc.c
> +++ b/drivers/block/drbd/drbd_proc.c
> @@ -313,8 +313,14 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
> 
>  static int drbd_proc_open(struct inode *inode, struct file *file)
>  {
> - if (try_module_get(THIS_MODULE))
> - return single_open(file, drbd_seq_show, PDE(inode)->data);
> + int err;
> +
> + if (try_module_get(THIS_MODULE)) {
> + err = single_open(file, drbd_seq_show, PDE(inode)->data);
> + if (err)
> + module_put(THIS_MODULE);
> + return err;
> + }
>   return -ENODEV;
>  }
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [ 052/153] idr: idr_for_each_entry() macro

2013-03-04 Thread Philipp Reisner

Sure, here it is:
--

From: Philipp Reisner 

commit 9749f30f1a387070e6e8351f35aeb829eacc3ab6 upstream.

Inspired by the list_for_each_entry() macro

Signed-off-by: Ben Hutchings 
Signed-off-by: Philipp Reisner 
---
 include/linux/idr.h |   11 +++
 1 file changed, 11 insertions(+)

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 255491c..52a9da2 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -152,4 +152,15 @@ void ida_simple_remove(struct ida *ida, unsigned int id);
 
 void __init idr_init_cache(void);
 
+/**
+ * idr_for_each_entry - iterate over an idr's elements of a given type
+ * @idp: idr handle
+ * @entry:   the type * to use as cursor
+ * @id:  id entry's key
+ */
+#define idr_for_each_entry(idp, entry, id) \
+   for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \
+entry != NULL; \
+++id, entry = (typeof(entry))idr_get_next((idp), &(id)))
+
 #endif /* __IDR_H__ */

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: drbd: kernels 3.7 => 3.8 broken userspace compatibility

2013-05-06 Thread Philipp Reisner

Am Montag, 6. Mai 2013, 09:59:08 schrieb Chris Boot:
> Hi all,
> 
> I upgraded from a 3.7.x kernel to a 3.8.x kernel on a test machine
> running DRBD, and found myself unable to bring up my DRBD devices. I'm
> using the 8.3.13 userspace tools as shipped in Debian Wheezy, which work
> fine on the 3.7 kernel, but they appear to hang when using the 3.8
> kernel and cannot set up the device.
> 
> The 3.8 kernel appears to introduce drbd 8.4.2 rather than the 8.3.13
> available in 3.7.
> 
[...]
> Even if the kernel did require new userspace tools, should there not be
> some better mechanism to notify the user they must upgrade them before
> things will work? At the moment all I see without strace is:
> 
> # drbdadm attach r0
> DRBD module version: 8.4.2
>userland version: 8.3.13
> you should upgrade your drbd tools!
  
This message is a clear hint, isn't it?

See also https://lkml.org/lkml/2012/11/9/103

Fitting user space tools are available since 2011. The new tools
can deal with both interfaces.

Best,
 Phil
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3.9-stable] drbd: fix memory leak

2013-05-10 Thread Philipp Reisner

Yes, please.

> This patch looks like it should be in the 3.9-stable tree, should we apply
> it?
> 
> --
> 
> From: "Lars Ellenberg "
> 
> commit 94ad0a101415978be04945b2787be1e8e8a874db upstream
> 
> We forgot to free the disk_conf,
> so for each attach/detach cycle we leaked 336 bytes.
> 
> Signed-off-by: Philipp Reisner 
> Signed-off-by: Lars Ellenberg 
> Signed-off-by: Jens Axboe 
> Signed-off-by: Jonghwan Choi 
> ---
>  drivers/block/drbd/drbd_main.c |1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
> index e98da67..54d03d4 100644
> --- a/drivers/block/drbd/drbd_main.c
> +++ b/drivers/block/drbd/drbd_main.c
> @@ -2795,6 +2795,7 @@ void drbd_free_bc(struct drbd_backing_dev *ldev)
>   blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE |
> FMODE_EXCL);
>   blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
> 
> + kfree(ldev->disk_conf);
>   kfree(ldev);
>  }
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3.9-stable] drbd: Fix build error when CONFIG_CRYPTO_HMAC is not set

2013-05-10 Thread Philipp Reisner

Yes, please.

> This patch looks like it should be in the 3.9-stable tree, should we apply
> it?
> 
> --
> 
> From: "Philipp Reisner "
> 
> commit ef57f9e6bb9278720c8a5278728f252ab85d7ac6 upstream
> 
> Signed-off-by: Philipp Reisner 
> Signed-off-by: Lars Ellenberg 
> Signed-off-by: Jens Axboe 
> Signed-off-by: Jonghwan Choi 
> ---
>  drivers/block/drbd/drbd_receiver.c |4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/block/drbd/drbd_receiver.c
> b/drivers/block/drbd/drbd_receiver.c
> index 5105f43..2f5fffd 100644
> --- a/drivers/block/drbd/drbd_receiver.c
> +++ b/drivers/block/drbd/drbd_receiver.c
> @@ -4658,8 +4658,8 @@ static int drbd_do_features(struct drbd_tconn *tconn)
>  #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
>  static int drbd_do_auth(struct drbd_tconn *tconn)
>  {
> - dev_err(DEV, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
> - dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
> + conn_err(tconn, "This kernel was build without
> CONFIG_CRYPTO_HMAC.\n");
> + conn_err(tconn, "You need to disable 'cram-hmac-alg' in
> drbd.conf.\n");
>   return -1;
>  }
>  #else
> --
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3.9-stable ]drbd: fix for deadlock when using automatic split-brain-recovery

2013-05-10 Thread Philipp Reisner

Yes, please.

> This patch looks like it should be in the 3.9-stable tree, should we apply
> it?
> 
> --
> 
> From: "Philipp Reisner "
> 
> commit 7c689e63a847316c1b2500f86891b0a574ce7e69 upstream
> 
> With an automatic after split-brain recovery policy of
> "after-sb-1pri call-pri-lost-after-sb",
> when trying to drbd_set_role() to R_SECONDARY,
> we run into a deadlock.
> 
> This was first recognized and supposedly fixed by
> 2009-06-10 "Fixed a deadlock when using automatic split brain recovery when
> both nodes are"
> replacing drbd_set_role() with drbd_change_state() in that code-path,
> but the first hunk of that patch forgets to remove the drbd_set_role().
> 
> We apparently only ever tested the "two primaries" case.
> 
> Cc:  # 3.9.x: ef57f9e6: drbd: Fix build error
> Signed-off-by: Philipp Reisner 
> Signed-off-by: Lars Ellenberg 
> Signed-off-by: Jens Axboe 
> Signed-off-by: Jonghwan Choi 
> ---
>  drivers/block/drbd/drbd_receiver.c |1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/drivers/block/drbd/drbd_receiver.c
> b/drivers/block/drbd/drbd_receiver.c
> index a9eccfc..5105f43 100644
> --- a/drivers/block/drbd/drbd_receiver.c
> +++ b/drivers/block/drbd/drbd_receiver.c
> @@ -2661,7 +2661,6 @@ static int drbd_asb_recover_1p(struct drbd_conf *mdev)
> __must_hold(local)
>   if (hg == -1 && mdev->state.role == R_PRIMARY) {
>   enum drbd_state_rv rv2;
> 
> - drbd_set_role(mdev, R_SECONDARY, 0);
>/* drbd_change_state() does not sleep while in
> SS_IN_TRANSIENT_STATE,
> * we might be here in C_WF_REPORT_PARAMS which is
> transient.
> * we do not need to wait for the after state
> change work either. */
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 16/17] drbd: use sched_setscheduler()

2013-03-27 Thread Philipp Reisner

It went unnoticed for some time that assigning to current->policy is
no longer sufficient to set a real time priority for a kernel thread.

Reported-by: Charlie Suffin 
Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_receiver.c |6 --
 include/linux/drbd.h   |2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index a75c0b1..0f449bb 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -5257,9 +5257,11 @@ int drbd_asender(struct drbd_thread *thi)
bool ping_timeout_active = false;
struct net_conf *nc;
int ping_timeo, tcp_cork, ping_int;
+   struct sched_param param = { .sched_priority = 2 };
 
-   current->policy = SCHED_RR;  /* Make this a realtime task! */
-   current->rt_priority = 2;/* more important than all other tasks */
+   rv = sched_setscheduler(current, SCHED_RR, &param);
+   if (rv < 0)
+   conn_err(tconn, "drbd_asender: ERROR set priority, ret=%d\n", 
rv);
 
while (get_t_state(thi) == RUNNING) {
drbd_thread_current_set_cpu(thi);
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 3163307..1b4d4ee 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -52,7 +52,7 @@
 #endif
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.4.2"
+#define REL_VERSION "8.4.3"
 #define API_VERSION 1
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 101
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 15/17] drbd: fix for deadlock when using automatic split-brain-recovery

2013-03-27 Thread Philipp Reisner

With an automatic after split-brain recovery policy of
"after-sb-1pri call-pri-lost-after-sb",
when trying to drbd_set_role() to R_SECONDARY,
we run into a deadlock.

This was first recognized and supposedly fixed by
2009-06-10 "Fixed a deadlock when using automatic split brain recovery when 
both nodes are"
replacing drbd_set_role() with drbd_change_state() in that code-path,
but the first hunk of that patch forgets to remove the drbd_set_role().

We apparently only ever tested the "two primaries" case.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_receiver.c |1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index 7af0cc7..a75c0b1 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2662,7 +2662,6 @@ static int drbd_asb_recover_1p(struct drbd_conf *mdev) 
__must_hold(local)
if (hg == -1 && mdev->state.role == R_PRIMARY) {
enum drbd_state_rv rv2;
 
-   drbd_set_role(mdev, R_SECONDARY, 0);
 /* drbd_change_state() does not sleep while in 
SS_IN_TRANSIENT_STATE,
  * we might be here in C_WF_REPORT_PARAMS which is 
transient.
  * we do not need to wait for the after state change 
work either. */
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 02/17] drbd: reset ap_in_flight counter for new connections

2013-03-27 Thread Philipp Reisner

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_receiver.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index 1921871..cd172b4 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -849,6 +849,7 @@ int drbd_connected(struct drbd_conf *mdev)
err = drbd_send_current_state(mdev);
clear_bit(USE_DEGR_WFC_T, &mdev->flags);
clear_bit(RESIZE_PENDING, &mdev->flags);
+   atomic_set(&mdev->ap_in_flight, 0);
mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
return err;
 }
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 17/17] drbd: fix if(); found by kbuild test robot

2013-03-27 Thread Philipp Reisner

From: Lars Ellenberg 

Recently introduced al_begin_io_nonblock() was returning -EBUSY,
even when it should return -EWOULDBLOCK.

Impact:
A few spurious wake_up() calls in prepare_al_transaction_nonblock().

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_actlog.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 6afe173..6608076 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -389,7 +389,7 @@ int drbd_al_begin_io_nonblock(struct drbd_conf *mdev, 
struct drbd_interval *i)
if (unlikely(tmp != NULL)) {
struct bm_extent  *bm_ext = lc_entry(tmp, struct 
bm_extent, lce);
if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
-   if (!test_and_set_bit(BME_PRIORITY, 
&bm_ext->flags));
+   if (!test_and_set_bit(BME_PRIORITY, 
&bm_ext->flags))
return -EBUSY;
return -EWOULDBLOCK;
}
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 14/17] drbd: add module_put() on error path in drbd_proc_open()

2013-03-27 Thread Philipp Reisner

From: Alexey Khoroshilov 

If single_open() fails in drbd_proc_open(), module refcount is left incremented.
The patch adds module_put() on the error path.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov 
Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_proc.c |   10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 56672a6..30fe0a5 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -313,8 +313,14 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
 
 static int drbd_proc_open(struct inode *inode, struct file *file)
 {
-   if (try_module_get(THIS_MODULE))
-   return single_open(file, drbd_seq_show, PDE(inode)->data);
+   int err;
+
+   if (try_module_get(THIS_MODULE)) {
+   err = single_open(file, drbd_seq_show, PDE(inode)->data);
+   if (err)
+   module_put(THIS_MODULE);
+   return err;
+   }
return -ENODEV;
 }
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 03/17] drbd: abort start of resync early, if it raced with connection breakage

2013-03-27 Thread Philipp Reisner

We've seen a spurious full resync, because a connection breakage
raced with drbd_start_resync(, C_SYNC_TARGET),
and the resulting state change request intended to start the resync
ended up looking like a local invalidate.

Fix:
Double check the state inside the lock,
and don't even request that state change,
if we had connection or IO problems.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_worker.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index f41e224..7f51f88 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1653,7 +1653,9 @@ void drbd_start_resync(struct drbd_conf *mdev, enum 
drbd_conns side)
clear_bit(B_RS_H_DONE, &mdev->flags);
 
write_lock_irq(&global_state_lock);
-   if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
+   /* Did some connection breakage or IO error race with us? */
+   if (mdev->state.conn < C_CONNECTED
+   || !get_ldev_if_state(mdev, D_NEGOTIATING)) {
write_unlock_irq(&global_state_lock);
mutex_unlock(mdev->state_mutex);
return;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 13/17] drbd: fix drbd epoch write count for ahead/behind mode

2013-03-27 Thread Philipp Reisner

From: Lars Ellenberg 

The sanity check when receiving P_BARRIER_ACK does expect all write
requests with a given req->epoch to have been either all replicated,
or all not replicated.

Because req->epoch was assigned before calling maybe_pull_ahead(),
this expectation was not met, leading to an off-by-one in the sanity
check, and further to a "Protocol Error".

Fix: move the call to maybe_pull_ahead() a few lines up,
and assign req->epoch only after that.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_req.c |   14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index beefe65..c24379f 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -865,8 +865,10 @@ static void maybe_pull_ahead(struct drbd_conf *mdev)
bool congested = false;
enum drbd_on_congestion on_congestion;
 
+   rcu_read_lock();
nc = rcu_dereference(tconn->net_conf);
on_congestion = nc ? nc->on_congestion : OC_BLOCK;
+   rcu_read_unlock();
if (on_congestion == OC_BLOCK ||
tconn->agreed_pro_version < 96)
return;
@@ -960,14 +962,8 @@ static int drbd_process_write_request(struct drbd_request 
*req)
struct drbd_conf *mdev = req->w.mdev;
int remote, send_oos;
 
-   rcu_read_lock();
remote = drbd_should_do_remote(mdev->state);
-   if (remote) {
-   maybe_pull_ahead(mdev);
-   remote = drbd_should_do_remote(mdev->state);
-   }
send_oos = drbd_should_send_out_of_sync(mdev->state);
-   rcu_read_unlock();
 
/* Need to replicate writes.  Unless it is an empty flush,
 * which is better mapped to a DRBD P_BARRIER packet,
@@ -1087,9 +1083,13 @@ static void drbd_send_and_submit(struct drbd_conf *mdev, 
struct drbd_request *re
 * but will re-aquire it before it returns here.
 * Needs to be before the check on drbd_suspended() */
complete_conflicting_writes(req);
+   /* no more giving up req_lock from now on! */
+
+   /* check for congestion, and potentially stop sending
+* full data updates, but start sending "dirty bits" only. */
+   maybe_pull_ahead(mdev);
}
 
-   /* no more giving up req_lock from now on! */
 
if (drbd_suspended(mdev)) {
/* push back and retry: */
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 12/17] drbd: Fix build error when CONFIG_CRYPTO_HMAC is not set

2013-03-27 Thread Philipp Reisner

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_receiver.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index cd172b4..7af0cc7 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -4660,8 +4660,8 @@ static int drbd_do_features(struct drbd_tconn *tconn)
 #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
 static int drbd_do_auth(struct drbd_tconn *tconn)
 {
-   dev_err(DEV, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
-   dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
+   conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
+   conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
return -1;
 }
 #else
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 10/17] drbd: fix memory leak

2013-03-27 Thread Philipp Reisner

From: Lars Ellenberg 

We forgot to free the disk_conf,
so for each attach/detach cycle we leaked 336 bytes.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_main.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 67d2bb3..1b93a726 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2819,6 +2819,7 @@ void drbd_free_bc(struct drbd_backing_dev *ldev)
blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 
+   kfree(ldev->disk_conf);
kfree(ldev);
 }
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 09/17] drbd: only fail empty flushes if no good data is reachable

2013-03-27 Thread Philipp Reisner

From: Lars Ellenberg 

We completed empty flushes (blkdev_issue_flush()) with IO error
if we lost the local disk, even if we still have an established
replication link to a healthy remote disk.

Fix this to only report errors to upper layers,
if neither local nor remote data is reachable.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_req.c |   12 
 drivers/block/drbd/drbd_req.h |8 
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 9f7ff1c..beefe65 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -263,8 +263,7 @@ void drbd_req_complete(struct drbd_request *req, struct 
bio_and_error *m)
else
root = &mdev->read_requests;
drbd_remove_request_interval(root, req);
-   } else if (!(s & RQ_POSTPONED))
-   D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0);
+   }
 
/* Before we can signal completion to the upper layers,
 * we may need to close the current transfer log epoch.
@@ -755,6 +754,11 @@ int __req_mod(struct drbd_request *req, enum 
drbd_req_event what,
D_ASSERT(req->rq_state & RQ_NET_PENDING);
mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK|RQ_NET_DONE);
break;
+
+   case QUEUE_AS_DRBD_BARRIER:
+   start_new_tl_epoch(mdev->tconn);
+   mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE);
+   break;
};
 
return rv;
@@ -975,8 +979,8 @@ static int drbd_process_write_request(struct drbd_request 
*req)
/* The only size==0 bios we expect are empty flushes. */
D_ASSERT(req->master_bio->bi_rw & REQ_FLUSH);
if (remote)
-   start_new_tl_epoch(mdev->tconn);
-   return 0;
+   _req_mod(req, QUEUE_AS_DRBD_BARRIER);
+   return remote;
}
 
if (!remote && !send_oos)
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index c08d229..978cb1a 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -88,6 +88,14 @@ enum drbd_req_event {
QUEUE_FOR_NET_READ,
QUEUE_FOR_SEND_OOS,
 
+   /* An empty flush is queued as P_BARRIER,
+* which will cause it to complete "successfully",
+* even if the local disk flush failed.
+*
+* Just like "real" requests, empty flushes (blkdev_issue_flush()) will
+* only see an error if neither local nor remote data is reachable. */
+   QUEUE_AS_DRBD_BARRIER,
+
SEND_CANCELED,
SEND_FAILED,
HANDED_OVER_TO_NETWORK,
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 08/17] drbd: Fix disconnect to keep the peer disk state if connection breaks during operation

2013-03-27 Thread Philipp Reisner

The issue was that if the connection broke while we did the
graceful state change to C_DISCONNECTING (C_TEARDOWN), then
we returned a success code (SS_CW_NO_NEED) from the state engine.

The result is that we failed to call the fence-peer
script in such a case.

Fixed that by introducing a new error code (SS_OUTDATE_WO_CONN).
This one should never reach back into user space.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_nl.c  |7 +--
 drivers/block/drbd/drbd_state.c   |   14 +++---
 drivers/block/drbd/drbd_strings.c |1 +
 include/linux/drbd.h  |3 ++-
 4 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 56bafdc..39e9a91 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2198,8 +2198,11 @@ static enum drbd_state_rv conn_try_disconnect(struct 
drbd_tconn *tconn, bool for
return SS_SUCCESS;
case SS_PRIMARY_NOP:
/* Our state checking code wants to see the peer outdated. */
-   rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
-   pdsk, D_OUTDATED), CS_VERBOSE);
+   rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, pdsk, 
D_OUTDATED), 0);
+
+   if (rv == SS_OUTDATE_WO_CONN) /* lost connection before 
graceful disconnect succeeded */
+   rv = conn_request_state(tconn, NS(conn, 
C_DISCONNECTING), CS_VERBOSE);
+
break;
case SS_CW_FAILED_BY_PEER:
/* The peer probably wants to see us outdated. */
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 22e259f..90c5be2 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -642,6 +642,10 @@ is_valid_soft_transition(union drbd_state os, union 
drbd_state ns, struct drbd_t
&& os.conn < C_WF_REPORT_PARAMS)
rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget 
etc... */
 
+   if (ns.conn == C_DISCONNECTING && ns.pdsk == D_OUTDATED &&
+   os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)
+   rv = SS_OUTDATE_WO_CONN;
+
return rv;
 }
 
@@ -1748,13 +1752,9 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state 
mask, union drbd_state
if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags))
return SS_CW_FAILED_BY_PEER;
 
-   rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : 
SS_UNKNOWN_ERROR;
-
-   if (rv == SS_UNKNOWN_ERROR)
-   rv = conn_is_valid_transition(tconn, mask, val, 0);
-
-   if (rv == SS_SUCCESS)
-   rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
+   rv = conn_is_valid_transition(tconn, mask, val, 0);
+   if (rv == SS_SUCCESS && tconn->cstate == C_WF_REPORT_PARAMS)
+   rv = SS_UNKNOWN_ERROR; /* continue waiting */
 
return rv;
 }
diff --git a/drivers/block/drbd/drbd_strings.c 
b/drivers/block/drbd/drbd_strings.c
index 9a664bd..58e08ff 100644
--- a/drivers/block/drbd/drbd_strings.c
+++ b/drivers/block/drbd/drbd_strings.c
@@ -89,6 +89,7 @@ static const char *drbd_state_sw_errors[] = {
[-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated",
[-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state 
change",
[-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and 
aborted",
+   [-SS_OUTDATE_WO_CONN] = "Need a connection for a graceful 
disconnect/outdate peer",
[-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by 
config",
 };
 
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 0c5a18e..3163307 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -319,7 +319,8 @@ enum drbd_state_rv {
SS_IN_TRANSIENT_STATE = -18,  /* Retry after the next state change */
SS_CONCURRENT_ST_CHG = -19,   /* Concurrent cluster side state change! 
*/
SS_O_VOL_PEER_PRI = -20,
-   SS_AFTER_LAST_ERROR = -21,/* Keep this at bottom */
+   SS_OUTDATE_WO_CONN = -21,
+   SS_AFTER_LAST_ERROR = -22,/* Keep this at bottom */
 };
 
 /* from drbd_strings.c */
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 00/17] RFC: Pile of DRBD fixes

2013-03-27 Thread Philipp Reisner

The first patch improves the idr_for_each_entry() macro and its
documentation, which is not specific to DRBD.

Each patch in this series addresses an independent issue, please refer
to the individual commit messages for a description.


Alexey Khoroshilov (1):
  drbd: add module_put() on error path in drbd_proc_open()

George Spelvin (1):
  idr: document exit conditions on idr_for_each_entry better

Lars Ellenberg (5):
  drbd: only fail empty flushes if no good data is reachable
  drbd: fix memory leak
  drbd: validate resync_after dependency on attach already
  drbd: fix drbd epoch write count for ahead/behind mode
  drbd: fix if(); found by kbuild test robot

Philipp Reisner (10):
  drbd: reset ap_in_flight counter for new connections
  drbd: abort start of resync early, if it raced with connection
breakage
  drbd: move invalidating the whole bitmap out of after_state ch()
  drbd: fix effective error returned when refusing an invalidate
  drbd: drop now useless duplicate state request from invalidate
  drbd: fix spurious warning about bitmap being locked from detach
  drbd: Fix disconnect to keep the peer disk state if connection breaks
during operation
  drbd: Fix build error when CONFIG_CRYPTO_HMAC is not set
  drbd: fix for deadlock when using automatic split-brain-recovery
  drbd: use sched_setscheduler()

 drivers/block/drbd/drbd_actlog.c   |2 +-
 drivers/block/drbd/drbd_main.c |7 +++-
 drivers/block/drbd/drbd_nl.c   |   71 
 drivers/block/drbd/drbd_proc.c |   10 -
 drivers/block/drbd/drbd_receiver.c |   12 +++---
 drivers/block/drbd/drbd_req.c  |   26 +++--
 drivers/block/drbd/drbd_req.h  |8 
 drivers/block/drbd/drbd_state.c|   28 +++---
 drivers/block/drbd/drbd_strings.c  |1 +
 drivers/block/drbd/drbd_worker.c   |   19 --
 include/linux/drbd.h   |5 ++-
 include/linux/idr.h|   10 +++--
 12 files changed, 123 insertions(+), 76 deletions(-)

-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 11/17] drbd: validate resync_after dependency on attach already

2013-03-27 Thread Philipp Reisner

From: Lars Ellenberg 

We validated resync_after dependencies, if changed via disk-options.
But we did not validate them when first created via attach.
We also did not check or cleanup dependencies that used to be correct,
but now point to meanwhile removed minor devices.

If the drbd_resync_after_valid() validation in disk-options tried to
follow a dependency chain in this way, this could lead to NULL pointer
dereference.

Validate resync_after settings in drbd_adm_attach() already, as well as
in drbd_adm_disk_opts(), and only reject dependency loops.
Depending on non-existing disks is allowed and equivalent to no dependency.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_nl.c |6 ++
 drivers/block/drbd/drbd_worker.c |   15 ---
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 39e9a91..9e3f441 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1381,6 +1381,12 @@ int drbd_adm_attach(struct sk_buff *skb, struct 
genl_info *info)
goto fail;
}
 
+   write_lock_irq(&global_state_lock);
+   retcode = drbd_resync_after_valid(mdev, new_disk_conf->resync_after);
+   write_unlock_irq(&global_state_lock);
+   if (retcode != NO_ERROR)
+   goto fail;
+
rcu_read_lock();
nc = rcu_dereference(mdev->tconn->net_conf);
if (nc) {
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 7f51f88..891c0ec 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1426,7 +1426,7 @@ static int _drbd_may_sync_now(struct drbd_conf *mdev)
int resync_after;
 
while (1) {
-   if (!odev->ldev)
+   if (!odev->ldev || odev->state.disk == D_DISKLESS)
return 1;
rcu_read_lock();
resync_after = 
rcu_dereference(odev->ldev->disk_conf)->resync_after;
@@ -1434,7 +1434,7 @@ static int _drbd_may_sync_now(struct drbd_conf *mdev)
if (resync_after == -1)
return 1;
odev = minor_to_mdev(resync_after);
-   if (!expect(odev))
+   if (!odev)
return 1;
if ((odev->state.conn >= C_SYNC_SOURCE &&
 odev->state.conn <= C_PAUSED_SYNC_T) ||
@@ -1516,7 +1516,7 @@ enum drbd_ret_code drbd_resync_after_valid(struct 
drbd_conf *mdev, int o_minor)
 
if (o_minor == -1)
return NO_ERROR;
-   if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
+   if (o_minor < -1 || o_minor > MINORMASK)
return ERR_RESYNC_AFTER;
 
/* check for loops */
@@ -1525,6 +1525,15 @@ enum drbd_ret_code drbd_resync_after_valid(struct 
drbd_conf *mdev, int o_minor)
if (odev == mdev)
return ERR_RESYNC_AFTER_CYCLE;
 
+   /* You are free to depend on diskless, non-existing,
+* or not yet/no longer existing minors.
+* We only reject dependency loops.
+* We cannot follow the dependency chain beyond a detached or
+* missing minor.
+*/
+   if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
+   return NO_ERROR;
+
rcu_read_lock();
resync_after = 
rcu_dereference(odev->ldev->disk_conf)->resync_after;
rcu_read_unlock();
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 06/17] drbd: drop now useless duplicate state request from invalidate

2013-03-27 Thread Philipp Reisner

Patch best viewed with git diff --ignore-space-change.

Now that we attempt the fallback to local bitmap operation
only when disconnected, we can safely drop the extra "silent"
state request from both invalidate and invalidate-remote.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_nl.c |   62 +++---
 1 file changed, 28 insertions(+), 34 deletions(-)

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index c49bda7..56bafdc 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2446,26 +2446,19 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct 
genl_info *info)
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
drbd_flush_workqueue(mdev);
 
-   retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), 
CS_ORDERED);
-
-   /* If that did not work, try again,
-* but log failures this time (implicit CS_VERBOSE).
-*
-* If we happen to be C_STANDALONE R_SECONDARY,
-* just change to D_INCONSISTENT, and set all bits in the bitmap.
-* Otherwise, we just fail, to avoid races with the resync handshake.
+   /* If we happen to be C_STANDALONE R_SECONDARY, just change to
+* D_INCONSISTENT, and set all bits in the bitmap.  Otherwise,
+* try to start a resync handshake as sync target for full sync.
 */
-   if (retcode < SS_SUCCESS) {
-   if (mdev->state.conn == C_STANDALONE && mdev->state.role == 
R_SECONDARY) {
-   retcode = drbd_request_state(mdev, NS(disk, 
D_INCONSISTENT));
-   if (retcode >= SS_SUCCESS) {
-   if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
-   "set_n_write from invalidate", 
BM_LOCKED_MASK))
-   retcode = ERR_IO_MD_DISK;
-   }
-   } else
-   retcode = drbd_request_state(mdev, NS(conn, 
C_STARTING_SYNC_T));
-   }
+   if (mdev->state.conn == C_STANDALONE && mdev->state.role == 
R_SECONDARY) {
+   retcode = drbd_request_state(mdev, NS(disk, D_INCONSISTENT));
+   if (retcode >= SS_SUCCESS) {
+   if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
+   "set_n_write from invalidate", BM_LOCKED_MASK))
+   retcode = ERR_IO_MD_DISK;
+   }
+   } else
+   retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
drbd_resume_io(mdev);
 
 out:
@@ -2519,21 +2512,22 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, 
struct genl_info *info)
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
drbd_flush_workqueue(mdev);
 
-   retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), 
CS_ORDERED);
-   if (retcode < SS_SUCCESS) {
-   if (mdev->state.conn == C_STANDALONE && mdev->state.role == 
R_PRIMARY) {
-   /* The peer will get a resync upon connect anyways. 
Just make that
-  into a full resync. */
-   retcode = drbd_request_state(mdev, NS(pdsk, 
D_INCONSISTENT));
-   if (retcode >= SS_SUCCESS) {
-   if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
-  "set_n_write from 
invalidate_peer",
-  BM_LOCKED_SET_ALLOWED))
-   retcode = ERR_IO_MD_DISK;
-   }
-   } else
-   retcode = drbd_request_state(mdev, NS(conn, 
C_STARTING_SYNC_S));
-   }
+   /* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
+* in the bitmap.  Otherwise, try to start a resync handshake
+* as sync source for full sync.
+*/
+   if (mdev->state.conn == C_STANDALONE && mdev->state.role == R_PRIMARY) {
+   /* The peer will get a resync upon connect anyways. Just make 
that
+  into a full resync. */
+   retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
+   if (retcode >= SS_SUCCESS) {
+   if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
+   "set_n_write from invalidate_peer",
+   BM_LOCKED_SET_ALLOWED))
+   retcode = ERR_IO_MD_DISK;
+   }
+   } else
+   retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
drbd_resume_io(mdev);
 
 out:
-- 
1.7.9.5

--
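To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/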

[PATCH 07/17] drbd: fix spurious warning about bitmap being locked from detach

2013-03-27 Thread Philipp Reisner

Introduced in drbd: always write bitmap on detach,
the bitmap bulk writeout on detach was indicating
it expected exclusive bitmap access.

Where I meant to say: expect no more modifications,
but testing/counting is still allowed.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_main.c |6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index a150b59..67d2bb3 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3412,8 +3412,12 @@ static int w_go_diskless(struct drbd_work *w, int unused)
 * end up here after a failed attach, before ldev was even assigned.
 */
if (mdev->bitmap && mdev->ldev) {
+   /* An interrupted resync or similar is allowed to recount bits
+* while we detach.
+* Any modifications would not be expected anymore, though.
+*/
if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write,
-   "detach", BM_LOCKED_MASK)) {
+   "detach", BM_LOCKED_TEST_ALLOWED)) {
if (test_bit(WAS_READ_ERROR, &mdev->flags)) {
drbd_md_set_flag(mdev, MDF_FULL_SYNC);
drbd_md_sync(mdev);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 05/17] drbd: fix effective error returned when refusing an invalidate

2013-03-27 Thread Philipp Reisner

Since commit
  drbd: Disallow the peer_disk_state to be D_OUTDATED while connected
trying to invalidate a disconnected Primary returned an error code
that did not really match the situation:
"Refusing to be Outdated while Connected"

Insert two more specific conditions into is_valid_state(),
changing that to "Need access to UpToDate data",
respectively "Need a connection to start verify or resync".

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_state.c |7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 3bc686f..22e259f 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -570,6 +570,13 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
  mdev->tconn->agreed_pro_version < 88)
rv = SS_NOT_SUPPORTED;
 
+   else if (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < 
D_UP_TO_DATE)
+   rv = SS_NO_UP_TO_DATE_DISK;
+
+   else if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) 
&&
+ ns.pdsk == D_UNKNOWN)
+   rv = SS_NEED_CONNECTION;
+
else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
rv = SS_CONNECTED_OUTDATES;
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 01/17] idr: document exit conditions on idr_for_each_entry better

2013-03-27 Thread Philipp Reisner

From: George Spelvin 

And some manual common subexpression elimination which may help the
compiler produce smaller code.

Signed-off-by: George Spelvin 
Signed-off-by: Philipp Reisner 
---
 include/linux/idr.h |   10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 2640c7e..6ece058 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -122,11 +122,13 @@ static inline void *idr_find(struct idr *idr, int id)
  * @idp: idr handle
  * @entry:   the type * to use as cursor
  * @id:  id entry's key
+ *
+ * @entry and @id do not need to be initialized before the loop, and
+ * after normal termination @entry is left with the value NULL.  This
+ * is convenient for a "not found" value.
  */
-#define idr_for_each_entry(idp, entry, id) \
-   for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \
-entry != NULL; \
-++id, entry = (typeof(entry))idr_get_next((idp), &(id)))
+#define idr_for_each_entry(idp, entry, id) \
+   for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id)
 
 /*
  * Don't use the following functions.  These exist only to suppress
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 04/17] drbd: move invalidating the whole bitmap out of after_state ch()

2013-03-27 Thread Philipp Reisner

To avoid other state change requests, after passing through
sanitize_state(), to be mistaken for an invalidate,
move the "set all bits as out-of-sync" into the invalidate path.

Make invalidate and invalidate-remote behave consistently wrt.
current connection state (need either an established replication link,
or really be disconnected). Also mention that in the documentation.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_nl.c|   36 
 drivers/block/drbd/drbd_state.c |7 ---
 2 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 42fda4a..c49bda7 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2448,19 +2448,23 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct 
genl_info *info)
 
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), 
CS_ORDERED);
 
-   if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION)
-   retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
-
-   while (retcode == SS_NEED_CONNECTION) {
-   spin_lock_irq(&mdev->tconn->req_lock);
-   if (mdev->state.conn < C_CONNECTED)
-   retcode = _drbd_set_state(_NS(mdev, disk, 
D_INCONSISTENT), CS_VERBOSE, NULL);
-   spin_unlock_irq(&mdev->tconn->req_lock);
-
-   if (retcode != SS_NEED_CONNECTION)
-   break;
-
-   retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
+   /* If that did not work, try again,
+* but log failures this time (implicit CS_VERBOSE).
+*
+* If we happen to be C_STANDALONE R_SECONDARY,
+* just change to D_INCONSISTENT, and set all bits in the bitmap.
+* Otherwise, we just fail, to avoid races with the resync handshake.
+*/
+   if (retcode < SS_SUCCESS) {
+   if (mdev->state.conn == C_STANDALONE && mdev->state.role == 
R_SECONDARY) {
+   retcode = drbd_request_state(mdev, NS(disk, 
D_INCONSISTENT));
+   if (retcode >= SS_SUCCESS) {
+   if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
+   "set_n_write from invalidate", 
BM_LOCKED_MASK))
+   retcode = ERR_IO_MD_DISK;
+   }
+   } else
+   retcode = drbd_request_state(mdev, NS(conn, 
C_STARTING_SYNC_T));
}
drbd_resume_io(mdev);
 
@@ -2517,9 +2521,9 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct 
genl_info *info)
 
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), 
CS_ORDERED);
if (retcode < SS_SUCCESS) {
-   if (retcode == SS_NEED_CONNECTION && mdev->state.role == 
R_PRIMARY) {
-   /* The peer will get a resync upon connect anyways.
-* Just make that into a full resync. */
+   if (mdev->state.conn == C_STANDALONE && mdev->state.role == 
R_PRIMARY) {
+   /* The peer will get a resync upon connect anyways. 
Just make that
+  into a full resync. */
retcode = drbd_request_state(mdev, NS(pdsk, 
D_INCONSISTENT));
if (retcode >= SS_SUCCESS) {
if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 0fe220c..3bc686f 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1377,13 +1377,6 @@ static void after_state_ch(struct drbd_conf *mdev, union 
drbd_state os,
&drbd_bmio_set_n_write, &abw_start_sync,
"set_n_write from StartingSync", 
BM_LOCKED_TEST_ALLOWED);
 
-   /* We are invalidating our self... */
-   if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
-   os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
-   /* other bitmap operation expected during this phase */
-   drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
-   "set_n_write from invalidate", BM_LOCKED_MASK);
-
/* first half of local IO error, failure to attach,
 * or administrative detach */
if (os.disk != D_FAILED && ns.disk == D_FAILED) {
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

drbd fixes

2012-08-16 Thread Philipp Reisner

Hi Jens,

Please consider pulling these 3 fixes in for 3.6.

Best,
 Phil

The following changes since commit a73ff3231df59a4b92ccd0dd4e73897c5822489b:

  drbd: announce FLUSH/FUA capability to upper layers (2012-07-24 15:14:28 
+0200)

are available in the git repository at:

  git://git.drbd.org/linux-drbd.git for-jens

for you to fetch changes up to d1aa4d04da8de5c89d73859e077d89c4c71d8ed1:

  drbd: Write all pages of the bitmap after an online resize (2012-08-16 
17:17:35 +0200)


Lars Ellenberg (1):
  drbd: fix drbd wire compatibility for empty flushes

Philipp Reisner (2):
  drbd: Finish requests that completed while IO was frozen
  drbd: Write all pages of the bitmap after an online resize

 drivers/block/drbd/drbd_bitmap.c |   15 ++-
 drivers/block/drbd/drbd_int.h|1 +
 drivers/block/drbd/drbd_main.c   |   28 
 drivers/block/drbd/drbd_nl.c |4 ++--
 drivers/block/drbd/drbd_req.c|   36 
 5 files changed, 61 insertions(+), 23 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

drbd-8.3 fixes

2012-07-24 Thread Philipp Reisner

Hi Jens,

Please consider pulling these changes for the 3.6 merge window.
I did not find a for-3.6/drivers branch, so I based the patches
on the 3.5 release.

Best,
 Phil


The following changes since commit 28a33cbc24e4256c143dce96c7d93bf423229f92:

  Linux 3.5 (2012-07-21 13:58:29 -0700)

are available in the git repository at:

  git://git.drbd.org/linux-drbd.git for-jens

for you to fetch changes up to a73ff3231df59a4b92ccd0dd4e73897c5822489b:

  drbd: announce FLUSH/FUA capability to upper layers (2012-07-24 15:14:28 
+0200)


Lars Ellenberg (10):
  drbd: cleanup, remove two unused global flags
  drbd: differentiate between normal and forced detach
  drbd: report congestion if we are waiting for some userland callback
  drbd: reset congestion information before reporting it in /proc/drbd
  drbd: do not reset rs_pending_cnt too early
  drbd: call local-io-error handler early
  drbd: fix potential access after free
  drbd: flush drbd work queue before invalidate/invalidate remote   
  
  drbd: fix max_bio_size to be unsigned 
  
  drbd: announce FLUSH/FUA capability to upper layers

 drivers/block/drbd/drbd_actlog.c   |8 +++--
 drivers/block/drbd/drbd_bitmap.c   |4 +--
 drivers/block/drbd/drbd_int.h  |   44 
 drivers/block/drbd/drbd_main.c |   65 +++-
 drivers/block/drbd/drbd_nl.c   |   36 +++-
 drivers/block/drbd/drbd_proc.c |3 ++
 drivers/block/drbd/drbd_receiver.c |   38 +++--
 drivers/block/drbd/drbd_req.c  |9 +++--
 drivers/block/drbd/drbd_worker.c   |   12 ++-
 9 files changed, 153 insertions(+), 66 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[GIT PULL] drbd regression fix

2013-01-21 Thread Philipp Reisner

Hi Jens,

Here is a single fix I want to hand in for Linux-3.8.

The following changes since commit d2ec180c23a5a1bfe34d8638b0342a47c00cf70f:

  drbd: update Kconfig to match current dependencies (2012-12-06 13:08:29 +0100)

are available in the git repository at:

  git://git.drbd.org/linux-drbd for-jens

for you to fetch changes up to 2681f7f6ce6c7416eb619d0fb19422bcc68bd9e1:

  drbd: fix potential protocol error and resulting disconnect/reconnect 
(2013-01-21 22:58:36 +0100)


Lars Ellenberg (1):
  drbd: fix potential protocol error and resulting disconnect/reconnect

 drivers/block/drbd/drbd_req.c   |2 +-
 drivers/block/drbd/drbd_req.h   |1 +
 drivers/block/drbd/drbd_state.c |7 +++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index f58a4a4..2b8303a 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -168,7 +168,7 @@ static void wake_all_senders(struct drbd_tconn *tconn) {
 }
 
 /* must hold resource->req_lock */
-static void start_new_tl_epoch(struct drbd_tconn *tconn)
+void start_new_tl_epoch(struct drbd_tconn *tconn)
 {
/* no point closing an epoch, if it is empty, anyways. */
if (tconn->current_tle_writes == 0)
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 016de6b..c08d229 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -267,6 +267,7 @@ struct bio_and_error {
int error;
 };
 
+extern void start_new_tl_epoch(struct drbd_tconn *tconn);
 extern void drbd_req_destroy(struct kref *kref);
 extern void _req_may_be_done(struct drbd_request *req,
struct bio_and_error *m);
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 53bf618..0fe220c 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -931,6 +931,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state 
ns,
enum drbd_state_rv rv = SS_SUCCESS;
enum sanitize_state_warnings ssw;
struct after_state_chg_work *ascw;
+   bool did_remote, should_do_remote;
 
os = drbd_read_state(mdev);
 
@@ -981,11 +982,17 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state 
ns,
(os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
atomic_inc(&mdev->local_cnt);
 
+   did_remote = drbd_should_do_remote(mdev->state);
mdev->state.i = ns.i;
+   should_do_remote = drbd_should_do_remote(mdev->state);
mdev->tconn->susp = ns.susp;
mdev->tconn->susp_nod = ns.susp_nod;
mdev->tconn->susp_fen = ns.susp_fen;
 
+   /* put replicated vs not-replicated requests in separate epochs */
+   if (did_remote != should_do_remote)
+   start_new_tl_epoch(mdev->tconn);
+
if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
drbd_print_uuids(mdev, "attached to UUIDs");
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

connector: Bugfix for cn_call_callback()

2007-03-07 Thread Philipp Reisner

Hi Evgeniy,

When one stresses the connector code by sending many messages
from userspace to the kernel, one can end up in the "unlikely()"
part of cn_call_callback().

There, a new __cbq gets allocated, and a NULL pointer gets assigned
to the callback by dereferencing the new __cbq. This is the bug. The right
thing is to dereference the original __cbq. Therefore the bugfix
is to use a new variable for the newly allocated __cbq.

This is tested, and it fixes the issue.

Signed-off-by: Philipp Reisner <[EMAIL PROTECTED]>
Signed-off-by: Lars Ellenberg <[EMAIL PROTECTED]>

--- /usr/src/linux-2.6.20/drivers/connector/connector.c 2007-03-07 
11:45:38.0 +0100
+++ /usr/src/linux-2.6.20-modified/drivers/connector/connector.c
2007-03-07 11:39:11.0 +0100
@@ -128,7 +128,7 @@
  */
 static int cn_call_callback(struct cn_msg *msg, void (*destruct_data)(void *), 
void *data)
 {
-   struct cn_callback_entry *__cbq;
+   struct cn_callback_entry *__cbq, *__new_cbq;
struct cn_dev *dev = &cdev;
int err = -ENODEV;

@@ -148,23 +148,23 @@
} else {
struct cn_callback_data *d;

-   __cbq = kzalloc(sizeof(*__cbq), GFP_ATOMIC);
-   if (__cbq) {
-   d = &__cbq->data;
+   __new_cbq = kzalloc(sizeof(*__new_cbq), 
GFP_ATOMIC);
+   if (__new_cbq) {
+   d = &__new_cbq->data;
d->callback_priv = msg;
d->callback = __cbq->data.callback;
d->ddata = data;
d->destruct_data = destruct_data;
-   d->free = __cbq;
+   d->free = __new_cbq;

-   INIT_WORK(&__cbq->work,
-   &cn_queue_wrapper);
+   INIT_WORK(&__new_cbq->work,
+ &cn_queue_wrapper);

if (queue_work(dev->cbdev->cn_queue,
-   &__cbq->work))
+   &__new_cbq->work))
err = 0;
else {
-   kfree(__cbq);
+   kfree(__new_cbq);
err = -EINVAL;
        }
} else


-- 
: Dipl-Ing Philipp Reisner  Tel +43-1-8178292-50 :
: LINBIT Information Technologies GmbH  Fax +43-1-8178292-82 :
: Vivenotgasse 48, 1120 Vienna, Austriahttp://www.linbit.com :
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] drbd: mark symbols static where possible

2016-09-02 Thread Philipp Reisner

Hi Baoyou,

thanks for the patch. I applied it to our tree. Will be sent to
one of the next merge windows...

best regards,
 Phil
Am Donnerstag, 1. September 2016, 18:57:53 CEST schrieb Baoyou Xie:
> We get a few warnings when building kernel with W=1:
> drivers/block/drbd/drbd_receiver.c:1224:6: warning: no previous prototype
> for 'one_flush_endio' [-Wmissing-prototypes]
> drivers/block/drbd/drbd_req.c:1450:6: warning: no previous prototype for
> 'send_and_submit_pending' [-Wmissing-prototypes]
> drivers/block/drbd/drbd_main.c:924:6: warning: no previous prototype for
> 'assign_p_sizes_qlim' [-Wmissing-prototypes] 
> 
> In fact, these functions are only used in the file in which they are
> declared and don't need a declaration, but can be made static.
> So this patch marks these functions with 'static'.
> 
> Signed-off-by: Baoyou Xie 
> ---
>  drivers/block/drbd/drbd_main.c | 4 +++-
>  drivers/block/drbd/drbd_receiver.c | 2 +-
>  drivers/block/drbd/drbd_req.c  | 3 ++-
>  drivers/block/drbd/drbd_worker.c   | 3 ++-
>  4 files changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
> index 100be55..f0aa746 100644
> --- a/drivers/block/drbd/drbd_main.c
> +++ b/drivers/block/drbd/drbd_main.c
> @@ -921,7 +921,9 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device
> *peer_device) }
> 
>  /* communicated if (agreed_features & DRBD_FF_WSAME) */
> -void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p,
> struct request_queue *q) +static void
> +assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p,
> + struct request_queue *q)
>  {
>   if (q) {
>   p->qlim->physical_block_size = 
cpu_to_be32(queue_physical_block_size(q));
> diff --git a/drivers/block/drbd/drbd_receiver.c
> b/drivers/block/drbd/drbd_receiver.c index 942384f..432f39a 100644
> --- a/drivers/block/drbd/drbd_receiver.c
> +++ b/drivers/block/drbd/drbd_receiver.c
> @@ -1221,7 +1221,7 @@ struct one_flush_context {
>   struct issue_flush_context *ctx;
>  };
> 
> -void one_flush_endio(struct bio *bio)
> +static void one_flush_endio(struct bio *bio)
>  {
>   struct one_flush_context *octx = bio->bi_private;
>   struct drbd_device *device = octx->device;
> diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
> index de279fe..c725bf5 100644
> --- a/drivers/block/drbd/drbd_req.c
> +++ b/drivers/block/drbd/drbd_req.c
> @@ -1447,7 +1447,8 @@ static bool prepare_al_transaction_nonblock(struct
> drbd_device *device, return !list_empty(pending);
>  }
> 
> -void send_and_submit_pending(struct drbd_device *device, struct list_head
> *pending) +static void
> +send_and_submit_pending(struct drbd_device *device, struct list_head
> *pending) {
>   struct drbd_request *req, *tmp;
> 
> diff --git a/drivers/block/drbd/drbd_worker.c
> b/drivers/block/drbd/drbd_worker.c index c6755c9..70f2706 100644
> --- a/drivers/block/drbd/drbd_worker.c
> +++ b/drivers/block/drbd/drbd_worker.c
> @@ -194,7 +194,8 @@ void drbd_peer_request_endio(struct bio *bio)
>   }
>  }
> 
> -void drbd_panic_after_delayed_completion_of_aborted_request(struct
> drbd_device *device) +static void
> +drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device
> *device) {
>   panic("drbd%u %s/%u potential random memory corruption caused by delayed
> completion of aborted local request\n", device->minor,
> device->resource->name, device->vnr);

[PATCH] Fix smatch warning

2014-02-19 Thread Philipp Reisner

Hi Jens,

After I posted the pull request, we got a report about a smatch warning,
which was not CCed to you or the lkml.

It is not a bug right now, but a mistake that could easily become a bug
as we move forward. Normally I would keep this in my queue until the next
batch. Just to be prepared in case the janitors run smatch on your tree:
Please pull it into for-3.15/drivers.

Andreas Gruenbacher (1):
  drbd: Fix future possible NULL pointer dereference

 drivers/block/drbd/drbd_nl.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

-- 
1.7.9.5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] drbd: Fix future possible NULL pointer dereference

2014-02-19 Thread Philipp Reisner

From: Andreas Gruenbacher 

Right now every resource has exactly one connection. But we are preparing
for dynamic connections. I.e. in the future there can be resources without
connections.

However smatch points this out as 'variable dereferenced before check',
which is correct.

This issue was introduced in
drbd: get_one_status(): Iterate over resource->devices instead of 
connection->peer_devices

Reported-by: Dan Carpenter 
Signed-off-by: Andreas Gruenbacher 
Signed-off-by: Philipp Reisner 
---
 drivers/block/drbd/drbd_nl.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 2086b12..526414b 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2776,7 +2776,7 @@ static int nla_put_drbd_cfg_context(struct sk_buff *skb,
if (device &&
nla_put_u32(skb, T_ctx_volume, device->vnr))
goto nla_put_failure;
-   if (nla_put_string(skb, T_ctx_resource_name, 
connection->resource->name))
+   if (nla_put_string(skb, T_ctx_resource_name, resource->name))
goto nla_put_failure;
if (connection) {
if (connection->my_addr_len &&
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/2] drbd: use RB_DECLARE_CALLBACKS() to define augment callbacks

2014-09-18 Thread Philipp Reisner

From: Lai Jiangshan 

The original code is the same as what RB_DECLARE_CALLBACKS() generates.

CC: Michel Lespinasse 
Signed-off-by: Lai Jiangshan 
Signed-off-by: Andreas Gruenbacher 
Signed-off-by: Philipp Reisner 
---
 drivers/block/drbd/drbd_interval.c | 36 ++--
 1 file changed, 2 insertions(+), 34 deletions(-)

diff --git a/drivers/block/drbd/drbd_interval.c 
b/drivers/block/drbd/drbd_interval.c
index 04a14e0..51b25ad 100644
--- a/drivers/block/drbd/drbd_interval.c
+++ b/drivers/block/drbd/drbd_interval.c
@@ -37,40 +37,8 @@ compute_subtree_last(struct drbd_interval *node)
return max;
 }
 
-static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
-{
-   while (rb != stop) {
-   struct drbd_interval *node = rb_entry(rb, struct drbd_interval, 
rb);
-   sector_t subtree_last = compute_subtree_last(node);
-   if (node->end == subtree_last)
-   break;
-   node->end = subtree_last;
-   rb = rb_parent(&node->rb);
-   }
-}
-
-static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new)
-{
-   struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb);
-   struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb);
-
-   new->end = old->end;
-}
-
-static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
-{
-   struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb);
-   struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb);
-
-   new->end = old->end;
-   old->end = compute_subtree_last(old);
-}
-
-static const struct rb_augment_callbacks augment_callbacks = {
-   augment_propagate,
-   augment_copy,
-   augment_rotate,
-};
+RB_DECLARE_CALLBACKS(static, augment_callbacks, struct drbd_interval, rb,
+sector_t, end, compute_subtree_last);
 
 /**
  * drbd_insert_interval  -  insert a new interval into a tree
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/2] More for the 3.18 merge window

2014-09-18 Thread Philipp Reisner

Hi Jens,

please put these two patches into your for-3.18/drivers branch.  They fix
the safety code for concurrent overlapping write detection in a DRBD
dual primary setup.

The first of those two should also go to the stable kernels, I marked it
with CC: sta...@kernel.org


Lai Jiangshan (2):
  drbd: compute the end before rb_insert_augmented()
  drbd: use RB_DECLARE_CALLBACKS() to define augment callbacks

 drivers/block/drbd/drbd_interval.c | 40 ++
 1 file changed, 6 insertions(+), 34 deletions(-)

-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/2] drbd: compute the end before rb_insert_augmented()

2014-09-18 Thread Philipp Reisner

From: Lai Jiangshan 

Commit 98683650 "Merge branch 'drbd-8.4_ed6' into
for-3.8-drivers-drbd-8.4_ed6" switches to the new augment API, but the
new API requires that the tree is augmented before rb_insert_augmented()
is called, which is missing.

So we add the augment code to drbd_insert_interval(), where it walks the
tree from top to bottom before calling rb_insert_augmented().  See the
example in include/linux/interval_tree_generic.h or Documentation/rbtree.txt.

drbd_insert_interval() may cancel the insertion while walking the tree.  In
that case the newly added augment code has not changed anything before the
cancel, since the @this node is already in the subtrees.

CC: Michel Lespinasse 
CC: sta...@kernel.org # v3.10+
Signed-off-by: Lai Jiangshan 
Signed-off-by: Andreas Gruenbacher 
Signed-off-by: Philipp Reisner 
---
 drivers/block/drbd/drbd_interval.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/block/drbd/drbd_interval.c 
b/drivers/block/drbd/drbd_interval.c
index 89c497c..04a14e0 100644
--- a/drivers/block/drbd/drbd_interval.c
+++ b/drivers/block/drbd/drbd_interval.c
@@ -79,6 +79,7 @@ bool
 drbd_insert_interval(struct rb_root *root, struct drbd_interval *this)
 {
struct rb_node **new = &root->rb_node, *parent = NULL;
+   sector_t this_end = this->sector + (this->size >> 9);
 
BUG_ON(!IS_ALIGNED(this->size, 512));
 
@@ -87,6 +88,8 @@ drbd_insert_interval(struct rb_root *root, struct 
drbd_interval *this)
rb_entry(*new, struct drbd_interval, rb);
 
parent = *new;
+   if (here->end < this_end)
+   here->end = this_end;
if (this->sector < here->sector)
new = &(*new)->rb_left;
else if (this->sector > here->sector)
@@ -99,6 +102,7 @@ drbd_insert_interval(struct rb_root *root, struct 
drbd_interval *this)
return false;
}
 
+   this->end = this_end;
rb_link_node(&this->rb, parent, new);
rb_insert_augmented(&this->rb, root, &augment_callbacks);
return true;
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3/9] drbd: Remove superfluous newline from "resync_extents" debugfs entry.

2014-09-11 Thread Philipp Reisner

From: Philipp Marek 

See "drbd/resources/*/volumes/*/resync_extents".

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/drbd/drbd_debugfs.c 
b/drivers/block/drbd/drbd_debugfs.c
index 5c20b18..900d4d3 100644
--- a/drivers/block/drbd/drbd_debugfs.c
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -695,7 +695,7 @@ static void resync_dump_detail(struct seq_file *m, struct 
lc_element *e)
 {
struct bm_extent *bme = lc_entry(e, struct bm_extent, lce);
 
-   seq_printf(m, "%5d %s %s %s\n", bme->rs_left,
+   seq_printf(m, "%5d %s %s %s", bme->rs_left,
  test_bit(BME_NO_WRITES, &bme->flags) ? "NO_WRITES" : 
"-",
  test_bit(BME_LOCKED, &bme->flags) ? "LOCKED" : "--",
  test_bit(BME_PRIORITY, &bme->flags) ? "PRIORITY" : ""
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/9] DRBD updates for 3.18

2014-09-11 Thread Philipp Reisner

Hi Jens,

please consider putting these patches into your (future) for-3.18/drivers
branch. I have posted them for review on August 28. It consists mainly of
minor cleanups, and an important performance improvement. With that
DRBD was seen to write with 160k IOPS, on SSDs.

You may take the patches from this posting, or pull them from
http://git.drbd.org/linux-2.6-drbd.git for-3.18-rc1
the branch there is based on Linux 3.17-rc4.

Andreas Gruenbacher (5):
  drbd: Use better variable names
  drbd: Use consistent names for all the bi_end_io callbacks
  drbd: Avoid inconsistent locking warning
  drbd: Get rid of the __no_warn and __cond_lock macros
  drbd: Get rid of the WORK_PENDING macro

Lars Ellenberg (2):
  drbd: Improve asender performance
  drbd: reduce lock contention in drbd_worker

Philipp Marek (1):
  drbd: Remove superfluous newline from "resync_extents" debugfs entry.

Philipp Reisner (1):
  drbd: Add missing newline in resync progress display in /proc/drbd

 drivers/block/drbd/drbd_actlog.c   |  4 +--
 drivers/block/drbd/drbd_bitmap.c   |  6 ++---
 drivers/block/drbd/drbd_debugfs.c  |  2 +-
 drivers/block/drbd/drbd_int.h  | 19 +++---
 drivers/block/drbd/drbd_main.c | 28 ++--
 drivers/block/drbd/drbd_proc.c |  4 ++-
 drivers/block/drbd/drbd_receiver.c | 52 +-
 drivers/block/drbd/drbd_req.c  |  2 +-
 drivers/block/drbd/drbd_state.c| 18 ++---
 drivers/block/drbd/drbd_worker.c   | 51 +++--
 10 files changed, 93 insertions(+), 93 deletions(-)

-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 4/9] drbd: Avoid inconsistent locking warning

2014-09-11 Thread Philipp Reisner

From: Andreas Gruenbacher 

request_timer_fn() takes resource->req_lock via the device and releases it via
the connection.  Avoid this, as it confuses static code checkers.

Reported-by: "Dan Carpenter" 
Signed-off-by: Andreas Gruenbacher 

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_req.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index c67717d..5a01c53 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1645,6 +1645,6 @@ void request_timer_fn(unsigned long data)
? oldest_submit_jif + dt : now + et;
nt = time_before(ent, dt) ? ent : dt;
 out:
-   spin_unlock_irq(&connection->resource->req_lock);
+   spin_unlock_irq(&device->resource->req_lock);
mod_timer(&device->request_timer, nt);
 }
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/9] drbd: Use consistent names for all the bi_end_io callbacks

2014-09-11 Thread Philipp Reisner

From: Andreas Gruenbacher 

Now they follow the _endio naming scheme.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_actlog.c | 4 ++--
 drivers/block/drbd/drbd_bitmap.c | 6 +++---
 drivers/block/drbd/drbd_int.h| 2 +-
 drivers/block/drbd/drbd_worker.c | 6 +++---
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index d26a3fa..a2dfa16 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -158,14 +158,14 @@ static int _drbd_md_sync_page_io(struct drbd_device 
*device,
if (bio_add_page(bio, device->md_io.page, size, 0) != size)
goto out;
bio->bi_private = device;
-   bio->bi_end_io = drbd_md_io_complete;
+   bio->bi_end_io = drbd_md_endio;
bio->bi_rw = rw;
 
if (!(rw & WRITE) && device->state.disk == D_DISKLESS && device->ldev 
== NULL)
/* special case, drbd_md_read() during drbd_adm_attach(): no 
get_ldev */
;
else if (!get_ldev_if_state(device, D_ATTACHING)) {
-   /* Corresponding put_ldev in drbd_md_io_complete() */
+   /* Corresponding put_ldev in drbd_md_endio() */
drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in 
_drbd_md_sync_page_io()\n");
err = -ENODEV;
goto out;
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 426c97a..434c77d 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -941,7 +941,7 @@ static void drbd_bm_aio_ctx_destroy(struct kref *kref)
 }
 
 /* bv_page may be a copy, or may be the original */
-static void bm_async_io_complete(struct bio *bio, int error)
+static void drbd_bm_endio(struct bio *bio, int error)
 {
struct drbd_bm_aio_ctx *ctx = bio->bi_private;
struct drbd_device *device = ctx->device;
@@ -1027,7 +1027,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, 
int page_nr) __must_ho
 * according to api.  Do we want to assert that? */
bio_add_page(bio, page, len, 0);
bio->bi_private = ctx;
-   bio->bi_end_io = bm_async_io_complete;
+   bio->bi_end_io = drbd_bm_endio;
 
if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : 
DRBD_FAULT_MD_RD)) {
bio->bi_rw |= rw;
@@ -1125,7 +1125,7 @@ static int bm_rw(struct drbd_device *device, const 
unsigned int flags, unsigned
}
 
/*
-* We initialize ctx->in_flight to one to make sure bm_async_io_complete
+* We initialize ctx->in_flight to one to make sure drbd_bm_endio
 * will not set ctx->done early, and decrement / test it here.  If there
 * are still some bios in flight, we need to wait for them here.
 * If all IO is done already (or nothing had been submitted), there is
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 9e1288e..f424dc0 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1483,7 +1483,7 @@ extern int drbd_khelper(struct drbd_device *device, char 
*cmd);
 
 /* drbd_worker.c */
 /* bi_end_io handlers */
-extern void drbd_md_io_complete(struct bio *bio, int error);
+extern void drbd_md_endio(struct bio *bio, int error);
 extern void drbd_peer_request_endio(struct bio *bio, int error);
 extern void drbd_request_endio(struct bio *bio, int error);
 extern int drbd_worker(struct drbd_thread *thi);
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 50776b3..6e01e62 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -43,10 +43,10 @@ static int make_ov_request(struct drbd_device *, int);
 static int make_resync_request(struct drbd_device *, int);
 
 /* endio handlers:
- *   drbd_md_io_complete (defined here)
+ *   drbd_md_endio (defined here)
  *   drbd_request_endio (defined here)
  *   drbd_peer_request_endio (defined here)
- *   bm_async_io_complete (defined in drbd_bitmap.c)
+ *   drbd_bm_endio (defined in drbd_bitmap.c)
  *
  * For all these callbacks, note the following:
  * The callbacks will be called in irq context by the IDE drivers,
@@ -65,7 +65,7 @@ rwlock_t global_state_lock;
 /* used for synchronous meta data and bitmap IO
  * submitted by drbd_md_sync_page_io()
  */
-void drbd_md_io_complete(struct bio *bio, int error)
+void drbd_md_endio(struct bio *bio, int error)
 {
struct drbd_device *device;
 
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 6/9] drbd: Get rid of the WORK_PENDING macro

2014-09-11 Thread Philipp Reisner

From: Andreas Gruenbacher 

This macro doesn't add any value; just use test_bit() instead.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_worker.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index b20cd21..3b74f08 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1931,19 +1931,18 @@ void __update_timing_details(
++(*cb_nr);
 }
 
-#define WORK_PENDING(work_bit, todo)   (todo & (1UL << work_bit))
 static void do_device_work(struct drbd_device *device, const unsigned long 
todo)
 {
-   if (WORK_PENDING(MD_SYNC, todo))
+   if (test_bit(MD_SYNC, &todo))
do_md_sync(device);
-   if (WORK_PENDING(RS_DONE, todo) ||
-   WORK_PENDING(RS_PROGRESS, todo))
-   update_on_disk_bitmap(device, WORK_PENDING(RS_DONE, todo));
-   if (WORK_PENDING(GO_DISKLESS, todo))
+   if (test_bit(RS_DONE, &todo) ||
+   test_bit(RS_PROGRESS, &todo))
+   update_on_disk_bitmap(device, test_bit(RS_DONE, &todo));
+   if (test_bit(GO_DISKLESS, &todo))
go_diskless(device);
-   if (WORK_PENDING(DESTROY_DISK, todo))
+   if (test_bit(DESTROY_DISK, &todo))
drbd_ldev_destroy(device);
-   if (WORK_PENDING(RS_START, todo))
+   if (test_bit(RS_START, &todo))
do_start_resync(device);
 }
 
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 7/9] drbd: Improve asender performance

2014-09-11 Thread Philipp Reisner

From: Lars Ellenberg 

Shorten receive path in the asender thread. Reduces CPU utilisation
of asender when receiving packets, and with that increases IOPs.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_receiver.c |  6 ++
 drivers/block/drbd/drbd_worker.c   | 11 +--
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index 3ae769e..6960fb0 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -5561,6 +5561,7 @@ int drbd_asender(struct drbd_thread *thi)
 * rv <  expected: "woken" by signal during receive
 * rv == 0   : "connection shut down by peer"
 */
+received_more:
if (likely(rv > 0)) {
received += rv;
buf  += rv;
@@ -5636,6 +5637,11 @@ int drbd_asender(struct drbd_thread *thi)
expect   = header_size;
cmd  = NULL;
}
+   if (test_bit(SEND_PING, &connection->flags))
+   continue;
+   rv = drbd_recv_short(connection->meta.socket, buf, 
expect-received, MSG_DONTWAIT);
+   if (rv > 0)
+   goto received_more;
}
 
if (0) {
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 3b74f08..3ed2d87 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1994,22 +1994,13 @@ static bool dequeue_work_batch(struct drbd_work_queue 
*queue, struct list_head *
return !list_empty(work_list);
 }
 
-static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head 
*work_list)
-{
-   spin_lock_irq(&queue->q_lock);
-   if (!list_empty(&queue->q))
-   list_move(queue->q.next, work_list);
-   spin_unlock_irq(&queue->q_lock);
-   return !list_empty(work_list);
-}
-
 static void wait_for_work(struct drbd_connection *connection, struct list_head 
*work_list)
 {
DEFINE_WAIT(wait);
struct net_conf *nc;
int uncork, cork;
 
-   dequeue_work_item(&connection->sender_work, work_list);
+   dequeue_work_batch(&connection->sender_work, work_list);
if (!list_empty(work_list))
return;
 
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 9/9] drbd: Add missing newline in resync progress display in /proc/drbd

2014-09-11 Thread Philipp Reisner

Was broken in 2010 with commit 4b0715f096

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_proc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 06e6147..3b10fa6 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -142,10 +142,12 @@ static void drbd_syncer_progress(struct drbd_device 
*device, struct seq_file *se
(unsigned long) Bit2KB(rs_left >> 10),
(unsigned long) Bit2KB(rs_total >> 10));
else
-   seq_printf(seq, "(%lu/%lu)K\n\t",
+   seq_printf(seq, "(%lu/%lu)K",
(unsigned long) Bit2KB(rs_left),
(unsigned long) Bit2KB(rs_total));
 
+   seq_printf(seq, "\n\t");
+
/* see drivers/md/md.c
 * We do not want to overflow, so the order of operands and
 * the * 100 / 100 trick are important. We do a +1 to be
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 8/9] drbd: reduce lock contention in drbd_worker

2014-09-11 Thread Philipp Reisner

From: Lars Ellenberg 

The worker may now dequeue work items in batches.
This should reduce lock contention during busy periods.

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_worker.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 3ed2d87..d2d1f97 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -2026,8 +2026,6 @@ static void wait_for_work(struct drbd_connection 
*connection, struct list_head *
prepare_to_wait(&connection->sender_work.q_wait, &wait, 
TASK_INTERRUPTIBLE);
spin_lock_irq(&connection->resource->req_lock);
spin_lock(&connection->sender_work.q_lock); /* FIXME get 
rid of this one? */
-   /* dequeue single item only,
-* we still use drbd_queue_work_front() in some places */
if (!list_empty(&connection->sender_work.q))
list_splice_tail_init(&connection->sender_work.q, 
work_list);
spin_unlock(&connection->sender_work.q_lock);   /* FIXME get 
rid of this one? */
@@ -2114,7 +2112,7 @@ int drbd_worker(struct drbd_thread *thi)
if (get_t_state(thi) != RUNNING)
break;
 
-   while (!list_empty(&work_list)) {
+   if (!list_empty(&work_list)) {
w = list_first_entry(&work_list, struct drbd_work, 
list);
list_del_init(&w->list);
update_worker_timing_details(connection, w->cb);
@@ -2130,13 +2128,13 @@ int drbd_worker(struct drbd_thread *thi)
update_worker_timing_details(connection, 
do_unqueued_work);
do_unqueued_work(connection);
}
-   while (!list_empty(&work_list)) {
+   if (!list_empty(&work_list)) {
w = list_first_entry(&work_list, struct drbd_work, 
list);
list_del_init(&w->list);
update_worker_timing_details(connection, w->cb);
w->cb(w, 1);
-   }
-   dequeue_work_batch(&connection->sender_work, &work_list);
+   } else
+   dequeue_work_batch(&connection->sender_work, 
&work_list);
} while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, 
&connection->flags));
 
rcu_read_lock();
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

1 2 3 4 5 >

1 - 100 of 455 matches

Mail list logo