[dm-devel] 4.1-rc2 dm-multipath-mq kernel warning
Mike Snitzer
snitzer at redhat.com
Wed May 27 17:00:01 UTC 2015
On Wed, May 27 2015 at 12:14pm -0400,
Mike Snitzer <snitzer at redhat.com> wrote:
> On Wed, May 27 2015 at 11:33am -0400,
> Bart Van Assche <bart.vanassche at sandisk.com> wrote:
>
> > On 05/27/15 17:29, Bart Van Assche wrote:
> > >On 05/27/15 14:57, Mike Snitzer wrote:
> > >>Looks like Junichi likely fixed this issue you reported, please try this
> > >>patch: https://patchwork.kernel.org/patch/6487321/
> > >
> > >Hello Mike,
> > >
> > >On a setup on which an I/O verification test passes with
> > >blk-mq/scsi-mq/dm-mq disabled, this is what fio reports after a few
> > >minutes with scsi-mq and dm-mq enabled:
> > >
> > >test: Laying out IO file(s) (1 file(s) / 10MB)
> > >fio: io_u error on file /mnt/test.0.0: Input/output error: write
> > >offset=8327168, buflen=4096
> > >fio: io_u error on file /mnt/test.0.0: Input/output error: write
> > >offset=9007104, buflen=4096
> > >fio: pid=4568, err=5/file:io_u.c:1564, func=io_u error,
> > >error=Input/output error
>
> I'll look closer at this... so the NULL pointer is fixed, but this test hits
> IO errors.
Further code inspection revealed an issue with dm-mq enabled but scsi-mq
disabled (when requeuing the original request after clone_rq() failure DM
core wasn't unwinding the dm_start_request() accounting). The following
patch will fix this issue. I've also switched the dm-mq on scsi-mq case
to return BLK_MQ_RQ_QUEUE_BUSY directly (like hch suggested last week).
I have no idea if this would actually fix your case (would be surprising
but worth a shot I suppose).
Anyway, feel free to try this patch:
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 85966ee..02e2d1f 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1115,23 +1115,37 @@ static void old_requeue_request(struct request *rq)
spin_unlock_irqrestore(q->queue_lock, flags);
}
-static void dm_requeue_original_request(struct mapped_device *md,
- struct request *rq)
+static void __dm_requeue_original_request(struct mapped_device *md,
+ struct request *rq, bool in_blk_mq_queue_rq)
{
int rw = rq_data_dir(rq);
dm_unprep_request(rq);
- if (!rq->q->mq_ops)
- old_requeue_request(rq);
- else {
- blk_mq_requeue_request(rq);
- blk_mq_kick_requeue_list(rq->q);
+ if (!in_blk_mq_queue_rq) {
+ if (!rq->q->mq_ops)
+ old_requeue_request(rq);
+ else {
+ blk_mq_requeue_request(rq);
+ blk_mq_kick_requeue_list(rq->q);
+ }
}
rq_completed(md, rw, false);
}
+static void dm_requeue_original_request(struct mapped_device *md,
+ struct request *rq)
+{
+ return __dm_requeue_original_request(md, rq, false);
+}
+
+static void dm_unprep_before_requeuing_original_request(struct mapped_device *md,
+ struct request *rq)
+{
+ return __dm_requeue_original_request(md, rq, true);
+}
+
static void old_stop_queue(struct request_queue *q)
{
unsigned long flags;
@@ -2679,15 +2693,18 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
/* clone request is allocated at the end of the pdu */
tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
if (!clone_rq(rq, md, tio, GFP_ATOMIC))
- return BLK_MQ_RQ_QUEUE_BUSY;
+ goto out_requeue;
queue_kthread_work(&md->kworker, &tio->work);
} else {
/* Direct call is fine since .queue_rq allows allocations */
if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
- dm_requeue_original_request(md, rq);
+ goto out_requeue;
}
return BLK_MQ_RQ_QUEUE_OK;
+out_requeue:
+ dm_unprep_before_requeuing_original_request(md, rq);
+ return BLK_MQ_RQ_QUEUE_BUSY;
}
static struct blk_mq_ops dm_mq_ops = {
More information about the dm-devel
mailing list