Merge branch 'for-linus' of git://git.kernel.dk/linux-block
* 'for-linus' of git://git.kernel.dk/linux-block: (23 commits) Revert "cfq: Remove special treatment for metadata rqs." block: fix flush machinery for stacking drivers with differring flush flags block: improve rq_affinity placement blktrace: add FLUSH/FUA support Move some REQ flags to the common bio/request area allow blk_flush_policy to return REQ_FSEQ_DATA independent of *FLUSH xen/blkback: Make description more obvious. cfq-iosched: Add documentation about idling block: Make rq_affinity = 1 work as expected block: swim3: fix unterminated of_device_id table block/genhd.c: remove useless cast in diskstats_show() drivers/cdrom/cdrom.c: relax check on dvd manufacturer value drivers/block/drbd/drbd_nl.c: use bitmap_parse instead of __bitmap_parse bsg-lib: add module.h include cfq-iosched: Reduce linked group count upon group destruction blk-throttle: correctly determine sync bio loop: fix deadlock when sysfs and LOOP_CLR_FD race against each other loop: add BLK_DEV_LOOP_MIN_COUNT=%i to allow distros 0 pre-allocated loop devices loop: add management interface for on-demand device allocation loop: replace linked list of allocated devices with an idr index ...
This commit is contained in:
commit
5ccc38740a
26 changed files with 801 additions and 136 deletions
|
@ -43,3 +43,74 @@ If one sets slice_idle=0 and if storage supports NCQ, CFQ internally switches
|
|||
to IOPS mode and starts providing fairness in terms of number of requests
|
||||
dispatched. Note that this mode switching takes effect only for group
|
||||
scheduling. For non-cgroup users nothing should change.
|
||||
|
||||
CFQ IO scheduler Idling Theory
|
||||
===============================
|
||||
Idling on a queue is primarily about waiting for the next request to come
|
||||
on same queue after completion of a request. In this process CFQ will not
|
||||
dispatch requests from other cfq queues even if requests are pending there.
|
||||
|
||||
The rationale behind idling is that it can cut down on number of seeks
|
||||
on rotational media. For example, if a process is doing dependent
|
||||
sequential reads (next read will come on only after completion of previous
|
||||
one), then not dispatching request from other queue should help as we
|
||||
did not move the disk head and kept on dispatching sequential IO from
|
||||
one queue.
|
||||
|
||||
CFQ has following service trees and various queues are put on these trees.
|
||||
|
||||
sync-idle sync-noidle async
|
||||
|
||||
All cfq queues doing synchronous sequential IO go on to sync-idle tree.
|
||||
On this tree we idle on each queue individually.
|
||||
|
||||
All synchronous non-sequential queues go on sync-noidle tree. Also any
|
||||
request which are marked with REQ_NOIDLE go on this service tree. On this
|
||||
tree we do not idle on individual queues instead idle on the whole group
|
||||
of queues or the tree. So if there are 4 queues waiting for IO to dispatch
|
||||
we will idle only once last queue has dispatched the IO and there is
|
||||
no more IO on this service tree.
|
||||
|
||||
All async writes go on async service tree. There is no idling on async
|
||||
queues.
|
||||
|
||||
CFQ has some optimizations for SSDs and if it detects a non-rotational
|
||||
media which can support higher queue depth (multiple requests at in
|
||||
flight at a time), then it cuts down on idling of individual queues and
|
||||
all the queues move to sync-noidle tree and only tree idle remains. This
|
||||
tree idling provides isolation with buffered write queues on async tree.
|
||||
|
||||
FAQ
|
||||
===
|
||||
Q1. Why to idle at all on queues marked with REQ_NOIDLE.
|
||||
|
||||
A1. We only do tree idle (all queues on sync-noidle tree) on queues marked
|
||||
with REQ_NOIDLE. This helps in providing isolation with all the sync-idle
|
||||
queues. Otherwise in presence of many sequential readers, other
|
||||
synchronous IO might not get fair share of disk.
|
||||
|
||||
For example, if there are 10 sequential readers doing IO and they get
|
||||
100ms each. If a REQ_NOIDLE request comes in, it will be scheduled
|
||||
roughly after 1 second. If after completion of REQ_NOIDLE request we
|
||||
do not idle, and after a couple of milli seconds a another REQ_NOIDLE
|
||||
request comes in, again it will be scheduled after 1second. Repeat it
|
||||
and notice how a workload can lose its disk share and suffer due to
|
||||
multiple sequential readers.
|
||||
|
||||
fsync can generate dependent IO where bunch of data is written in the
|
||||
context of fsync, and later some journaling data is written. Journaling
|
||||
data comes in only after fsync has finished its IO (atleast for ext4
|
||||
that seemed to be the case). Now if one decides not to idle on fsync
|
||||
thread due to REQ_NOIDLE, then next journaling write will not get
|
||||
scheduled for another second. A process doing small fsync, will suffer
|
||||
badly in presence of multiple sequential readers.
|
||||
|
||||
Hence doing tree idling on threads using REQ_NOIDLE flag on requests
|
||||
provides isolation from multiple sequential readers and at the same
|
||||
time we do not idle on individual threads.
|
||||
|
||||
Q2. When to specify REQ_NOIDLE
|
||||
A2. I would think whenever one is doing synchronous write and not expecting
|
||||
more writes to be dispatched from same context soon, should be able
|
||||
to specify REQ_NOIDLE on writes and that probably should work well for
|
||||
most of the cases.
|
||||
|
|
|
@ -1350,9 +1350,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
|||
it is equivalent to "nosmp", which also disables
|
||||
the IO APIC.
|
||||
|
||||
max_loop= [LOOP] Maximum number of loopback devices that can
|
||||
be mounted
|
||||
Format: <1-256>
|
||||
max_loop= [LOOP] The number of loop block devices that get
|
||||
(loop.max_loop) unconditionally pre-created at init time. The default
|
||||
number is configured by BLK_DEV_LOOP_MIN_COUNT. Instead
|
||||
of statically allocating a predefined number, loop
|
||||
devices can be requested on-demand with the
|
||||
/dev/loop-control interface.
|
||||
|
||||
mcatest= [IA-64]
|
||||
|
||||
|
|
|
@ -65,6 +65,16 @@ config BLK_DEV_BSG
|
|||
|
||||
If unsure, say Y.
|
||||
|
||||
config BLK_DEV_BSGLIB
|
||||
bool "Block layer SG support v4 helper lib"
|
||||
default n
|
||||
select BLK_DEV_BSG
|
||||
help
|
||||
Subsystems will normally enable this if needed. Users will not
|
||||
normally need to manually enable this.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config BLK_DEV_INTEGRITY
|
||||
bool "Block layer data integrity support"
|
||||
---help---
|
||||
|
|
|
@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
|
|||
blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
|
||||
|
||||
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
|
||||
obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
|
||||
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
|
||||
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
|
||||
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
|
||||
|
|
|
@ -1702,6 +1702,7 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits);
|
|||
int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
unsigned long flags;
|
||||
int where = ELEVATOR_INSERT_BACK;
|
||||
|
||||
if (blk_rq_check_limits(q, rq))
|
||||
return -EIO;
|
||||
|
@ -1718,7 +1719,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
|
|||
*/
|
||||
BUG_ON(blk_queued_rq(rq));
|
||||
|
||||
add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
|
||||
if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
|
||||
where = ELEVATOR_INSERT_FLUSH;
|
||||
|
||||
add_acct_request(q, rq, where);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
|
||||
return 0;
|
||||
|
@ -2275,7 +2279,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
|
|||
* %false - we are done with this request
|
||||
* %true - still buffers pending for this request
|
||||
**/
|
||||
static bool __blk_end_bidi_request(struct request *rq, int error,
|
||||
bool __blk_end_bidi_request(struct request *rq, int error,
|
||||
unsigned int nr_bytes, unsigned int bidi_bytes)
|
||||
{
|
||||
if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
|
||||
|
|
|
@ -95,11 +95,12 @@ static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
|
|||
{
|
||||
unsigned int policy = 0;
|
||||
|
||||
if (blk_rq_sectors(rq))
|
||||
policy |= REQ_FSEQ_DATA;
|
||||
|
||||
if (fflags & REQ_FLUSH) {
|
||||
if (rq->cmd_flags & REQ_FLUSH)
|
||||
policy |= REQ_FSEQ_PREFLUSH;
|
||||
if (blk_rq_sectors(rq))
|
||||
policy |= REQ_FSEQ_DATA;
|
||||
if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
|
||||
policy |= REQ_FSEQ_POSTFLUSH;
|
||||
}
|
||||
|
@ -122,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq)
|
|||
|
||||
/* make @rq a normal request */
|
||||
rq->cmd_flags &= ~REQ_FLUSH_SEQ;
|
||||
rq->end_io = NULL;
|
||||
rq->end_io = rq->flush.saved_end_io;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -300,9 +301,6 @@ void blk_insert_flush(struct request *rq)
|
|||
unsigned int fflags = q->flush_flags; /* may change, cache */
|
||||
unsigned int policy = blk_flush_policy(fflags, rq);
|
||||
|
||||
BUG_ON(rq->end_io);
|
||||
BUG_ON(!rq->bio || rq->bio != rq->biotail);
|
||||
|
||||
/*
|
||||
* @policy now records what operations need to be done. Adjust
|
||||
* REQ_FLUSH and FUA for the driver.
|
||||
|
@ -311,6 +309,19 @@ void blk_insert_flush(struct request *rq)
|
|||
if (!(fflags & REQ_FUA))
|
||||
rq->cmd_flags &= ~REQ_FUA;
|
||||
|
||||
/*
|
||||
* An empty flush handed down from a stacking driver may
|
||||
* translate into nothing if the underlying device does not
|
||||
* advertise a write-back cache. In this case, simply
|
||||
* complete the request.
|
||||
*/
|
||||
if (!policy) {
|
||||
__blk_end_bidi_request(rq, 0, 0, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
BUG_ON(!rq->bio || rq->bio != rq->biotail);
|
||||
|
||||
/*
|
||||
* If there's data but flush is not necessary, the request can be
|
||||
* processed directly without going through flush machinery. Queue
|
||||
|
@ -319,6 +330,7 @@ void blk_insert_flush(struct request *rq)
|
|||
if ((policy & REQ_FSEQ_DATA) &&
|
||||
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
|
||||
list_add_tail(&rq->queuelist, &q->queue_head);
|
||||
blk_run_queue_async(q);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -329,6 +341,7 @@ void blk_insert_flush(struct request *rq)
|
|||
memset(&rq->flush, 0, sizeof(rq->flush));
|
||||
INIT_LIST_HEAD(&rq->flush.list);
|
||||
rq->cmd_flags |= REQ_FLUSH_SEQ;
|
||||
rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
|
||||
rq->end_io = flush_data_end_io;
|
||||
|
||||
blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
|
||||
|
|
|
@ -124,6 +124,14 @@ void __blk_complete_request(struct request *req)
|
|||
} else
|
||||
ccpu = cpu;
|
||||
|
||||
/*
|
||||
* If current CPU and requested CPU are in the same group, running
|
||||
* softirq in current CPU. One might concern this is just like
|
||||
* QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
|
||||
* running in interrupt handler, and currently I/O controller doesn't
|
||||
* support multiple interrupts, so current CPU is unique actually. This
|
||||
* avoids IPI sending from current CPU to the first CPU of a group.
|
||||
*/
|
||||
if (ccpu == cpu || ccpu == group_cpu) {
|
||||
struct list_head *list;
|
||||
do_local:
|
||||
|
|
|
@ -746,7 +746,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg,
|
|||
static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
|
||||
{
|
||||
bool rw = bio_data_dir(bio);
|
||||
bool sync = bio->bi_rw & REQ_SYNC;
|
||||
bool sync = rw_is_sync(bio->bi_rw);
|
||||
|
||||
/* Charge the bio to the group */
|
||||
tg->bytes_disp[rw] += bio->bi_size;
|
||||
|
@ -1150,7 +1150,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
|
|||
|
||||
if (tg_no_rule_group(tg, rw)) {
|
||||
blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
|
||||
rw, bio->bi_rw & REQ_SYNC);
|
||||
rw, rw_is_sync(bio->bi_rw));
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
|
|||
struct bio *bio);
|
||||
void blk_dequeue_request(struct request *rq);
|
||||
void __blk_queue_free_tags(struct request_queue *q);
|
||||
bool __blk_end_bidi_request(struct request *rq, int error,
|
||||
unsigned int nr_bytes, unsigned int bidi_bytes);
|
||||
|
||||
void blk_rq_timed_out_timer(unsigned long data);
|
||||
void blk_delete_timer(struct request *);
|
||||
|
|
298
block/bsg-lib.c
Normal file
298
block/bsg-lib.c
Normal file
|
@ -0,0 +1,298 @@
|
|||
/*
|
||||
* BSG helper library
|
||||
*
|
||||
* Copyright (C) 2008 James Smart, Emulex Corporation
|
||||
* Copyright (C) 2011 Red Hat, Inc. All rights reserved.
|
||||
* Copyright (C) 2011 Mike Christie
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
#include <linux/slab.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/bsg-lib.h>
|
||||
#include <linux/module.h>
|
||||
#include <scsi/scsi_cmnd.h>
|
||||
|
||||
/**
|
||||
* bsg_destroy_job - routine to teardown/delete a bsg job
|
||||
* @job: bsg_job that is to be torn down
|
||||
*/
|
||||
static void bsg_destroy_job(struct bsg_job *job)
|
||||
{
|
||||
put_device(job->dev); /* release reference for the request */
|
||||
|
||||
kfree(job->request_payload.sg_list);
|
||||
kfree(job->reply_payload.sg_list);
|
||||
kfree(job);
|
||||
}
|
||||
|
||||
/**
|
||||
* bsg_job_done - completion routine for bsg requests
|
||||
* @job: bsg_job that is complete
|
||||
* @result: job reply result
|
||||
* @reply_payload_rcv_len: length of payload recvd
|
||||
*
|
||||
* The LLD should call this when the bsg job has completed.
|
||||
*/
|
||||
void bsg_job_done(struct bsg_job *job, int result,
|
||||
unsigned int reply_payload_rcv_len)
|
||||
{
|
||||
struct request *req = job->req;
|
||||
struct request *rsp = req->next_rq;
|
||||
int err;
|
||||
|
||||
err = job->req->errors = result;
|
||||
if (err < 0)
|
||||
/* we're only returning the result field in the reply */
|
||||
job->req->sense_len = sizeof(u32);
|
||||
else
|
||||
job->req->sense_len = job->reply_len;
|
||||
/* we assume all request payload was transferred, residual == 0 */
|
||||
req->resid_len = 0;
|
||||
|
||||
if (rsp) {
|
||||
WARN_ON(reply_payload_rcv_len > rsp->resid_len);
|
||||
|
||||
/* set reply (bidi) residual */
|
||||
rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len);
|
||||
}
|
||||
blk_complete_request(req);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bsg_job_done);
|
||||
|
||||
/**
|
||||
* bsg_softirq_done - softirq done routine for destroying the bsg requests
|
||||
* @rq: BSG request that holds the job to be destroyed
|
||||
*/
|
||||
static void bsg_softirq_done(struct request *rq)
|
||||
{
|
||||
struct bsg_job *job = rq->special;
|
||||
|
||||
blk_end_request_all(rq, rq->errors);
|
||||
bsg_destroy_job(job);
|
||||
}
|
||||
|
||||
static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
|
||||
{
|
||||
size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments);
|
||||
|
||||
BUG_ON(!req->nr_phys_segments);
|
||||
|
||||
buf->sg_list = kzalloc(sz, GFP_KERNEL);
|
||||
if (!buf->sg_list)
|
||||
return -ENOMEM;
|
||||
sg_init_table(buf->sg_list, req->nr_phys_segments);
|
||||
buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
|
||||
buf->payload_len = blk_rq_bytes(req);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* bsg_create_job - create the bsg_job structure for the bsg request
|
||||
* @dev: device that is being sent the bsg request
|
||||
* @req: BSG request that needs a job structure
|
||||
*/
|
||||
static int bsg_create_job(struct device *dev, struct request *req)
|
||||
{
|
||||
struct request *rsp = req->next_rq;
|
||||
struct request_queue *q = req->q;
|
||||
struct bsg_job *job;
|
||||
int ret;
|
||||
|
||||
BUG_ON(req->special);
|
||||
|
||||
job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
|
||||
if (!job)
|
||||
return -ENOMEM;
|
||||
|
||||
req->special = job;
|
||||
job->req = req;
|
||||
if (q->bsg_job_size)
|
||||
job->dd_data = (void *)&job[1];
|
||||
job->request = req->cmd;
|
||||
job->request_len = req->cmd_len;
|
||||
job->reply = req->sense;
|
||||
job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
|
||||
* allocated */
|
||||
if (req->bio) {
|
||||
ret = bsg_map_buffer(&job->request_payload, req);
|
||||
if (ret)
|
||||
goto failjob_rls_job;
|
||||
}
|
||||
if (rsp && rsp->bio) {
|
||||
ret = bsg_map_buffer(&job->reply_payload, rsp);
|
||||
if (ret)
|
||||
goto failjob_rls_rqst_payload;
|
||||
}
|
||||
job->dev = dev;
|
||||
/* take a reference for the request */
|
||||
get_device(job->dev);
|
||||
return 0;
|
||||
|
||||
failjob_rls_rqst_payload:
|
||||
kfree(job->request_payload.sg_list);
|
||||
failjob_rls_job:
|
||||
kfree(job);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* bsg_goose_queue - restart queue in case it was stopped
|
||||
* @q: request q to be restarted
|
||||
*/
|
||||
void bsg_goose_queue(struct request_queue *q)
|
||||
{
|
||||
if (!q)
|
||||
return;
|
||||
|
||||
blk_run_queue_async(q);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bsg_goose_queue);
|
||||
|
||||
/**
|
||||
* bsg_request_fn - generic handler for bsg requests
|
||||
* @q: request queue to manage
|
||||
*
|
||||
* On error the create_bsg_job function should return a -Exyz error value
|
||||
* that will be set to the req->errors.
|
||||
*
|
||||
* Drivers/subsys should pass this to the queue init function.
|
||||
*/
|
||||
void bsg_request_fn(struct request_queue *q)
|
||||
{
|
||||
struct device *dev = q->queuedata;
|
||||
struct request *req;
|
||||
struct bsg_job *job;
|
||||
int ret;
|
||||
|
||||
if (!get_device(dev))
|
||||
return;
|
||||
|
||||
while (1) {
|
||||
req = blk_fetch_request(q);
|
||||
if (!req)
|
||||
break;
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
|
||||
ret = bsg_create_job(dev, req);
|
||||
if (ret) {
|
||||
req->errors = ret;
|
||||
blk_end_request_all(req, ret);
|
||||
spin_lock_irq(q->queue_lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
job = req->special;
|
||||
ret = q->bsg_job_fn(job);
|
||||
spin_lock_irq(q->queue_lock);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
put_device(dev);
|
||||
spin_lock_irq(q->queue_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bsg_request_fn);
|
||||
|
||||
/**
|
||||
* bsg_setup_queue - Create and add the bsg hooks so we can receive requests
|
||||
* @dev: device to attach bsg device to
|
||||
* @q: request queue setup by caller
|
||||
* @name: device to give bsg device
|
||||
* @job_fn: bsg job handler
|
||||
* @dd_job_size: size of LLD data needed for each job
|
||||
*
|
||||
* The caller should have setup the reuqest queue with bsg_request_fn
|
||||
* as the request_fn.
|
||||
*/
|
||||
int bsg_setup_queue(struct device *dev, struct request_queue *q,
|
||||
char *name, bsg_job_fn *job_fn, int dd_job_size)
|
||||
{
|
||||
int ret;
|
||||
|
||||
q->queuedata = dev;
|
||||
q->bsg_job_size = dd_job_size;
|
||||
q->bsg_job_fn = job_fn;
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
|
||||
blk_queue_softirq_done(q, bsg_softirq_done);
|
||||
blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
|
||||
|
||||
ret = bsg_register_queue(q, dev, name, NULL);
|
||||
if (ret) {
|
||||
printk(KERN_ERR "%s: bsg interface failed to "
|
||||
"initialize - register queue\n", dev->kobj.name);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bsg_setup_queue);
|
||||
|
||||
/**
|
||||
* bsg_remove_queue - Deletes the bsg dev from the q
|
||||
* @q: the request_queue that is to be torn down.
|
||||
*
|
||||
* Notes:
|
||||
* Before unregistering the queue empty any requests that are blocked
|
||||
*/
|
||||
void bsg_remove_queue(struct request_queue *q)
|
||||
{
|
||||
struct request *req; /* block request */
|
||||
int counts; /* totals for request_list count and starved */
|
||||
|
||||
if (!q)
|
||||
return;
|
||||
|
||||
/* Stop taking in new requests */
|
||||
spin_lock_irq(q->queue_lock);
|
||||
blk_stop_queue(q);
|
||||
|
||||
/* drain all requests in the queue */
|
||||
while (1) {
|
||||
/* need the lock to fetch a request
|
||||
* this may fetch the same reqeust as the previous pass
|
||||
*/
|
||||
req = blk_fetch_request(q);
|
||||
/* save requests in use and starved */
|
||||
counts = q->rq.count[0] + q->rq.count[1] +
|
||||
q->rq.starved[0] + q->rq.starved[1];
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
/* any requests still outstanding? */
|
||||
if (counts == 0)
|
||||
break;
|
||||
|
||||
/* This may be the same req as the previous iteration,
|
||||
* always send the blk_end_request_all after a prefetch.
|
||||
* It is not okay to not end the request because the
|
||||
* prefetch started the request.
|
||||
*/
|
||||
if (req) {
|
||||
/* return -ENXIO to indicate that this queue is
|
||||
* going away
|
||||
*/
|
||||
req->errors = -ENXIO;
|
||||
blk_end_request_all(req, -ENXIO);
|
||||
}
|
||||
|
||||
msleep(200); /* allow bsg to possibly finish */
|
||||
spin_lock_irq(q->queue_lock);
|
||||
}
|
||||
bsg_unregister_queue(q);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bsg_remove_queue);
|
|
@ -130,6 +130,8 @@ struct cfq_queue {
|
|||
unsigned long slice_end;
|
||||
long slice_resid;
|
||||
|
||||
/* pending metadata requests */
|
||||
int meta_pending;
|
||||
/* number of requests that are on the dispatch list or inside driver */
|
||||
int dispatched;
|
||||
|
||||
|
@ -682,6 +684,9 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
|
|||
if (rq_is_sync(rq1) != rq_is_sync(rq2))
|
||||
return rq_is_sync(rq1) ? rq1 : rq2;
|
||||
|
||||
if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
|
||||
return rq1->cmd_flags & REQ_META ? rq1 : rq2;
|
||||
|
||||
s1 = blk_rq_pos(rq1);
|
||||
s2 = blk_rq_pos(rq2);
|
||||
|
||||
|
@ -1209,6 +1214,9 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
|
|||
|
||||
hlist_del_init(&cfqg->cfqd_node);
|
||||
|
||||
BUG_ON(cfqd->nr_blkcg_linked_grps <= 0);
|
||||
cfqd->nr_blkcg_linked_grps--;
|
||||
|
||||
/*
|
||||
* Put the reference taken at the time of creation so that when all
|
||||
* queues are gone, group can be destroyed.
|
||||
|
@ -1604,6 +1612,10 @@ static void cfq_remove_request(struct request *rq)
|
|||
cfqq->cfqd->rq_queued--;
|
||||
cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
|
||||
rq_data_dir(rq), rq_is_sync(rq));
|
||||
if (rq->cmd_flags & REQ_META) {
|
||||
WARN_ON(!cfqq->meta_pending);
|
||||
cfqq->meta_pending--;
|
||||
}
|
||||
}
|
||||
|
||||
static int cfq_merge(struct request_queue *q, struct request **req,
|
||||
|
@ -3356,6 +3368,13 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
|
|||
RB_EMPTY_ROOT(&cfqq->sort_list))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* So both queues are sync. Let the new request get disk time if
|
||||
* it's a metadata request and the current queue is doing regular IO.
|
||||
*/
|
||||
if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
|
||||
*/
|
||||
|
@ -3420,6 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
|
|||
struct cfq_io_context *cic = RQ_CIC(rq);
|
||||
|
||||
cfqd->rq_queued++;
|
||||
if (rq->cmd_flags & REQ_META)
|
||||
cfqq->meta_pending++;
|
||||
|
||||
cfq_update_io_thinktime(cfqd, cfqq, cic);
|
||||
cfq_update_io_seektime(cfqd, cfqq, rq);
|
||||
|
|
|
@ -1146,17 +1146,17 @@ static int diskstats_show(struct seq_file *seqf, void *v)
|
|||
cpu = part_stat_lock();
|
||||
part_round_stats(cpu, hd);
|
||||
part_stat_unlock();
|
||||
seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
|
||||
"%u %lu %lu %llu %u %u %u %u\n",
|
||||
seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
|
||||
"%u %lu %lu %lu %u %u %u %u\n",
|
||||
MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
|
||||
disk_name(gp, hd->partno, buf),
|
||||
part_stat_read(hd, ios[READ]),
|
||||
part_stat_read(hd, merges[READ]),
|
||||
(unsigned long long)part_stat_read(hd, sectors[READ]),
|
||||
part_stat_read(hd, sectors[READ]),
|
||||
jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
|
||||
part_stat_read(hd, ios[WRITE]),
|
||||
part_stat_read(hd, merges[WRITE]),
|
||||
(unsigned long long)part_stat_read(hd, sectors[WRITE]),
|
||||
part_stat_read(hd, sectors[WRITE]),
|
||||
jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
|
||||
part_in_flight(hd),
|
||||
jiffies_to_msecs(part_stat_read(hd, io_ticks)),
|
||||
|
|
|
@ -256,6 +256,21 @@ config BLK_DEV_LOOP
|
|||
|
||||
Most users will answer N here.
|
||||
|
||||
config BLK_DEV_LOOP_MIN_COUNT
|
||||
int "Number of loop devices to pre-create at init time"
|
||||
depends on BLK_DEV_LOOP
|
||||
default 8
|
||||
help
|
||||
Static number of loop devices to be unconditionally pre-created
|
||||
at init time.
|
||||
|
||||
This default value can be overwritten on the kernel command
|
||||
line or with module-parameter loop.max_loop.
|
||||
|
||||
The historic default is 8. If a late 2011 version of losetup(8)
|
||||
is used, it can be set to 0, since needed loop devices can be
|
||||
dynamically allocated with the /dev/loop-control interface.
|
||||
|
||||
config BLK_DEV_CRYPTOLOOP
|
||||
tristate "Cryptoloop Support"
|
||||
select CRYPTO
|
||||
|
@ -471,7 +486,7 @@ config XEN_BLKDEV_FRONTEND
|
|||
in another domain which drives the actual block device.
|
||||
|
||||
config XEN_BLKDEV_BACKEND
|
||||
tristate "Block-device backend driver"
|
||||
tristate "Xen block-device backend driver"
|
||||
depends on XEN_BACKEND
|
||||
help
|
||||
The block-device backend driver allows the kernel to export its
|
||||
|
|
|
@ -1829,10 +1829,10 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
|
|||
|
||||
/* silently ignore cpu mask on UP kernel */
|
||||
if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) {
|
||||
err = __bitmap_parse(sc.cpu_mask, 32, 0,
|
||||
err = bitmap_parse(sc.cpu_mask, 32,
|
||||
cpumask_bits(new_cpu_mask), nr_cpu_ids);
|
||||
if (err) {
|
||||
dev_warn(DEV, "__bitmap_parse() failed with %d\n", err);
|
||||
dev_warn(DEV, "bitmap_parse() failed with %d\n", err);
|
||||
retcode = ERR_CPU_MASK_PARSE;
|
||||
goto fail;
|
||||
}
|
||||
|
|
|
@ -75,11 +75,11 @@
|
|||
#include <linux/kthread.h>
|
||||
#include <linux/splice.h>
|
||||
#include <linux/sysfs.h>
|
||||
|
||||
#include <linux/miscdevice.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
static LIST_HEAD(loop_devices);
|
||||
static DEFINE_MUTEX(loop_devices_mutex);
|
||||
static DEFINE_IDR(loop_index_idr);
|
||||
static DEFINE_MUTEX(loop_index_mutex);
|
||||
|
||||
static int max_part;
|
||||
static int part_shift;
|
||||
|
@ -722,17 +722,10 @@ static inline int is_loop_device(struct file *file)
|
|||
static ssize_t loop_attr_show(struct device *dev, char *page,
|
||||
ssize_t (*callback)(struct loop_device *, char *))
|
||||
{
|
||||
struct loop_device *l, *lo = NULL;
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
struct loop_device *lo = disk->private_data;
|
||||
|
||||
mutex_lock(&loop_devices_mutex);
|
||||
list_for_each_entry(l, &loop_devices, lo_list)
|
||||
if (disk_to_dev(l->lo_disk) == dev) {
|
||||
lo = l;
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&loop_devices_mutex);
|
||||
|
||||
return lo ? callback(lo, page) : -EIO;
|
||||
return callback(lo, page);
|
||||
}
|
||||
|
||||
#define LOOP_ATTR_RO(_name) \
|
||||
|
@ -750,10 +743,10 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
|
|||
ssize_t ret;
|
||||
char *p = NULL;
|
||||
|
||||
mutex_lock(&lo->lo_ctl_mutex);
|
||||
spin_lock_irq(&lo->lo_lock);
|
||||
if (lo->lo_backing_file)
|
||||
p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
|
||||
mutex_unlock(&lo->lo_ctl_mutex);
|
||||
spin_unlock_irq(&lo->lo_lock);
|
||||
|
||||
if (IS_ERR_OR_NULL(p))
|
||||
ret = PTR_ERR(p);
|
||||
|
@ -1007,7 +1000,9 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
|
|||
|
||||
kthread_stop(lo->lo_thread);
|
||||
|
||||
spin_lock_irq(&lo->lo_lock);
|
||||
lo->lo_backing_file = NULL;
|
||||
spin_unlock_irq(&lo->lo_lock);
|
||||
|
||||
loop_release_xfer(lo);
|
||||
lo->transfer = NULL;
|
||||
|
@ -1485,13 +1480,22 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
|
|||
|
||||
static int lo_open(struct block_device *bdev, fmode_t mode)
|
||||
{
|
||||
struct loop_device *lo = bdev->bd_disk->private_data;
|
||||
struct loop_device *lo;
|
||||
int err = 0;
|
||||
|
||||
mutex_lock(&loop_index_mutex);
|
||||
lo = bdev->bd_disk->private_data;
|
||||
if (!lo) {
|
||||
err = -ENXIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
mutex_lock(&lo->lo_ctl_mutex);
|
||||
lo->lo_refcnt++;
|
||||
mutex_unlock(&lo->lo_ctl_mutex);
|
||||
|
||||
return 0;
|
||||
out:
|
||||
mutex_unlock(&loop_index_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int lo_release(struct gendisk *disk, fmode_t mode)
|
||||
|
@ -1557,40 +1561,71 @@ int loop_register_transfer(struct loop_func_table *funcs)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int unregister_transfer_cb(int id, void *ptr, void *data)
|
||||
{
|
||||
struct loop_device *lo = ptr;
|
||||
struct loop_func_table *xfer = data;
|
||||
|
||||
mutex_lock(&lo->lo_ctl_mutex);
|
||||
if (lo->lo_encryption == xfer)
|
||||
loop_release_xfer(lo);
|
||||
mutex_unlock(&lo->lo_ctl_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int loop_unregister_transfer(int number)
|
||||
{
|
||||
unsigned int n = number;
|
||||
struct loop_device *lo;
|
||||
struct loop_func_table *xfer;
|
||||
|
||||
if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
xfer_funcs[n] = NULL;
|
||||
|
||||
list_for_each_entry(lo, &loop_devices, lo_list) {
|
||||
mutex_lock(&lo->lo_ctl_mutex);
|
||||
|
||||
if (lo->lo_encryption == xfer)
|
||||
loop_release_xfer(lo);
|
||||
|
||||
mutex_unlock(&lo->lo_ctl_mutex);
|
||||
}
|
||||
|
||||
idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
|
||||
return 0;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(loop_register_transfer);
|
||||
EXPORT_SYMBOL(loop_unregister_transfer);
|
||||
|
||||
static struct loop_device *loop_alloc(int i)
|
||||
static int loop_add(struct loop_device **l, int i)
|
||||
{
|
||||
struct loop_device *lo;
|
||||
struct gendisk *disk;
|
||||
int err;
|
||||
|
||||
lo = kzalloc(sizeof(*lo), GFP_KERNEL);
|
||||
if (!lo)
|
||||
if (!lo) {
|
||||
err = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = idr_pre_get(&loop_index_idr, GFP_KERNEL);
|
||||
if (err < 0)
|
||||
goto out_free_dev;
|
||||
|
||||
if (i >= 0) {
|
||||
int m;
|
||||
|
||||
/* create specific i in the index */
|
||||
err = idr_get_new_above(&loop_index_idr, lo, i, &m);
|
||||
if (err >= 0 && i != m) {
|
||||
idr_remove(&loop_index_idr, m);
|
||||
err = -EEXIST;
|
||||
}
|
||||
} else if (i == -1) {
|
||||
int m;
|
||||
|
||||
/* get next free nr */
|
||||
err = idr_get_new(&loop_index_idr, lo, &m);
|
||||
if (err >= 0)
|
||||
i = m;
|
||||
} else {
|
||||
err = -EINVAL;
|
||||
}
|
||||
if (err < 0)
|
||||
goto out_free_dev;
|
||||
|
||||
lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
|
||||
if (!lo->lo_queue)
|
||||
|
@ -1611,81 +1646,158 @@ static struct loop_device *loop_alloc(int i)
|
|||
disk->private_data = lo;
|
||||
disk->queue = lo->lo_queue;
|
||||
sprintf(disk->disk_name, "loop%d", i);
|
||||
return lo;
|
||||
add_disk(disk);
|
||||
*l = lo;
|
||||
return lo->lo_number;
|
||||
|
||||
out_free_queue:
|
||||
blk_cleanup_queue(lo->lo_queue);
|
||||
out_free_dev:
|
||||
kfree(lo);
|
||||
out:
|
||||
return NULL;
|
||||
return err;
|
||||
}
|
||||
|
||||
static void loop_free(struct loop_device *lo)
|
||||
static void loop_remove(struct loop_device *lo)
|
||||
{
|
||||
del_gendisk(lo->lo_disk);
|
||||
blk_cleanup_queue(lo->lo_queue);
|
||||
put_disk(lo->lo_disk);
|
||||
list_del(&lo->lo_list);
|
||||
kfree(lo);
|
||||
}
|
||||
|
||||
static struct loop_device *loop_init_one(int i)
|
||||
static int find_free_cb(int id, void *ptr, void *data)
|
||||
{
|
||||
struct loop_device *lo;
|
||||
struct loop_device *lo = ptr;
|
||||
struct loop_device **l = data;
|
||||
|
||||
list_for_each_entry(lo, &loop_devices, lo_list) {
|
||||
if (lo->lo_number == i)
|
||||
return lo;
|
||||
if (lo->lo_state == Lo_unbound) {
|
||||
*l = lo;
|
||||
return 1;
|
||||
}
|
||||
|
||||
lo = loop_alloc(i);
|
||||
if (lo) {
|
||||
add_disk(lo->lo_disk);
|
||||
list_add_tail(&lo->lo_list, &loop_devices);
|
||||
}
|
||||
return lo;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void loop_del_one(struct loop_device *lo)
|
||||
static int loop_lookup(struct loop_device **l, int i)
|
||||
{
|
||||
del_gendisk(lo->lo_disk);
|
||||
loop_free(lo);
|
||||
struct loop_device *lo;
|
||||
int ret = -ENODEV;
|
||||
|
||||
if (i < 0) {
|
||||
int err;
|
||||
|
||||
err = idr_for_each(&loop_index_idr, &find_free_cb, &lo);
|
||||
if (err == 1) {
|
||||
*l = lo;
|
||||
ret = lo->lo_number;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* lookup and return a specific i */
|
||||
lo = idr_find(&loop_index_idr, i);
|
||||
if (lo) {
|
||||
*l = lo;
|
||||
ret = lo->lo_number;
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct kobject *loop_probe(dev_t dev, int *part, void *data)
|
||||
{
|
||||
struct loop_device *lo;
|
||||
struct kobject *kobj;
|
||||
int err;
|
||||
|
||||
mutex_lock(&loop_devices_mutex);
|
||||
lo = loop_init_one(MINOR(dev) >> part_shift);
|
||||
kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM);
|
||||
mutex_unlock(&loop_devices_mutex);
|
||||
mutex_lock(&loop_index_mutex);
|
||||
err = loop_lookup(&lo, MINOR(dev) >> part_shift);
|
||||
if (err < 0)
|
||||
err = loop_add(&lo, MINOR(dev) >> part_shift);
|
||||
if (err < 0)
|
||||
kobj = ERR_PTR(err);
|
||||
else
|
||||
kobj = get_disk(lo->lo_disk);
|
||||
mutex_unlock(&loop_index_mutex);
|
||||
|
||||
*part = 0;
|
||||
return kobj;
|
||||
}
|
||||
|
||||
static long loop_control_ioctl(struct file *file, unsigned int cmd,
|
||||
unsigned long parm)
|
||||
{
|
||||
struct loop_device *lo;
|
||||
int ret = -ENOSYS;
|
||||
|
||||
mutex_lock(&loop_index_mutex);
|
||||
switch (cmd) {
|
||||
case LOOP_CTL_ADD:
|
||||
ret = loop_lookup(&lo, parm);
|
||||
if (ret >= 0) {
|
||||
ret = -EEXIST;
|
||||
break;
|
||||
}
|
||||
ret = loop_add(&lo, parm);
|
||||
break;
|
||||
case LOOP_CTL_REMOVE:
|
||||
ret = loop_lookup(&lo, parm);
|
||||
if (ret < 0)
|
||||
break;
|
||||
mutex_lock(&lo->lo_ctl_mutex);
|
||||
if (lo->lo_state != Lo_unbound) {
|
||||
ret = -EBUSY;
|
||||
mutex_unlock(&lo->lo_ctl_mutex);
|
||||
break;
|
||||
}
|
||||
if (lo->lo_refcnt > 0) {
|
||||
ret = -EBUSY;
|
||||
mutex_unlock(&lo->lo_ctl_mutex);
|
||||
break;
|
||||
}
|
||||
lo->lo_disk->private_data = NULL;
|
||||
mutex_unlock(&lo->lo_ctl_mutex);
|
||||
idr_remove(&loop_index_idr, lo->lo_number);
|
||||
loop_remove(lo);
|
||||
break;
|
||||
case LOOP_CTL_GET_FREE:
|
||||
ret = loop_lookup(&lo, -1);
|
||||
if (ret >= 0)
|
||||
break;
|
||||
ret = loop_add(&lo, -1);
|
||||
}
|
||||
mutex_unlock(&loop_index_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct file_operations loop_ctl_fops = {
|
||||
.open = nonseekable_open,
|
||||
.unlocked_ioctl = loop_control_ioctl,
|
||||
.compat_ioctl = loop_control_ioctl,
|
||||
.owner = THIS_MODULE,
|
||||
.llseek = noop_llseek,
|
||||
};
|
||||
|
||||
static struct miscdevice loop_misc = {
|
||||
.minor = LOOP_CTRL_MINOR,
|
||||
.name = "loop-control",
|
||||
.fops = &loop_ctl_fops,
|
||||
};
|
||||
|
||||
MODULE_ALIAS_MISCDEV(LOOP_CTRL_MINOR);
|
||||
MODULE_ALIAS("devname:loop-control");
|
||||
|
||||
static int __init loop_init(void)
|
||||
{
|
||||
int i, nr;
|
||||
unsigned long range;
|
||||
struct loop_device *lo, *next;
|
||||
struct loop_device *lo;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* loop module now has a feature to instantiate underlying device
|
||||
* structure on-demand, provided that there is an access dev node.
|
||||
* However, this will not work well with user space tool that doesn't
|
||||
* know about such "feature". In order to not break any existing
|
||||
* tool, we do the following:
|
||||
*
|
||||
* (1) if max_loop is specified, create that many upfront, and this
|
||||
* also becomes a hard limit.
|
||||
* (2) if max_loop is not specified, create 8 loop device on module
|
||||
* load, user can further extend loop device by create dev node
|
||||
* themselves and have kernel automatically instantiate actual
|
||||
* device on-demand.
|
||||
*/
|
||||
err = misc_register(&loop_misc);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
part_shift = 0;
|
||||
if (max_part > 0) {
|
||||
|
@ -1708,57 +1820,60 @@ static int __init loop_init(void)
|
|||
if (max_loop > 1UL << (MINORBITS - part_shift))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* If max_loop is specified, create that many devices upfront.
|
||||
* This also becomes a hard limit. If max_loop is not specified,
|
||||
* create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module
|
||||
* init time. Loop devices can be requested on-demand with the
|
||||
* /dev/loop-control interface, or be instantiated by accessing
|
||||
* a 'dead' device node.
|
||||
*/
|
||||
if (max_loop) {
|
||||
nr = max_loop;
|
||||
range = max_loop << part_shift;
|
||||
} else {
|
||||
nr = 8;
|
||||
nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT;
|
||||
range = 1UL << MINORBITS;
|
||||
}
|
||||
|
||||
if (register_blkdev(LOOP_MAJOR, "loop"))
|
||||
return -EIO;
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
lo = loop_alloc(i);
|
||||
if (!lo)
|
||||
goto Enomem;
|
||||
list_add_tail(&lo->lo_list, &loop_devices);
|
||||
}
|
||||
|
||||
/* point of no return */
|
||||
|
||||
list_for_each_entry(lo, &loop_devices, lo_list)
|
||||
add_disk(lo->lo_disk);
|
||||
|
||||
blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
|
||||
THIS_MODULE, loop_probe, NULL, NULL);
|
||||
|
||||
/* pre-create number of devices given by config or max_loop */
|
||||
mutex_lock(&loop_index_mutex);
|
||||
for (i = 0; i < nr; i++)
|
||||
loop_add(&lo, i);
|
||||
mutex_unlock(&loop_index_mutex);
|
||||
|
||||
printk(KERN_INFO "loop: module loaded\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
Enomem:
|
||||
printk(KERN_INFO "loop: out of memory\n");
|
||||
static int loop_exit_cb(int id, void *ptr, void *data)
|
||||
{
|
||||
struct loop_device *lo = ptr;
|
||||
|
||||
list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
|
||||
loop_free(lo);
|
||||
|
||||
unregister_blkdev(LOOP_MAJOR, "loop");
|
||||
return -ENOMEM;
|
||||
loop_remove(lo);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit loop_exit(void)
|
||||
{
|
||||
unsigned long range;
|
||||
struct loop_device *lo, *next;
|
||||
|
||||
range = max_loop ? max_loop << part_shift : 1UL << MINORBITS;
|
||||
|
||||
list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
|
||||
loop_del_one(lo);
|
||||
idr_for_each(&loop_index_idr, &loop_exit_cb, NULL);
|
||||
idr_remove_all(&loop_index_idr);
|
||||
idr_destroy(&loop_index_idr);
|
||||
|
||||
blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
|
||||
unregister_blkdev(LOOP_MAJOR, "loop");
|
||||
|
||||
misc_deregister(&loop_misc);
|
||||
}
|
||||
|
||||
module_init(loop_init);
|
||||
|
|
|
@ -1184,6 +1184,7 @@ static struct of_device_id swim3_match[] =
|
|||
{
|
||||
.compatible = "swim3"
|
||||
},
|
||||
{ /* end of list */ }
|
||||
};
|
||||
|
||||
static struct macio_driver swim3_driver =
|
||||
|
|
|
@ -123,8 +123,8 @@ static DEFINE_SPINLOCK(minor_lock);
|
|||
#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
|
||||
#define EMULATED_HD_DISK_MINOR_OFFSET (0)
|
||||
#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
|
||||
#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16))
|
||||
#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4)
|
||||
#define EMULATED_SD_DISK_MINOR_OFFSET (0)
|
||||
#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256)
|
||||
|
||||
#define DEV_NAME "xvd" /* name in /dev */
|
||||
|
||||
|
@ -529,7 +529,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
|
|||
minor = BLKIF_MINOR_EXT(info->vdevice);
|
||||
nr_parts = PARTS_PER_EXT_DISK;
|
||||
offset = minor / nr_parts;
|
||||
if (xen_hvm_domain() && offset <= EMULATED_HD_DISK_NAME_OFFSET + 4)
|
||||
if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4)
|
||||
printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
|
||||
"emulated IDE disks,\n\t choose an xvd device name"
|
||||
"from xvde on\n", info->vdevice);
|
||||
|
|
|
@ -1929,11 +1929,17 @@ static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s,
|
|||
goto out;
|
||||
|
||||
s->manufact.len = buf[0] << 8 | buf[1];
|
||||
if (s->manufact.len < 0 || s->manufact.len > 2048) {
|
||||
if (s->manufact.len < 0) {
|
||||
cdinfo(CD_WARNING, "Received invalid manufacture info length"
|
||||
" (%d)\n", s->manufact.len);
|
||||
ret = -EIO;
|
||||
} else {
|
||||
if (s->manufact.len > 2048) {
|
||||
cdinfo(CD_WARNING, "Received invalid manufacture info "
|
||||
"length (%d): truncating to 2048\n",
|
||||
s->manufact.len);
|
||||
s->manufact.len = 2048;
|
||||
}
|
||||
memcpy(s->manufact.value, &buf[4], s->manufact.len);
|
||||
}
|
||||
|
||||
|
|
|
@ -125,7 +125,11 @@ enum rq_flag_bits {
|
|||
__REQ_SYNC, /* request is sync (sync write or read) */
|
||||
__REQ_META, /* metadata io request */
|
||||
__REQ_DISCARD, /* request to discard sectors */
|
||||
__REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
|
||||
|
||||
__REQ_NOIDLE, /* don't anticipate more IO after this one */
|
||||
__REQ_FUA, /* forced unit access */
|
||||
__REQ_FLUSH, /* request for cache flush */
|
||||
|
||||
/* bio only flags */
|
||||
__REQ_RAHEAD, /* read ahead, can fail anytime */
|
||||
|
@ -135,7 +139,6 @@ enum rq_flag_bits {
|
|||
/* request only flags */
|
||||
__REQ_SORTED, /* elevator knows about this request */
|
||||
__REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
|
||||
__REQ_FUA, /* forced unit access */
|
||||
__REQ_NOMERGE, /* don't touch this for merging */
|
||||
__REQ_STARTED, /* drive already may have started this one */
|
||||
__REQ_DONTPREP, /* don't call prep for this one */
|
||||
|
@ -146,11 +149,9 @@ enum rq_flag_bits {
|
|||
__REQ_PREEMPT, /* set for "ide_preempt" requests */
|
||||
__REQ_ALLOCED, /* request came from our alloc pool */
|
||||
__REQ_COPY_USER, /* contains copies of user pages */
|
||||
__REQ_FLUSH, /* request for cache flush */
|
||||
__REQ_FLUSH_SEQ, /* request for flush sequence */
|
||||
__REQ_IO_STAT, /* account I/O stat */
|
||||
__REQ_MIXED_MERGE, /* merge of different types, fail separately */
|
||||
__REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
|
||||
__REQ_NR_BITS, /* stops here */
|
||||
};
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ struct request_pm_state;
|
|||
struct blk_trace;
|
||||
struct request;
|
||||
struct sg_io_hdr;
|
||||
struct bsg_job;
|
||||
|
||||
#define BLKDEV_MIN_RQ 4
|
||||
#define BLKDEV_MAX_RQ 128 /* Default maximum */
|
||||
|
@ -117,6 +118,7 @@ struct request {
|
|||
struct {
|
||||
unsigned int seq;
|
||||
struct list_head list;
|
||||
rq_end_io_fn *saved_end_io;
|
||||
} flush;
|
||||
};
|
||||
|
||||
|
@ -209,6 +211,7 @@ typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *,
|
|||
typedef void (softirq_done_fn)(struct request *);
|
||||
typedef int (dma_drain_needed_fn)(struct request *);
|
||||
typedef int (lld_busy_fn) (struct request_queue *q);
|
||||
typedef int (bsg_job_fn) (struct bsg_job *);
|
||||
|
||||
enum blk_eh_timer_return {
|
||||
BLK_EH_NOT_HANDLED,
|
||||
|
@ -375,6 +378,8 @@ struct request_queue {
|
|||
struct mutex sysfs_lock;
|
||||
|
||||
#if defined(CONFIG_BLK_DEV_BSG)
|
||||
bsg_job_fn *bsg_job_fn;
|
||||
int bsg_job_size;
|
||||
struct bsg_class_device bsg_dev;
|
||||
#endif
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
enum blktrace_cat {
|
||||
BLK_TC_READ = 1 << 0, /* reads */
|
||||
BLK_TC_WRITE = 1 << 1, /* writes */
|
||||
BLK_TC_BARRIER = 1 << 2, /* barrier */
|
||||
BLK_TC_FLUSH = 1 << 2, /* flush */
|
||||
BLK_TC_SYNC = 1 << 3, /* sync IO */
|
||||
BLK_TC_SYNCIO = BLK_TC_SYNC,
|
||||
BLK_TC_QUEUE = 1 << 4, /* queueing/merging */
|
||||
|
@ -28,8 +28,9 @@ enum blktrace_cat {
|
|||
BLK_TC_META = 1 << 12, /* metadata */
|
||||
BLK_TC_DISCARD = 1 << 13, /* discard requests */
|
||||
BLK_TC_DRV_DATA = 1 << 14, /* binary per-driver data */
|
||||
BLK_TC_FUA = 1 << 15, /* fua requests */
|
||||
|
||||
BLK_TC_END = 1 << 15, /* only 16-bits, reminder */
|
||||
BLK_TC_END = 1 << 15, /* we've run out of bits! */
|
||||
};
|
||||
|
||||
#define BLK_TC_SHIFT (16)
|
||||
|
|
73
include/linux/bsg-lib.h
Normal file
73
include/linux/bsg-lib.h
Normal file
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
* BSG helper library
|
||||
*
|
||||
* Copyright (C) 2008 James Smart, Emulex Corporation
|
||||
* Copyright (C) 2011 Red Hat, Inc. All rights reserved.
|
||||
* Copyright (C) 2011 Mike Christie
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
*/
|
||||
#ifndef _BLK_BSG_
|
||||
#define _BLK_BSG_
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
|
||||
struct request;
|
||||
struct device;
|
||||
struct scatterlist;
|
||||
struct request_queue;
|
||||
|
||||
struct bsg_buffer {
|
||||
unsigned int payload_len;
|
||||
int sg_cnt;
|
||||
struct scatterlist *sg_list;
|
||||
};
|
||||
|
||||
struct bsg_job {
|
||||
struct device *dev;
|
||||
struct request *req;
|
||||
|
||||
/* Transport/driver specific request/reply structs */
|
||||
void *request;
|
||||
void *reply;
|
||||
|
||||
unsigned int request_len;
|
||||
unsigned int reply_len;
|
||||
/*
|
||||
* On entry : reply_len indicates the buffer size allocated for
|
||||
* the reply.
|
||||
*
|
||||
* Upon completion : the message handler must set reply_len
|
||||
* to indicates the size of the reply to be returned to the
|
||||
* caller.
|
||||
*/
|
||||
|
||||
/* DMA payloads for the request/response */
|
||||
struct bsg_buffer request_payload;
|
||||
struct bsg_buffer reply_payload;
|
||||
|
||||
void *dd_data; /* Used for driver-specific storage */
|
||||
};
|
||||
|
||||
void bsg_job_done(struct bsg_job *job, int result,
|
||||
unsigned int reply_payload_rcv_len);
|
||||
int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name,
|
||||
bsg_job_fn *job_fn, int dd_job_size);
|
||||
void bsg_request_fn(struct request_queue *q);
|
||||
void bsg_remove_queue(struct request_queue *q);
|
||||
void bsg_goose_queue(struct request_queue *q);
|
||||
|
||||
#endif
|
|
@ -64,7 +64,6 @@ struct loop_device {
|
|||
|
||||
struct request_queue *lo_queue;
|
||||
struct gendisk *lo_disk;
|
||||
struct list_head lo_list;
|
||||
};
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
@ -161,4 +160,8 @@ int loop_unregister_transfer(int number);
|
|||
#define LOOP_CHANGE_FD 0x4C06
|
||||
#define LOOP_SET_CAPACITY 0x4C07
|
||||
|
||||
/* /dev/loop-control interface */
|
||||
#define LOOP_CTL_ADD 0x4C80
|
||||
#define LOOP_CTL_REMOVE 0x4C81
|
||||
#define LOOP_CTL_GET_FREE 0x4C82
|
||||
#endif
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
#define BTRFS_MINOR 234
|
||||
#define AUTOFS_MINOR 235
|
||||
#define MAPPER_CTRL_MINOR 236
|
||||
#define LOOP_CTRL_MINOR 237
|
||||
#define MISC_DYNAMIC_MINOR 255
|
||||
|
||||
struct device;
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
#include <linux/blkdev.h>
|
||||
#include <linux/tracepoint.h>
|
||||
|
||||
#define RWBS_LEN 8
|
||||
|
||||
DECLARE_EVENT_CLASS(block_rq_with_error,
|
||||
|
||||
TP_PROTO(struct request_queue *q, struct request *rq),
|
||||
|
@ -19,7 +21,7 @@ DECLARE_EVENT_CLASS(block_rq_with_error,
|
|||
__field( sector_t, sector )
|
||||
__field( unsigned int, nr_sector )
|
||||
__field( int, errors )
|
||||
__array( char, rwbs, 6 )
|
||||
__array( char, rwbs, RWBS_LEN )
|
||||
__dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
|
||||
),
|
||||
|
||||
|
@ -104,7 +106,7 @@ DECLARE_EVENT_CLASS(block_rq,
|
|||
__field( sector_t, sector )
|
||||
__field( unsigned int, nr_sector )
|
||||
__field( unsigned int, bytes )
|
||||
__array( char, rwbs, 6 )
|
||||
__array( char, rwbs, RWBS_LEN )
|
||||
__array( char, comm, TASK_COMM_LEN )
|
||||
__dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
|
||||
),
|
||||
|
@ -183,7 +185,7 @@ TRACE_EVENT(block_bio_bounce,
|
|||
__field( dev_t, dev )
|
||||
__field( sector_t, sector )
|
||||
__field( unsigned int, nr_sector )
|
||||
__array( char, rwbs, 6 )
|
||||
__array( char, rwbs, RWBS_LEN )
|
||||
__array( char, comm, TASK_COMM_LEN )
|
||||
),
|
||||
|
||||
|
@ -222,7 +224,7 @@ TRACE_EVENT(block_bio_complete,
|
|||
__field( sector_t, sector )
|
||||
__field( unsigned, nr_sector )
|
||||
__field( int, error )
|
||||
__array( char, rwbs, 6 )
|
||||
__array( char, rwbs, RWBS_LEN)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
|
@ -249,7 +251,7 @@ DECLARE_EVENT_CLASS(block_bio,
|
|||
__field( dev_t, dev )
|
||||
__field( sector_t, sector )
|
||||
__field( unsigned int, nr_sector )
|
||||
__array( char, rwbs, 6 )
|
||||
__array( char, rwbs, RWBS_LEN )
|
||||
__array( char, comm, TASK_COMM_LEN )
|
||||
),
|
||||
|
||||
|
@ -321,7 +323,7 @@ DECLARE_EVENT_CLASS(block_get_rq,
|
|||
__field( dev_t, dev )
|
||||
__field( sector_t, sector )
|
||||
__field( unsigned int, nr_sector )
|
||||
__array( char, rwbs, 6 )
|
||||
__array( char, rwbs, RWBS_LEN )
|
||||
__array( char, comm, TASK_COMM_LEN )
|
||||
),
|
||||
|
||||
|
@ -456,7 +458,7 @@ TRACE_EVENT(block_split,
|
|||
__field( dev_t, dev )
|
||||
__field( sector_t, sector )
|
||||
__field( sector_t, new_sector )
|
||||
__array( char, rwbs, 6 )
|
||||
__array( char, rwbs, RWBS_LEN )
|
||||
__array( char, comm, TASK_COMM_LEN )
|
||||
),
|
||||
|
||||
|
@ -498,7 +500,7 @@ TRACE_EVENT(block_bio_remap,
|
|||
__field( unsigned int, nr_sector )
|
||||
__field( dev_t, old_dev )
|
||||
__field( sector_t, old_sector )
|
||||
__array( char, rwbs, 6 )
|
||||
__array( char, rwbs, RWBS_LEN)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
|
@ -542,7 +544,7 @@ TRACE_EVENT(block_rq_remap,
|
|||
__field( unsigned int, nr_sector )
|
||||
__field( dev_t, old_dev )
|
||||
__field( sector_t, old_sector )
|
||||
__array( char, rwbs, 6 )
|
||||
__array( char, rwbs, RWBS_LEN)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
|
|
|
@ -206,6 +206,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
|
|||
what |= MASK_TC_BIT(rw, RAHEAD);
|
||||
what |= MASK_TC_BIT(rw, META);
|
||||
what |= MASK_TC_BIT(rw, DISCARD);
|
||||
what |= MASK_TC_BIT(rw, FLUSH);
|
||||
what |= MASK_TC_BIT(rw, FUA);
|
||||
|
||||
pid = tsk->pid;
|
||||
if (act_log_check(bt, what, sector, pid))
|
||||
|
@ -1054,6 +1056,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
|
|||
goto out;
|
||||
}
|
||||
|
||||
if (tc & BLK_TC_FLUSH)
|
||||
rwbs[i++] = 'F';
|
||||
|
||||
if (tc & BLK_TC_DISCARD)
|
||||
rwbs[i++] = 'D';
|
||||
else if (tc & BLK_TC_WRITE)
|
||||
|
@ -1063,10 +1068,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
|
|||
else
|
||||
rwbs[i++] = 'N';
|
||||
|
||||
if (tc & BLK_TC_FUA)
|
||||
rwbs[i++] = 'F';
|
||||
if (tc & BLK_TC_AHEAD)
|
||||
rwbs[i++] = 'A';
|
||||
if (tc & BLK_TC_BARRIER)
|
||||
rwbs[i++] = 'B';
|
||||
if (tc & BLK_TC_SYNC)
|
||||
rwbs[i++] = 'S';
|
||||
if (tc & BLK_TC_META)
|
||||
|
@ -1132,7 +1137,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
|
|||
|
||||
static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
|
||||
{
|
||||
char rwbs[6];
|
||||
char rwbs[RWBS_LEN];
|
||||
unsigned long long ts = iter->ts;
|
||||
unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
|
||||
unsigned secs = (unsigned long)ts;
|
||||
|
@ -1148,7 +1153,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
|
|||
|
||||
static int blk_log_action(struct trace_iterator *iter, const char *act)
|
||||
{
|
||||
char rwbs[6];
|
||||
char rwbs[RWBS_LEN];
|
||||
const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
|
||||
|
||||
fill_rwbs(rwbs, t);
|
||||
|
@ -1561,7 +1566,7 @@ static const struct {
|
|||
} mask_maps[] = {
|
||||
{ BLK_TC_READ, "read" },
|
||||
{ BLK_TC_WRITE, "write" },
|
||||
{ BLK_TC_BARRIER, "barrier" },
|
||||
{ BLK_TC_FLUSH, "flush" },
|
||||
{ BLK_TC_SYNC, "sync" },
|
||||
{ BLK_TC_QUEUE, "queue" },
|
||||
{ BLK_TC_REQUEUE, "requeue" },
|
||||
|
@ -1573,6 +1578,7 @@ static const struct {
|
|||
{ BLK_TC_META, "meta" },
|
||||
{ BLK_TC_DISCARD, "discard" },
|
||||
{ BLK_TC_DRV_DATA, "drv_data" },
|
||||
{ BLK_TC_FUA, "fua" },
|
||||
};
|
||||
|
||||
static int blk_trace_str2mask(const char *str)
|
||||
|
@ -1788,6 +1794,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
|
|||
{
|
||||
int i = 0;
|
||||
|
||||
if (rw & REQ_FLUSH)
|
||||
rwbs[i++] = 'F';
|
||||
|
||||
if (rw & WRITE)
|
||||
rwbs[i++] = 'W';
|
||||
else if (rw & REQ_DISCARD)
|
||||
|
@ -1797,6 +1806,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
|
|||
else
|
||||
rwbs[i++] = 'N';
|
||||
|
||||
if (rw & REQ_FUA)
|
||||
rwbs[i++] = 'F';
|
||||
if (rw & REQ_RAHEAD)
|
||||
rwbs[i++] = 'A';
|
||||
if (rw & REQ_SYNC)
|
||||
|
|
Loading…
Reference in a new issue