implement in-kernel gendisk events handling
Currently, media presence polling for removable block devices is done from userland. There are several issues with this. * Polling is done by periodically opening the device. For SCSI devices, the command sequence generated by such an action involves a few different commands including TEST_UNIT_READY. This behavior, while perfectly legal, is different from Windows, which only issues a single command, GET_EVENT_STATUS_NOTIFICATION. Unfortunately, some ATAPI devices lock up after being periodically queried with such command sequences. * There is no reliable and unintrusive way for a userland program to tell whether the target device is safe for media presence polling. For example, polling for media presence during an on-going burning session can make it fail. The polling program can avoid this by opening the device with O_EXCL but then it risks making a valid exclusive user of the device fail w/ -EBUSY. * Userland polling is unnecessarily heavy and an in-kernel implementation is lighter and better coordinated (workqueue, timer slack). This patch implements a framework for in-kernel disk event handling, which includes media presence polling. * bdops->check_events() is added, which supersedes ->media_changed(). It should check whether there's any pending event and return it if so. Currently, two events are defined - DISK_EVENT_MEDIA_CHANGE and DISK_EVENT_EJECT_REQUEST. ->check_events() is guaranteed not to be called in parallel. * gendisk->events and ->async_events are added. These should be initialized by the block driver before passing the device to add_disk(). The former contains the mask of all supported events and the latter the mask of all events which the device can report without polling. /sys/block/*/events[_async] export these to userland. * Kernel parameter block.events_dfl_poll_msecs controls the system polling interval (default is 0 which means disable) and /sys/block/*/events_poll_msecs controls polling intervals for individual devices (default is -1 meaning use system setting). 
Note that if a device can report all supported events asynchronously and its polling interval isn't explicitly set, the device won't be polled regardless of the system polling interval. * If a device is opened exclusively with write access, event checking is automatically disabled until all write exclusive accesses are released. * There are event 'clearing' events. For example, both of the currently defined events are cleared after the device has been successfully opened. This information is passed to the ->check_events() callback using the @clearing argument as a hint. * Event checking is always performed from system_nrt_wq and timer slack is set to 25% for polling. * Nothing changes for drivers which implement ->media_changed() but not ->check_events(). Going forward, all drivers will be converted to ->check_events() and ->media_changed() will be dropped. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Kay Sievers <kay.sievers@vrfy.org> Cc: Jan Kara <jack@suse.cz> Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
This commit is contained in:
parent
d2bf1b6723
commit
77ea887e43
5 changed files with 484 additions and 8 deletions
429
block/genhd.c
429
block/genhd.c
|
@ -18,6 +18,7 @@
|
||||||
#include <linux/buffer_head.h>
|
#include <linux/buffer_head.h>
|
||||||
#include <linux/mutex.h>
|
#include <linux/mutex.h>
|
||||||
#include <linux/idr.h>
|
#include <linux/idr.h>
|
||||||
|
#include <linux/log2.h>
|
||||||
|
|
||||||
#include "blk.h"
|
#include "blk.h"
|
||||||
|
|
||||||
|
@ -35,6 +36,10 @@ static DEFINE_IDR(ext_devt_idr);
|
||||||
|
|
||||||
static struct device_type disk_type;
|
static struct device_type disk_type;
|
||||||
|
|
||||||
|
static void disk_add_events(struct gendisk *disk);
|
||||||
|
static void disk_del_events(struct gendisk *disk);
|
||||||
|
static void disk_release_events(struct gendisk *disk);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* disk_get_part - get partition
|
* disk_get_part - get partition
|
||||||
* @disk: disk to look partition from
|
* @disk: disk to look partition from
|
||||||
|
@ -609,6 +614,8 @@ void add_disk(struct gendisk *disk)
|
||||||
retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
|
retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
|
||||||
"bdi");
|
"bdi");
|
||||||
WARN_ON(retval);
|
WARN_ON(retval);
|
||||||
|
|
||||||
|
disk_add_events(disk);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(add_disk);
|
EXPORT_SYMBOL(add_disk);
|
||||||
|
|
||||||
|
@ -617,6 +624,8 @@ void del_gendisk(struct gendisk *disk)
|
||||||
struct disk_part_iter piter;
|
struct disk_part_iter piter;
|
||||||
struct hd_struct *part;
|
struct hd_struct *part;
|
||||||
|
|
||||||
|
disk_del_events(disk);
|
||||||
|
|
||||||
/* invalidate stuff */
|
/* invalidate stuff */
|
||||||
disk_part_iter_init(&piter, disk,
|
disk_part_iter_init(&piter, disk,
|
||||||
DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
|
DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
|
||||||
|
@ -1089,6 +1098,7 @@ static void disk_release(struct device *dev)
|
||||||
{
|
{
|
||||||
struct gendisk *disk = dev_to_disk(dev);
|
struct gendisk *disk = dev_to_disk(dev);
|
||||||
|
|
||||||
|
disk_release_events(disk);
|
||||||
kfree(disk->random);
|
kfree(disk->random);
|
||||||
disk_replace_part_tbl(disk, NULL);
|
disk_replace_part_tbl(disk, NULL);
|
||||||
free_part_stats(&disk->part0);
|
free_part_stats(&disk->part0);
|
||||||
|
@ -1350,3 +1360,422 @@ int invalidate_partition(struct gendisk *disk, int partno)
|
||||||
}
|
}
|
||||||
|
|
||||||
EXPORT_SYMBOL(invalidate_partition);
|
EXPORT_SYMBOL(invalidate_partition);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Disk events - monitor disk events like media change and eject request.
|
||||||
|
*/
|
||||||
|
struct disk_events {
|
||||||
|
struct list_head node; /* all disk_event's */
|
||||||
|
struct gendisk *disk; /* the associated disk */
|
||||||
|
spinlock_t lock;
|
||||||
|
|
||||||
|
int block; /* event blocking depth */
|
||||||
|
unsigned int pending; /* events already sent out */
|
||||||
|
unsigned int clearing; /* events being cleared */
|
||||||
|
|
||||||
|
long poll_msecs; /* interval, -1 for default */
|
||||||
|
struct delayed_work dwork;
|
||||||
|
};
|
||||||
|
|
||||||
|
static const char *disk_events_strs[] = {
|
||||||
|
[ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change",
|
||||||
|
[ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request",
|
||||||
|
};
|
||||||
|
|
||||||
|
static char *disk_uevents[] = {
|
||||||
|
[ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1",
|
||||||
|
[ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1",
|
||||||
|
};
|
||||||
|
|
||||||
|
/* list of all disk_events */
|
||||||
|
static DEFINE_MUTEX(disk_events_mutex);
|
||||||
|
static LIST_HEAD(disk_events);
|
||||||
|
|
||||||
|
/* disable in-kernel polling by default */
|
||||||
|
static unsigned long disk_events_dfl_poll_msecs = 0;
|
||||||
|
|
||||||
|
static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
|
||||||
|
{
|
||||||
|
struct disk_events *ev = disk->ev;
|
||||||
|
long intv_msecs = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If device-specific poll interval is set, always use it. If
|
||||||
|
* the default is being used, poll iff there are events which
|
||||||
|
* can't be monitored asynchronously.
|
||||||
|
*/
|
||||||
|
if (ev->poll_msecs >= 0)
|
||||||
|
intv_msecs = ev->poll_msecs;
|
||||||
|
else if (disk->events & ~disk->async_events)
|
||||||
|
intv_msecs = disk_events_dfl_poll_msecs;
|
||||||
|
|
||||||
|
return msecs_to_jiffies(intv_msecs);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Bump the block count of @disk's events.  The caller that takes the
 * count from zero also cancels the queued event work; with @sync set the
 * cancellation waits for a running work item to finish.
 */
static void __disk_block_events(struct gendisk *disk, bool sync)
{
	struct disk_events *ev = disk->ev;
	unsigned long flags;
	bool first;

	spin_lock_irqsave(&ev->lock, flags);
	first = (ev->block == 0);
	ev->block++;
	spin_unlock_irqrestore(&ev->lock, flags);

	/* nested blockers have nothing left to cancel */
	if (!first)
		return;

	if (sync)
		cancel_delayed_work_sync(&ev->dwork);
	else
		cancel_delayed_work(&ev->dwork);
}
|
||||||
|
|
||||||
|
/*
 * Drop one block count from @disk's events.  When the count reaches
 * zero the event work is queued again: immediately if @check_now is
 * set, otherwise after the poll interval (and not at all if polling is
 * disabled).
 */
static void __disk_unblock_events(struct gendisk *disk, bool check_now)
{
	struct disk_events *ev = disk->ev;
	unsigned long flags;

	spin_lock_irqsave(&ev->lock, flags);

	/*
	 * An unbalanced unblock triggers a one-time warning and leaves
	 * the count untouched.  Only the final unblock restarts checking.
	 */
	if (!WARN_ON_ONCE(ev->block <= 0) && --ev->block == 0) {
		unsigned long delay = disk_events_poll_jiffies(disk);

		/* polling isn't latency critical, allow 25% timer slack */
		set_timer_slack(&ev->dwork.timer, delay / 4);

		if (check_now)
			queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
		else if (delay)
			queue_delayed_work(system_nrt_wq, &ev->dwork, delay);
	}

	spin_unlock_irqrestore(&ev->lock, flags);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* disk_block_events - block and flush disk event checking
|
||||||
|
* @disk: disk to block events for
|
||||||
|
*
|
||||||
|
* On return from this function, it is guaranteed that event checking
|
||||||
|
* isn't in progress and won't happen until unblocked by
|
||||||
|
* disk_unblock_events(). Events blocking is counted and the actual
|
||||||
|
* unblocking happens after the matching number of unblocks are done.
|
||||||
|
*
|
||||||
|
* Note that this intentionally does not block event checking from
|
||||||
|
* disk_clear_events().
|
||||||
|
*
|
||||||
|
* CONTEXT:
|
||||||
|
* Might sleep.
|
||||||
|
*/
|
||||||
|
void disk_block_events(struct gendisk *disk)
|
||||||
|
{
|
||||||
|
if (disk->ev)
|
||||||
|
__disk_block_events(disk, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* disk_unblock_events - unblock disk event checking
|
||||||
|
* @disk: disk to unblock events for
|
||||||
|
*
|
||||||
|
* Undo disk_block_events(). When the block count reaches zero, it
|
||||||
|
* starts events polling if configured.
|
||||||
|
*
|
||||||
|
* CONTEXT:
|
||||||
|
* Don't care. Safe to call from irq context.
|
||||||
|
*/
|
||||||
|
void disk_unblock_events(struct gendisk *disk)
|
||||||
|
{
|
||||||
|
if (disk->ev)
|
||||||
|
__disk_unblock_events(disk, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* disk_check_events - schedule immediate event checking
|
||||||
|
* @disk: disk to check events for
|
||||||
|
*
|
||||||
|
* Schedule immediate event checking on @disk if not blocked.
|
||||||
|
*
|
||||||
|
* CONTEXT:
|
||||||
|
* Don't care. Safe to call from irq context.
|
||||||
|
*/
|
||||||
|
void disk_check_events(struct gendisk *disk)
|
||||||
|
{
|
||||||
|
if (disk->ev) {
|
||||||
|
__disk_block_events(disk, false);
|
||||||
|
__disk_unblock_events(disk, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(disk_check_events);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* disk_clear_events - synchronously check, clear and return pending events
|
||||||
|
* @disk: disk to fetch and clear events from
|
||||||
|
* @mask: mask of events to be fetched and clearted
|
||||||
|
*
|
||||||
|
* Disk events are synchronously checked and pending events in @mask
|
||||||
|
* are cleared and returned. This ignores the block count.
|
||||||
|
*
|
||||||
|
* CONTEXT:
|
||||||
|
* Might sleep.
|
||||||
|
*/
|
||||||
|
unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
|
||||||
|
{
|
||||||
|
const struct block_device_operations *bdops = disk->fops;
|
||||||
|
struct disk_events *ev = disk->ev;
|
||||||
|
unsigned int pending;
|
||||||
|
|
||||||
|
if (!ev) {
|
||||||
|
/* for drivers still using the old ->media_changed method */
|
||||||
|
if ((mask & DISK_EVENT_MEDIA_CHANGE) &&
|
||||||
|
bdops->media_changed && bdops->media_changed(disk))
|
||||||
|
return DISK_EVENT_MEDIA_CHANGE;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* tell the workfn about the events being cleared */
|
||||||
|
spin_lock_irq(&ev->lock);
|
||||||
|
ev->clearing |= mask;
|
||||||
|
spin_unlock_irq(&ev->lock);
|
||||||
|
|
||||||
|
/* uncondtionally schedule event check and wait for it to finish */
|
||||||
|
__disk_block_events(disk, true);
|
||||||
|
queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
|
||||||
|
flush_delayed_work(&ev->dwork);
|
||||||
|
__disk_unblock_events(disk, false);
|
||||||
|
|
||||||
|
/* then, fetch and clear pending events */
|
||||||
|
spin_lock_irq(&ev->lock);
|
||||||
|
WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */
|
||||||
|
pending = ev->pending & mask;
|
||||||
|
ev->pending &= ~mask;
|
||||||
|
spin_unlock_irq(&ev->lock);
|
||||||
|
|
||||||
|
return pending;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void disk_events_workfn(struct work_struct *work)
|
||||||
|
{
|
||||||
|
struct delayed_work *dwork = to_delayed_work(work);
|
||||||
|
struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
|
||||||
|
struct gendisk *disk = ev->disk;
|
||||||
|
char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
|
||||||
|
unsigned int clearing = ev->clearing;
|
||||||
|
unsigned int events;
|
||||||
|
unsigned long intv;
|
||||||
|
int nr_events = 0, i;
|
||||||
|
|
||||||
|
/* check events */
|
||||||
|
events = disk->fops->check_events(disk, clearing);
|
||||||
|
|
||||||
|
/* accumulate pending events and schedule next poll if necessary */
|
||||||
|
spin_lock_irq(&ev->lock);
|
||||||
|
|
||||||
|
events &= ~ev->pending;
|
||||||
|
ev->pending |= events;
|
||||||
|
ev->clearing &= ~clearing;
|
||||||
|
|
||||||
|
intv = disk_events_poll_jiffies(disk);
|
||||||
|
if (!ev->block && intv)
|
||||||
|
queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
|
||||||
|
|
||||||
|
spin_unlock_irq(&ev->lock);
|
||||||
|
|
||||||
|
/* tell userland about new events */
|
||||||
|
for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
|
||||||
|
if (events & (1 << i))
|
||||||
|
envp[nr_events++] = disk_uevents[i];
|
||||||
|
|
||||||
|
if (nr_events)
|
||||||
|
kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A disk events enabled device has the following sysfs nodes under
|
||||||
|
* its /sys/block/X/ directory.
|
||||||
|
*
|
||||||
|
* events : list of all supported events
|
||||||
|
* events_async : list of events which can be detected w/o polling
|
||||||
|
* events_poll_msecs : polling interval, 0: disable, -1: system default
|
||||||
|
*/
|
||||||
|
static ssize_t __disk_events_show(unsigned int events, char *buf)
|
||||||
|
{
|
||||||
|
const char *delim = "";
|
||||||
|
ssize_t pos = 0;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++)
|
||||||
|
if (events & (1 << i)) {
|
||||||
|
pos += sprintf(buf + pos, "%s%s",
|
||||||
|
delim, disk_events_strs[i]);
|
||||||
|
delim = " ";
|
||||||
|
}
|
||||||
|
if (pos)
|
||||||
|
pos += sprintf(buf + pos, "\n");
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t disk_events_show(struct device *dev,
|
||||||
|
struct device_attribute *attr, char *buf)
|
||||||
|
{
|
||||||
|
struct gendisk *disk = dev_to_disk(dev);
|
||||||
|
|
||||||
|
return __disk_events_show(disk->events, buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t disk_events_async_show(struct device *dev,
|
||||||
|
struct device_attribute *attr, char *buf)
|
||||||
|
{
|
||||||
|
struct gendisk *disk = dev_to_disk(dev);
|
||||||
|
|
||||||
|
return __disk_events_show(disk->async_events, buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t disk_events_poll_msecs_show(struct device *dev,
|
||||||
|
struct device_attribute *attr,
|
||||||
|
char *buf)
|
||||||
|
{
|
||||||
|
struct gendisk *disk = dev_to_disk(dev);
|
||||||
|
|
||||||
|
return sprintf(buf, "%ld\n", disk->ev->poll_msecs);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * sysfs "events_poll_msecs" write: set the per-disk poll interval.
 * Accepts a non-negative number of msecs or -1 for the system default;
 * anything else yields -EINVAL.  Returns @count on success.
 */
static ssize_t disk_events_poll_msecs_store(struct device *dev,
					    struct device_attribute *attr,
					    const char *buf, size_t count)
{
	struct gendisk *disk = dev_to_disk(dev);
	long msecs;

	if (!count)
		return -EINVAL;
	if (sscanf(buf, "%ld", &msecs) == 0)
		return -EINVAL;

	/* -1 is the only negative value with a meaning */
	if (msecs < -1)
		return -EINVAL;

	/*
	 * Change the interval with event checking blocked; the unblock
	 * then queues an immediate check.
	 */
	__disk_block_events(disk, true);
	disk->ev->poll_msecs = msecs;
	__disk_unblock_events(disk, true);

	return count;
}
|
||||||
|
|
||||||
|
static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL);
|
||||||
|
static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL);
|
||||||
|
static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR,
|
||||||
|
disk_events_poll_msecs_show,
|
||||||
|
disk_events_poll_msecs_store);
|
||||||
|
|
||||||
|
static const struct attribute *disk_events_attrs[] = {
|
||||||
|
&dev_attr_events.attr,
|
||||||
|
&dev_attr_events_async.attr,
|
||||||
|
&dev_attr_events_poll_msecs.attr,
|
||||||
|
NULL,
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The default polling interval can be specified by the kernel
|
||||||
|
* parameter block.events_dfl_poll_msecs which defaults to 0
|
||||||
|
* (disable). This can also be modified runtime by writing to
|
||||||
|
* /sys/module/block/events_dfl_poll_msecs.
|
||||||
|
*/
|
||||||
|
static int disk_events_set_dfl_poll_msecs(const char *val,
|
||||||
|
const struct kernel_param *kp)
|
||||||
|
{
|
||||||
|
struct disk_events *ev;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = param_set_ulong(val, kp);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
mutex_lock(&disk_events_mutex);
|
||||||
|
|
||||||
|
list_for_each_entry(ev, &disk_events, node)
|
||||||
|
disk_check_events(ev->disk);
|
||||||
|
|
||||||
|
mutex_unlock(&disk_events_mutex);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = {
|
||||||
|
.set = disk_events_set_dfl_poll_msecs,
|
||||||
|
.get = param_get_ulong,
|
||||||
|
};
|
||||||
|
|
||||||
|
#undef MODULE_PARAM_PREFIX
|
||||||
|
#define MODULE_PARAM_PREFIX "block."
|
||||||
|
|
||||||
|
module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
|
||||||
|
&disk_events_dfl_poll_msecs, 0644);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* disk_{add|del|release}_events - initialize and destroy disk_events.
|
||||||
|
*/
|
||||||
|
static void disk_add_events(struct gendisk *disk)
|
||||||
|
{
|
||||||
|
struct disk_events *ev;
|
||||||
|
|
||||||
|
if (!disk->fops->check_events || !(disk->events | disk->async_events))
|
||||||
|
return;
|
||||||
|
|
||||||
|
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
|
||||||
|
if (!ev) {
|
||||||
|
pr_warn("%s: failed to initialize events\n", disk->disk_name);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sysfs_create_files(&disk_to_dev(disk)->kobj,
|
||||||
|
disk_events_attrs) < 0) {
|
||||||
|
pr_warn("%s: failed to create sysfs files for events\n",
|
||||||
|
disk->disk_name);
|
||||||
|
kfree(ev);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
disk->ev = ev;
|
||||||
|
|
||||||
|
INIT_LIST_HEAD(&ev->node);
|
||||||
|
ev->disk = disk;
|
||||||
|
spin_lock_init(&ev->lock);
|
||||||
|
ev->block = 1;
|
||||||
|
ev->poll_msecs = -1;
|
||||||
|
INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
|
||||||
|
|
||||||
|
mutex_lock(&disk_events_mutex);
|
||||||
|
list_add_tail(&ev->node, &disk_events);
|
||||||
|
mutex_unlock(&disk_events_mutex);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Block count is initialized to 1 and the following initial
|
||||||
|
* unblock kicks it into action.
|
||||||
|
*/
|
||||||
|
__disk_unblock_events(disk, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void disk_del_events(struct gendisk *disk)
|
||||||
|
{
|
||||||
|
if (!disk->ev)
|
||||||
|
return;
|
||||||
|
|
||||||
|
__disk_block_events(disk, true);
|
||||||
|
|
||||||
|
mutex_lock(&disk_events_mutex);
|
||||||
|
list_del_init(&disk->ev->node);
|
||||||
|
mutex_unlock(&disk_events_mutex);
|
||||||
|
|
||||||
|
sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void disk_release_events(struct gendisk *disk)
|
||||||
|
{
|
||||||
|
/* the block count should be 1 from disk_del_events() */
|
||||||
|
WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
|
||||||
|
kfree(disk->ev);
|
||||||
|
}
|
||||||
|
|
|
@ -948,10 +948,11 @@ int check_disk_change(struct block_device *bdev)
|
||||||
{
|
{
|
||||||
struct gendisk *disk = bdev->bd_disk;
|
struct gendisk *disk = bdev->bd_disk;
|
||||||
const struct block_device_operations *bdops = disk->fops;
|
const struct block_device_operations *bdops = disk->fops;
|
||||||
|
unsigned int events;
|
||||||
|
|
||||||
if (!bdops->media_changed)
|
events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
|
||||||
return 0;
|
DISK_EVENT_EJECT_REQUEST);
|
||||||
if (!bdops->media_changed(bdev->bd_disk))
|
if (!(events & DISK_EVENT_MEDIA_CHANGE))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
flush_disk(bdev);
|
flush_disk(bdev);
|
||||||
|
@ -1158,9 +1159,10 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
|
||||||
|
|
||||||
if (whole) {
|
if (whole) {
|
||||||
/* finish claiming */
|
/* finish claiming */
|
||||||
|
mutex_lock(&bdev->bd_mutex);
|
||||||
spin_lock(&bdev_lock);
|
spin_lock(&bdev_lock);
|
||||||
|
|
||||||
if (res == 0) {
|
if (!res) {
|
||||||
BUG_ON(!bd_may_claim(bdev, whole, holder));
|
BUG_ON(!bd_may_claim(bdev, whole, holder));
|
||||||
/*
|
/*
|
||||||
* Note that for a whole device bd_holders
|
* Note that for a whole device bd_holders
|
||||||
|
@ -1180,6 +1182,20 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
|
||||||
wake_up_bit(&whole->bd_claiming, 0);
|
wake_up_bit(&whole->bd_claiming, 0);
|
||||||
|
|
||||||
spin_unlock(&bdev_lock);
|
spin_unlock(&bdev_lock);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Block event polling for write claims. Any write
|
||||||
|
* holder makes the write_holder state stick until all
|
||||||
|
* are released. This is good enough and tracking
|
||||||
|
* individual writeable reference is too fragile given
|
||||||
|
* the way @mode is used in blkdev_get/put().
|
||||||
|
*/
|
||||||
|
if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
|
||||||
|
bdev->bd_write_holder = true;
|
||||||
|
disk_block_events(bdev->bd_disk);
|
||||||
|
}
|
||||||
|
|
||||||
|
mutex_unlock(&bdev->bd_mutex);
|
||||||
bdput(whole);
|
bdput(whole);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1353,12 +1369,23 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
|
||||||
|
|
||||||
spin_unlock(&bdev_lock);
|
spin_unlock(&bdev_lock);
|
||||||
|
|
||||||
/* if this was the last claim, holder link should go too */
|
/*
|
||||||
if (bdev_free)
|
* If this was the last claim, remove holder link and
|
||||||
|
* unblock evpoll if it was a write holder.
|
||||||
|
*/
|
||||||
|
if (bdev_free) {
|
||||||
bd_unlink_disk_holder(bdev);
|
bd_unlink_disk_holder(bdev);
|
||||||
|
if (bdev->bd_write_holder) {
|
||||||
|
disk_unblock_events(bdev->bd_disk);
|
||||||
|
bdev->bd_write_holder = false;
|
||||||
|
} else
|
||||||
|
disk_check_events(bdev->bd_disk);
|
||||||
|
}
|
||||||
|
|
||||||
mutex_unlock(&bdev->bd_mutex);
|
mutex_unlock(&bdev->bd_mutex);
|
||||||
}
|
} else
|
||||||
|
disk_check_events(bdev->bd_disk);
|
||||||
|
|
||||||
return __blkdev_put(bdev, mode, 0);
|
return __blkdev_put(bdev, mode, 0);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(blkdev_put);
|
EXPORT_SYMBOL(blkdev_put);
|
||||||
|
|
|
@ -1251,6 +1251,9 @@ struct block_device_operations {
|
||||||
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
|
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
|
||||||
int (*direct_access) (struct block_device *, sector_t,
|
int (*direct_access) (struct block_device *, sector_t,
|
||||||
void **, unsigned long *);
|
void **, unsigned long *);
|
||||||
|
unsigned int (*check_events) (struct gendisk *disk,
|
||||||
|
unsigned int clearing);
|
||||||
|
/* ->media_changed() is DEPRECATED, use ->check_events() instead */
|
||||||
int (*media_changed) (struct gendisk *);
|
int (*media_changed) (struct gendisk *);
|
||||||
void (*unlock_native_capacity) (struct gendisk *);
|
void (*unlock_native_capacity) (struct gendisk *);
|
||||||
int (*revalidate_disk) (struct gendisk *);
|
int (*revalidate_disk) (struct gendisk *);
|
||||||
|
|
|
@ -662,6 +662,7 @@ struct block_device {
|
||||||
void * bd_claiming;
|
void * bd_claiming;
|
||||||
void * bd_holder;
|
void * bd_holder;
|
||||||
int bd_holders;
|
int bd_holders;
|
||||||
|
bool bd_write_holder;
|
||||||
#ifdef CONFIG_SYSFS
|
#ifdef CONFIG_SYSFS
|
||||||
struct gendisk * bd_holder_disk; /* for sysfs slave linkng */
|
struct gendisk * bd_holder_disk; /* for sysfs slave linkng */
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -127,6 +127,11 @@ struct hd_struct {
|
||||||
#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */
|
#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */
|
||||||
#define GENHD_FL_NATIVE_CAPACITY 128
|
#define GENHD_FL_NATIVE_CAPACITY 128
|
||||||
|
|
||||||
|
/* disk event types; one bit each so they can be combined into masks */
enum {
	DISK_EVENT_MEDIA_CHANGE			= 1 << 0, /* media changed */
	DISK_EVENT_EJECT_REQUEST		= 1 << 1, /* eject requested */
};
|
||||||
|
|
||||||
#define BLK_SCSI_MAX_CMDS (256)
|
#define BLK_SCSI_MAX_CMDS (256)
|
||||||
#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
|
#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
|
||||||
|
|
||||||
|
@ -143,6 +148,8 @@ struct disk_part_tbl {
|
||||||
struct hd_struct __rcu *part[];
|
struct hd_struct __rcu *part[];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct disk_events;
|
||||||
|
|
||||||
struct gendisk {
|
struct gendisk {
|
||||||
/* major, first_minor and minors are input parameters only,
|
/* major, first_minor and minors are input parameters only,
|
||||||
* don't use directly. Use disk_devt() and disk_max_parts().
|
* don't use directly. Use disk_devt() and disk_max_parts().
|
||||||
|
@ -154,6 +161,10 @@ struct gendisk {
|
||||||
|
|
||||||
char disk_name[DISK_NAME_LEN]; /* name of major driver */
|
char disk_name[DISK_NAME_LEN]; /* name of major driver */
|
||||||
char *(*devnode)(struct gendisk *gd, mode_t *mode);
|
char *(*devnode)(struct gendisk *gd, mode_t *mode);
|
||||||
|
|
||||||
|
unsigned int events; /* supported events */
|
||||||
|
unsigned int async_events; /* async events, subset of all */
|
||||||
|
|
||||||
/* Array of pointers to partitions indexed by partno.
|
/* Array of pointers to partitions indexed by partno.
|
||||||
* Protected with matching bdev lock but stat and other
|
* Protected with matching bdev lock but stat and other
|
||||||
* non-critical accesses use RCU. Always access through
|
* non-critical accesses use RCU. Always access through
|
||||||
|
@ -171,8 +182,8 @@ struct gendisk {
|
||||||
struct kobject *slave_dir;
|
struct kobject *slave_dir;
|
||||||
|
|
||||||
struct timer_rand_state *random;
|
struct timer_rand_state *random;
|
||||||
|
|
||||||
atomic_t sync_io; /* RAID */
|
atomic_t sync_io; /* RAID */
|
||||||
|
struct disk_events *ev;
|
||||||
#ifdef CONFIG_BLK_DEV_INTEGRITY
|
#ifdef CONFIG_BLK_DEV_INTEGRITY
|
||||||
struct blk_integrity *integrity;
|
struct blk_integrity *integrity;
|
||||||
#endif
|
#endif
|
||||||
|
@ -405,6 +416,11 @@ static inline int get_disk_ro(struct gendisk *disk)
|
||||||
return disk->part0.policy;
|
return disk->part0.policy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* block/genhd.c: disk event (media change, eject request) handling */
extern void disk_block_events(struct gendisk *disk);
extern void disk_unblock_events(struct gendisk *disk);
extern void disk_check_events(struct gendisk *disk);
extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask);
|
||||||
|
|
||||||
/* drivers/char/random.c */
|
/* drivers/char/random.c */
|
||||||
extern void add_disk_randomness(struct gendisk *disk);
|
extern void add_disk_randomness(struct gendisk *disk);
|
||||||
extern void rand_initialize_disk(struct gendisk *disk);
|
extern void rand_initialize_disk(struct gendisk *disk);
|
||||||
|
|
Loading…
Reference in a new issue