430c62fb29
2.6.36 introduces an API for drivers to switch the IO scheduler instead of manually calling the elevator exit and init functions. This API was added since q->elevator must be cleared in between those two calls. And since we already have this functionality directly from use by the sysfs interface to switch schedulers online, it was prudent to reuse it internally too. But this API needs the queue to be in a fully initialized state before it is called, or it will attempt to unregister elevator kobjects before they have been added. This results in an oops like this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000051 IP: [<ffffffff8116f15e>] sysfs_create_dir+0x2e/0xc0 PGD 47ddfc067 PUD 47c6a1067 PMD 0 Oops: 0000 [#1] PREEMPT SMP last sysfs file: /sys/devices/pci0000:00/0000:00:02.0/0000:04:00.1/irq CPU 2 Modules linked in: t(+) loop hid_apple usbhid ahci ehci_hcd uhci_hcd libahci usbcore nls_base igb Pid: 7319, comm: modprobe Not tainted 2.6.36-rc6+ #132 QSSC-S4R/QSSC-S4R RIP: 0010:[<ffffffff8116f15e>] [<ffffffff8116f15e>] sysfs_create_dir+0x2e/0xc0 RSP: 0018:ffff88027da25d08 EFLAGS: 00010246 RAX: ffff88047c68c528 RBX: 00000000fffffffe RCX: 0000000000000000 RDX: 000000000000002f RSI: 000000000000002f RDI: ffff88047e196c88 RBP: ffff88027da25d38 R08: 0000000000000000 R09: d84156c5635688c0 R10: d84156c5635688c0 R11: 0000000000000000 R12: ffff88047e196c88 R13: 0000000000000000 R14: 0000000000000000 R15: ffff88047c68c528 FS: 00007fcb0b26f6e0(0000) GS:ffff880287400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000051 CR3: 000000047e76e000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process modprobe (pid: 7319, threadinfo ffff88027da24000, task ffff88027d377090) Stack: ffff88027da25d58 ffff88047c68c528 00000000fffffffe ffff88047e196c88 <0> ffff88047c68c528 ffff88047e05bd90 ffff88027da25d78 
ffffffff8123fb77 <0> ffff88047e05bd90 0000000000000000 ffff88047e196c88 ffff88047c68c528 Call Trace: [<ffffffff8123fb77>] kobject_add_internal+0xe7/0x1f0 [<ffffffff8123fd98>] kobject_add_varg+0x38/0x60 [<ffffffff8123feb9>] kobject_add+0x69/0x90 [<ffffffff8116efe0>] ? sysfs_remove_dir+0x20/0xa0 [<ffffffff8103d48d>] ? sub_preempt_count+0x9d/0xe0 [<ffffffff8143de20>] ? _raw_spin_unlock+0x30/0x50 [<ffffffff8116efe0>] ? sysfs_remove_dir+0x20/0xa0 [<ffffffff8116eff4>] ? sysfs_remove_dir+0x34/0xa0 [<ffffffff81224204>] elv_register_queue+0x34/0xa0 [<ffffffff81224aad>] elevator_change+0xfd/0x250 [<ffffffffa007e000>] ? t_init+0x0/0x361 [t] [<ffffffffa007e000>] ? t_init+0x0/0x361 [t] [<ffffffffa007e0a8>] t_init+0xa8/0x361 [t] [<ffffffff810001de>] do_one_initcall+0x3e/0x170 [<ffffffff8108c3fd>] sys_init_module+0xbd/0x220 [<ffffffff81002f2b>] system_call_fastpath+0x16/0x1b Code: e5 41 56 41 55 41 54 49 89 fc 53 48 83 ec 10 48 85 ff 74 52 48 8b 47 18 49 c7 c5 00 46 61 81 48 85 c0 74 04 4c 8b 68 30 45 31 f6 <41> 80 7d 51 00 74 0e 49 8b 44 24 28 4c 89 e7 ff 50 20 49 89 c6 RIP [<ffffffff8116f15e>] sysfs_create_dir+0x2e/0xc0 RSP <ffff88027da25d08> CR2: 0000000000000051 ---[ end trace a6541d3bf07945df ]--- Fix this by adding a registered bit to the elevator queue, which is set when the sysfs kobjects have been registered. Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
219 lines
7.1 KiB
C
219 lines
7.1 KiB
C
#ifndef _LINUX_ELEVATOR_H
|
|
#define _LINUX_ELEVATOR_H
|
|
|
|
#include <linux/percpu.h>
|
|
|
|
#ifdef CONFIG_BLOCK
|
|
|
|
typedef int (elevator_merge_fn) (struct request_queue *, struct request **,
|
|
struct bio *);
|
|
|
|
typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *, struct request *);
|
|
|
|
typedef void (elevator_merged_fn) (struct request_queue *, struct request *, int);
|
|
|
|
typedef int (elevator_allow_merge_fn) (struct request_queue *, struct request *, struct bio *);
|
|
|
|
typedef void (elevator_bio_merged_fn) (struct request_queue *,
|
|
struct request *, struct bio *);
|
|
|
|
typedef int (elevator_dispatch_fn) (struct request_queue *, int);
|
|
|
|
typedef void (elevator_add_req_fn) (struct request_queue *, struct request *);
|
|
typedef int (elevator_queue_empty_fn) (struct request_queue *);
|
|
typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *);
|
|
typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
|
|
typedef int (elevator_may_queue_fn) (struct request_queue *, int);
|
|
|
|
typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t);
|
|
typedef void (elevator_put_req_fn) (struct request *);
|
|
typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
|
|
typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
|
|
|
|
typedef void *(elevator_init_fn) (struct request_queue *);
|
|
typedef void (elevator_exit_fn) (struct elevator_queue *);
|
|
|
|
struct elevator_ops
|
|
{
|
|
elevator_merge_fn *elevator_merge_fn;
|
|
elevator_merged_fn *elevator_merged_fn;
|
|
elevator_merge_req_fn *elevator_merge_req_fn;
|
|
elevator_allow_merge_fn *elevator_allow_merge_fn;
|
|
elevator_bio_merged_fn *elevator_bio_merged_fn;
|
|
|
|
elevator_dispatch_fn *elevator_dispatch_fn;
|
|
elevator_add_req_fn *elevator_add_req_fn;
|
|
elevator_activate_req_fn *elevator_activate_req_fn;
|
|
elevator_deactivate_req_fn *elevator_deactivate_req_fn;
|
|
|
|
elevator_queue_empty_fn *elevator_queue_empty_fn;
|
|
elevator_completed_req_fn *elevator_completed_req_fn;
|
|
|
|
elevator_request_list_fn *elevator_former_req_fn;
|
|
elevator_request_list_fn *elevator_latter_req_fn;
|
|
|
|
elevator_set_req_fn *elevator_set_req_fn;
|
|
elevator_put_req_fn *elevator_put_req_fn;
|
|
|
|
elevator_may_queue_fn *elevator_may_queue_fn;
|
|
|
|
elevator_init_fn *elevator_init_fn;
|
|
elevator_exit_fn *elevator_exit_fn;
|
|
void (*trim)(struct io_context *);
|
|
};
|
|
|
|
/* size, in chars, of the elevator_name buffer in struct elevator_type */
#define ELV_NAME_MAX (16)
|
|
|
|
struct elv_fs_entry {
|
|
struct attribute attr;
|
|
ssize_t (*show)(struct elevator_queue *, char *);
|
|
ssize_t (*store)(struct elevator_queue *, const char *, size_t);
|
|
};
|
|
|
|
/*
|
|
* identifies an elevator type, such as AS or deadline
|
|
*/
|
|
struct elevator_type
|
|
{
|
|
struct list_head list;
|
|
struct elevator_ops ops;
|
|
struct elv_fs_entry *elevator_attrs;
|
|
char elevator_name[ELV_NAME_MAX];
|
|
struct module *elevator_owner;
|
|
};
|
|
|
|
/*
|
|
* each queue has an elevator_queue associated with it
|
|
*/
|
|
struct elevator_queue
|
|
{
|
|
struct elevator_ops *ops;
|
|
void *elevator_data;
|
|
struct kobject kobj;
|
|
struct elevator_type *elevator_type;
|
|
struct mutex sysfs_lock;
|
|
struct hlist_head *hash;
|
|
unsigned int registered:1;
|
|
};
|
|
|
|
/*
|
|
* block elevator interface
|
|
*/
|
|
extern void elv_dispatch_sort(struct request_queue *, struct request *);
|
|
extern void elv_dispatch_add_tail(struct request_queue *, struct request *);
|
|
extern void elv_add_request(struct request_queue *, struct request *, int, int);
|
|
extern void __elv_add_request(struct request_queue *, struct request *, int, int);
|
|
extern void elv_insert(struct request_queue *, struct request *, int);
|
|
extern int elv_merge(struct request_queue *, struct request **, struct bio *);
|
|
extern void elv_merge_requests(struct request_queue *, struct request *,
|
|
struct request *);
|
|
extern void elv_merged_request(struct request_queue *, struct request *, int);
|
|
extern void elv_bio_merged(struct request_queue *q, struct request *,
|
|
struct bio *);
|
|
extern void elv_requeue_request(struct request_queue *, struct request *);
|
|
extern int elv_queue_empty(struct request_queue *);
|
|
extern struct request *elv_former_request(struct request_queue *, struct request *);
|
|
extern struct request *elv_latter_request(struct request_queue *, struct request *);
|
|
extern int elv_register_queue(struct request_queue *q);
|
|
extern void elv_unregister_queue(struct request_queue *q);
|
|
extern int elv_may_queue(struct request_queue *, int);
|
|
extern void elv_abort_queue(struct request_queue *);
|
|
extern void elv_completed_request(struct request_queue *, struct request *);
|
|
extern int elv_set_request(struct request_queue *, struct request *, gfp_t);
|
|
extern void elv_put_request(struct request_queue *, struct request *);
|
|
extern void elv_drain_elevator(struct request_queue *);
|
|
|
|
/*
 * io scheduler registration
 *
 * Both calls take the elevator_type describing the scheduler.
 */
extern void elv_register(struct elevator_type *);
extern void elv_unregister(struct elevator_type *);
|
|
|
|
/*
 * io scheduler sysfs switching
 */
extern ssize_t elv_iosched_show(struct request_queue *, char *);
extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);

/*
 * Elevator setup, teardown and switching for a queue.  elevator_change()
 * is the in-kernel way for drivers to switch the I/O scheduler; it needs
 * the queue to be fully initialized before it is called.
 */
extern int elevator_init(struct request_queue *, char *);
extern void elevator_exit(struct elevator_queue *);
extern int elevator_change(struct request_queue *, const char *);
extern int elv_rq_merge_ok(struct request *, struct bio *);
|
|
|
|
/*
 * Helper functions.
 *
 * Same signature as elevator_request_list_fn: queue and request in, a
 * request pointer out.
 */
extern struct request *elv_rb_former_request(struct request_queue *, struct request *);
extern struct request *elv_rb_latter_request(struct request_queue *, struct request *);
|
|
|
|
/*
|
|
* rb support functions.
|
|
*/
|
|
extern struct request *elv_rb_add(struct rb_root *, struct request *);
|
|
extern void elv_rb_del(struct rb_root *, struct request *);
|
|
extern struct request *elv_rb_find(struct rb_root *, sector_t);
|
|
|
|
/*
 * Return values from elevator merger
 */
#define ELEVATOR_NO_MERGE	0
#define ELEVATOR_FRONT_MERGE	1
#define ELEVATOR_BACK_MERGE	2
|
|
|
|
/*
 * Insertion selection
 */
#define ELEVATOR_INSERT_FRONT	1
#define ELEVATOR_INSERT_BACK	2
#define ELEVATOR_INSERT_SORT	3
#define ELEVATOR_INSERT_REQUEUE	4
|
|
|
|
/*
 * return values from elevator_may_queue_fn
 */
enum {
	ELV_MQUEUE_MAY,		/* 0 */
	ELV_MQUEUE_NO,		/* 1 */
	ELV_MQUEUE_MUST,	/* 2 */
};
|
|
|
|
/* position of the request plus its size in sectors */
#define rq_end_sector(rq)	(blk_rq_pos(rq) + blk_rq_sectors(rq))
/* map an rb_node back to the struct request that embeds it as ->rb_node */
#define rb_entry_rq(node)	rb_entry((node), struct request, rb_node)
|
|
|
|
/*
 * Hack to reuse the csd.list list_head as the fifo time holder while
 * the request is in the io scheduler. Saves an unsigned long in rq.
 */
#define rq_fifo_time(rq)	((unsigned long) (rq)->csd.list.next)
#define rq_set_fifo_time(rq,exp)	((rq)->csd.list.next = (void *) (exp))
/* map a queuelist list_head back to its struct request */
#define rq_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
/*
 * Unlink the request from its queuelist and re-initialise csd.list, whose
 * ->next pointer may have been overwritten by rq_set_fifo_time().
 */
#define rq_fifo_clear(rq)	do {		\
	list_del_init(&(rq)->queuelist);	\
	INIT_LIST_HEAD(&(rq)->csd.list);	\
	} while (0)
|
|
|
|
/*
 * io context count accounting
 */

/* add __val to this CPU's instance of 'name', with preemption disabled */
#define elv_ioc_count_mod(name, __val)				\
	do {							\
		preempt_disable();				\
		__get_cpu_var(name) += (__val);			\
		preempt_enable();				\
	} while (0)

#define elv_ioc_count_inc(name)	elv_ioc_count_mod(name, 1)
#define elv_ioc_count_dec(name)	elv_ioc_count_mod(name, -1)

/*
 * Sum 'name' over every possible CPU and yield the total as an
 * unsigned long.
 * NOTE(review): a write barrier is issued before the reads; confirm that
 * smp_wmb() (rather than a read barrier) is what is intended here.
 */
#define elv_ioc_count_read(name)				\
({								\
	unsigned long __val = 0;				\
	int __cpu;						\
	smp_wmb();						\
	for_each_possible_cpu(__cpu)				\
		__val += per_cpu(name, __cpu);			\
	__val;							\
})
|
|
|
|
#endif /* CONFIG_BLOCK */
|
|
#endif
|