From cb9f780aa93b752ec935e6771a251717d867e603 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 17:54:35 -0700 Subject: [PATCH 01/30] s390: qdio: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Peter Oberparleiter Cc: Thomas Gleixner Signed-off-by: Kees Cook [sebott: fixed compile error due to invalid struct member] Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- drivers/s390/cio/qdio.h | 2 +- drivers/s390/cio/qdio_main.c | 4 ++-- drivers/s390/cio/qdio_setup.c | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index 29d6b5222f1c..a6f7c2986b94 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -393,7 +393,7 @@ int test_nonshared_ind(struct qdio_irq *); /* prototypes for setup */ void qdio_inbound_processing(unsigned long data); void qdio_outbound_processing(unsigned long data); -void qdio_outbound_timer(unsigned long data); +void qdio_outbound_timer(struct timer_list *t); void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, struct irb *irb); int qdio_allocate_qs(struct qdio_irq *irq_ptr, int nr_input_qs, diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index a4ad39ba3873..ed4852fab44b 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -894,9 +894,9 @@ void qdio_outbound_processing(unsigned long data) __qdio_outbound_processing(q); } -void qdio_outbound_timer(unsigned long data) +void qdio_outbound_timer(struct timer_list *t) { - struct qdio_q *q = (struct qdio_q *)data; + struct qdio_q *q = from_timer(q, t, u.out.timer); qdio_tasklet_schedule(q); } diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 48b3866a9ded..9ae1380cbc31 100644 --- 
a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -252,8 +252,7 @@ static void setup_queues(struct qdio_irq *irq_ptr, tasklet_init(&q->tasklet, qdio_outbound_processing, (unsigned long) q); - setup_timer(&q->u.out.timer, (void(*)(unsigned long)) - &qdio_outbound_timer, (unsigned long)q); + timer_setup(&q->u.out.timer, qdio_outbound_timer, 0); } } From 846d0c6f794c4bef90a021b18cedde598758507c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 16:43:25 -0700 Subject: [PATCH 02/30] s390/cio: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Sebastian Ott Cc: Peter Oberparleiter Signed-off-by: Kees Cook Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- drivers/s390/cio/device.c | 8 ++++---- drivers/s390/cio/device.h | 2 ++ drivers/s390/cio/device_fsm.c | 10 ++++------ drivers/s390/cio/eadm_sch.c | 9 ++++----- 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index e5c32f4b5287..318d8269f5de 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -142,7 +142,7 @@ static void io_subchannel_shutdown(struct subchannel *); static int io_subchannel_sch_event(struct subchannel *, int); static int io_subchannel_chp_event(struct subchannel *, struct chp_link *, int); -static void recovery_func(unsigned long data); +static void recovery_func(struct timer_list *unused); static struct css_device_id io_subchannel_ids[] = { { .match_flags = 0x1, .type = SUBCHANNEL_TYPE_IO, }, @@ -194,7 +194,7 @@ int __init io_subchannel_init(void) { int ret; - setup_timer(&recovery_timer, recovery_func, 0); + timer_setup(&recovery_timer, recovery_func, 0); ret = bus_register(&ccw_bus_type); if (ret) return ret; @@ -726,7 +726,7 @@ static int io_subchannel_initialize_dev(struct subchannel 
*sch, INIT_WORK(&priv->todo_work, ccw_device_todo); INIT_LIST_HEAD(&priv->cmb_list); init_waitqueue_head(&priv->wait_q); - init_timer(&priv->timer); + timer_setup(&priv->timer, ccw_device_timeout, 0); atomic_set(&priv->onoff, 0); cdev->ccwlock = sch->lock; @@ -1271,7 +1271,7 @@ static void recovery_work_func(struct work_struct *unused) static DECLARE_WORK(recovery_work, recovery_work_func); -static void recovery_func(unsigned long data) +static void recovery_func(struct timer_list *unused) { /* * We can't do our recovery in softirq context and it's not diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h index b37c22adcc7a..f5c427ec24b1 100644 --- a/drivers/s390/cio/device.h +++ b/drivers/s390/cio/device.h @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -134,6 +135,7 @@ int ccw_device_notify(struct ccw_device *, int); void ccw_device_set_disconnected(struct ccw_device *cdev); void ccw_device_set_notoper(struct ccw_device *cdev); +void ccw_device_timeout(struct timer_list *t); void ccw_device_set_timeout(struct ccw_device *, int); void ccw_device_schedule_recovery(void); diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index f98ea674c3d8..dd7d79d30edc 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -91,12 +91,12 @@ static void ccw_timeout_log(struct ccw_device *cdev) /* * Timeout function. It just triggers a DEV_EVENT_TIMEOUT. 
*/ -static void -ccw_device_timeout(unsigned long data) +void +ccw_device_timeout(struct timer_list *t) { - struct ccw_device *cdev; + struct ccw_device_private *priv = from_timer(priv, t, timer); + struct ccw_device *cdev = priv->cdev; - cdev = (struct ccw_device *) data; spin_lock_irq(cdev->ccwlock); if (timeout_log_enabled) ccw_timeout_log(cdev); @@ -118,8 +118,6 @@ ccw_device_set_timeout(struct ccw_device *cdev, int expires) if (mod_timer(&cdev->private->timer, jiffies + expires)) return; } - cdev->private->timer.function = ccw_device_timeout; - cdev->private->timer.data = (unsigned long) cdev; cdev->private->timer.expires = jiffies + expires; add_timer(&cdev->private->timer); } diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c index d14795f7110b..ce16e4f45d44 100644 --- a/drivers/s390/cio/eadm_sch.c +++ b/drivers/s390/cio/eadm_sch.c @@ -94,9 +94,10 @@ static int eadm_subchannel_clear(struct subchannel *sch) return 0; } -static void eadm_subchannel_timeout(unsigned long data) +static void eadm_subchannel_timeout(struct timer_list *t) { - struct subchannel *sch = (struct subchannel *) data; + struct eadm_private *private = from_timer(private, t, timer); + struct subchannel *sch = private->sch; spin_lock_irq(sch->lock); EADM_LOG(1, "timeout"); @@ -118,8 +119,6 @@ static void eadm_subchannel_set_timeout(struct subchannel *sch, int expires) if (mod_timer(&private->timer, jiffies + expires)) return; } - private->timer.function = eadm_subchannel_timeout; - private->timer.data = (unsigned long) sch; private->timer.expires = jiffies + expires; add_timer(&private->timer); } @@ -224,7 +223,7 @@ static int eadm_subchannel_probe(struct subchannel *sch) return -ENOMEM; INIT_LIST_HEAD(&private->head); - init_timer(&private->timer); + timer_setup(&private->timer, eadm_subchannel_timeout, 0); spin_lock_irq(sch->lock); set_eadm_private(sch, private); From c9602ee7d14a72086d10b50ac68e1ea5c01e7579 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 
2017 16:44:30 -0700 Subject: [PATCH 03/30] s390/sclp: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Instead of creating an external static data variable, just define a separate callback which encodes the "force restart" desire. Cc: Peter Oberparleiter Cc: Greg Kroah-Hartman Signed-off-by: Kees Cook [heiko.carstens@de.ibm.com: get rid of compile warning] Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- drivers/s390/char/con3215.c | 6 ++--- drivers/s390/char/con3270.c | 10 ++++---- drivers/s390/char/sclp.c | 45 ++++++++++++++++++++-------------- drivers/s390/char/sclp_con.c | 5 ++-- drivers/s390/char/sclp_tty.c | 5 ++-- drivers/s390/char/sclp_vt220.c | 6 ++--- drivers/s390/char/tape_core.c | 14 +++-------- drivers/s390/char/tty3270.c | 8 +++--- 8 files changed, 48 insertions(+), 51 deletions(-) diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index 353f0bebcf8c..8c9d412b6d33 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -282,9 +282,9 @@ static void raw3215_start_io(struct raw3215_info *raw) /* * Function to start a delayed output after RAW3215_TIMEOUT seconds */ -static void raw3215_timeout(unsigned long __data) +static void raw3215_timeout(struct timer_list *t) { - struct raw3215_info *raw = (struct raw3215_info *) __data; + struct raw3215_info *raw = from_timer(raw, t, timer); unsigned long flags; spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); @@ -670,7 +670,7 @@ static struct raw3215_info *raw3215_alloc_info(void) return NULL; } - setup_timer(&info->timer, raw3215_timeout, (unsigned long)info); + timer_setup(&info->timer, raw3215_timeout, 0); init_waitqueue_head(&info->empty_wait); tasklet_init(&info->tlet, raw3215_wakeup, (unsigned long)info); tty_port_init(&info->port); diff --git 
a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index be3e3c1206c2..fd2146bcc0ad 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -69,7 +69,7 @@ static struct con3270 *condev; #define CON_UPDATE_STATUS 4 /* Update status line. */ #define CON_UPDATE_ALL 8 /* Recreate screen. */ -static void con3270_update(struct con3270 *); +static void con3270_update(struct timer_list *); /* * Setup timeout for a device. On timeout trigger an update. @@ -205,8 +205,9 @@ con3270_write_callback(struct raw3270_request *rq, void *data) * Update console display. */ static void -con3270_update(struct con3270 *cp) +con3270_update(struct timer_list *t) { + struct con3270 *cp = from_timer(cp, t, timer); struct raw3270_request *wrq; char wcc, prolog[6]; unsigned long flags; @@ -552,7 +553,7 @@ con3270_flush(void) con3270_update_status(cp); while (cp->update_flags != 0) { spin_unlock_irqrestore(&cp->view.lock, flags); - con3270_update(cp); + con3270_update(&cp->timer); spin_lock_irqsave(&cp->view.lock, flags); con3270_wait_write(cp); } @@ -623,8 +624,7 @@ con3270_init(void) INIT_LIST_HEAD(&condev->lines); INIT_LIST_HEAD(&condev->update); - setup_timer(&condev->timer, (void (*)(unsigned long)) con3270_update, - (unsigned long) condev); + timer_setup(&condev->timer, con3270_update, 0); tasklet_init(&condev->readlet, (void (*)(unsigned long)) con3270_read_tasklet, (unsigned long) condev->read); diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index 41d8aa96801f..9b4c61c1e309 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -136,6 +136,7 @@ static enum sclp_suspend_state_t { #define SCLP_BUSY_INTERVAL 10 #define SCLP_RETRY_INTERVAL 30 +static void sclp_request_timeout(bool force_restart); static void sclp_process_queue(void); static void __sclp_make_read_req(void); static int sclp_init_mask(int calculate); @@ -154,25 +155,32 @@ __sclp_queue_read_req(void) /* Set up request retry timer. 
Called while sclp_lock is locked. */ static inline void -__sclp_set_request_timer(unsigned long time, void (*function)(unsigned long), - unsigned long data) +__sclp_set_request_timer(unsigned long time, void (*cb)(struct timer_list *)) { del_timer(&sclp_request_timer); - sclp_request_timer.function = function; - sclp_request_timer.data = data; + sclp_request_timer.function = (TIMER_FUNC_TYPE)cb; sclp_request_timer.expires = jiffies + time; add_timer(&sclp_request_timer); } -/* Request timeout handler. Restart the request queue. If DATA is non-zero, +static void sclp_request_timeout_restart(struct timer_list *unused) +{ + sclp_request_timeout(true); +} + +static void sclp_request_timeout_normal(struct timer_list *unused) +{ + sclp_request_timeout(false); +} + +/* Request timeout handler. Restart the request queue. If force_restart, * force restart of running request. */ -static void -sclp_request_timeout(unsigned long data) +static void sclp_request_timeout(bool force_restart) { unsigned long flags; spin_lock_irqsave(&sclp_lock, flags); - if (data) { + if (force_restart) { if (sclp_running_state == sclp_running_state_running) { /* Break running state and queue NOP read event request * to get a defined interface state. */ @@ -181,7 +189,7 @@ sclp_request_timeout(unsigned long data) } } else { __sclp_set_request_timer(SCLP_BUSY_INTERVAL * HZ, - sclp_request_timeout, 0); + sclp_request_timeout_normal); } spin_unlock_irqrestore(&sclp_lock, flags); sclp_process_queue(); @@ -239,7 +247,7 @@ static struct sclp_req *__sclp_req_queue_remove_expired_req(void) * invokes callback. This timer can be set per request in situations where * waiting too long would be harmful to the system, e.g. during SE reboot. 
*/ -static void sclp_req_queue_timeout(unsigned long data) +static void sclp_req_queue_timeout(struct timer_list *unused) { unsigned long flags, expires_next; struct sclp_req *req; @@ -276,12 +284,12 @@ __sclp_start_request(struct sclp_req *req) req->status = SCLP_REQ_RUNNING; sclp_running_state = sclp_running_state_running; __sclp_set_request_timer(SCLP_RETRY_INTERVAL * HZ, - sclp_request_timeout, 1); + sclp_request_timeout_restart); return 0; } else if (rc == -EBUSY) { /* Try again later */ __sclp_set_request_timer(SCLP_BUSY_INTERVAL * HZ, - sclp_request_timeout, 0); + sclp_request_timeout_normal); return 0; } /* Request failed */ @@ -315,7 +323,7 @@ sclp_process_queue(void) /* Cannot abort already submitted request - could still * be active at the SCLP */ __sclp_set_request_timer(SCLP_BUSY_INTERVAL * HZ, - sclp_request_timeout, 0); + sclp_request_timeout_normal); break; } do_post: @@ -558,7 +566,7 @@ sclp_sync_wait(void) if (timer_pending(&sclp_request_timer) && get_tod_clock_fast() > timeout && del_timer(&sclp_request_timer)) - sclp_request_timer.function(sclp_request_timer.data); + sclp_request_timer.function((TIMER_DATA_TYPE)&sclp_request_timer); cpu_relax(); } local_irq_disable(); @@ -915,7 +923,7 @@ static void sclp_check_handler(struct ext_code ext_code, /* Initial init mask request timed out. Modify request state to failed. */ static void -sclp_check_timeout(unsigned long data) +sclp_check_timeout(struct timer_list *unused) { unsigned long flags; @@ -954,7 +962,7 @@ sclp_check_interface(void) sclp_init_req.status = SCLP_REQ_RUNNING; sclp_running_state = sclp_running_state_running; __sclp_set_request_timer(SCLP_RETRY_INTERVAL * HZ, - sclp_check_timeout, 0); + sclp_check_timeout); spin_unlock_irqrestore(&sclp_lock, flags); /* Enable service-signal interruption - needs to happen * with IRQs enabled. 
*/ @@ -1159,9 +1167,8 @@ sclp_init(void) INIT_LIST_HEAD(&sclp_req_queue); INIT_LIST_HEAD(&sclp_reg_list); list_add(&sclp_state_change_event.list, &sclp_reg_list); - init_timer(&sclp_request_timer); - init_timer(&sclp_queue_timer); - sclp_queue_timer.function = sclp_req_queue_timeout; + timer_setup(&sclp_request_timer, NULL, 0); + timer_setup(&sclp_queue_timer, sclp_req_queue_timeout, 0); /* Check interface */ spin_unlock_irqrestore(&sclp_lock, flags); rc = sclp_check_interface(); diff --git a/drivers/s390/char/sclp_con.c b/drivers/s390/char/sclp_con.c index 7027e61a6931..8966a1c1b548 100644 --- a/drivers/s390/char/sclp_con.c +++ b/drivers/s390/char/sclp_con.c @@ -125,7 +125,7 @@ static void sclp_console_sync_queue(void) * temporary write buffer without further waiting on a final new line. */ static void -sclp_console_timeout(unsigned long data) +sclp_console_timeout(struct timer_list *unused) { sclp_conbuf_emit(); } @@ -211,7 +211,6 @@ sclp_console_write(struct console *console, const char *message, /* Setup timer to output current console buffer after 1/10 second */ if (sclp_conbuf != NULL && sclp_chars_in_buffer(sclp_conbuf) != 0 && !timer_pending(&sclp_con_timer)) { - setup_timer(&sclp_con_timer, sclp_console_timeout, 0UL); mod_timer(&sclp_con_timer, jiffies + HZ / 10); } out: @@ -332,7 +331,7 @@ sclp_console_init(void) INIT_LIST_HEAD(&sclp_con_outqueue); spin_lock_init(&sclp_con_lock); sclp_conbuf = NULL; - init_timer(&sclp_con_timer); + timer_setup(&sclp_con_timer, sclp_console_timeout, 0); /* Set output format */ if (MACHINE_IS_VM) diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c index 1cceefdc03e0..9f7b87d6d434 100644 --- a/drivers/s390/char/sclp_tty.c +++ b/drivers/s390/char/sclp_tty.c @@ -151,7 +151,7 @@ __sclp_ttybuf_emit(struct sclp_buffer *buffer) * temporary write buffer. 
*/ static void -sclp_tty_timeout(unsigned long data) +sclp_tty_timeout(struct timer_list *unused) { unsigned long flags; struct sclp_buffer *buf; @@ -218,7 +218,6 @@ static int sclp_tty_write_string(const unsigned char *str, int count, int may_fa /* Setup timer to output current console buffer after 1/10 second */ if (sclp_ttybuf && sclp_chars_in_buffer(sclp_ttybuf) && !timer_pending(&sclp_tty_timer)) { - setup_timer(&sclp_tty_timer, sclp_tty_timeout, 0UL); mod_timer(&sclp_tty_timer, jiffies + HZ / 10); } spin_unlock_irqrestore(&sclp_tty_lock, flags); @@ -526,7 +525,7 @@ sclp_tty_init(void) } INIT_LIST_HEAD(&sclp_tty_outqueue); spin_lock_init(&sclp_tty_lock); - init_timer(&sclp_tty_timer); + timer_setup(&sclp_tty_timer, sclp_tty_timeout, 0); sclp_ttybuf = NULL; sclp_tty_buffer_count = 0; if (MACHINE_IS_VM) { diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c index e84395d71389..3f9a6ef650fa 100644 --- a/drivers/s390/char/sclp_vt220.c +++ b/drivers/s390/char/sclp_vt220.c @@ -357,7 +357,7 @@ sclp_vt220_add_msg(struct sclp_vt220_request *request, * Emit buffer after having waited long enough for more data to arrive. 
*/ static void -sclp_vt220_timeout(unsigned long data) +sclp_vt220_timeout(struct timer_list *unused) { sclp_vt220_emit_current(); } @@ -454,8 +454,6 @@ __sclp_vt220_write(const unsigned char *buf, int count, int do_schedule, /* Setup timer to output current console buffer after some time */ if (sclp_vt220_current_request != NULL && !timer_pending(&sclp_vt220_timer) && do_schedule) { - sclp_vt220_timer.function = sclp_vt220_timeout; - sclp_vt220_timer.data = 0UL; sclp_vt220_timer.expires = jiffies + BUFFER_MAX_DELAY; add_timer(&sclp_vt220_timer); } @@ -699,7 +697,7 @@ static int __init __sclp_vt220_init(int num_pages) spin_lock_init(&sclp_vt220_lock); INIT_LIST_HEAD(&sclp_vt220_empty); INIT_LIST_HEAD(&sclp_vt220_outqueue); - init_timer(&sclp_vt220_timer); + timer_setup(&sclp_vt220_timer, sclp_vt220_timeout, 0); tty_port_init(&sclp_vt220_port); sclp_vt220_current_request = NULL; sclp_vt220_buffered_chars = 0; diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c index 9dd4534823b3..32503a60ee85 100644 --- a/drivers/s390/char/tape_core.c +++ b/drivers/s390/char/tape_core.c @@ -32,7 +32,7 @@ static void __tape_do_irq (struct ccw_device *, unsigned long, struct irb *); static void tape_delayed_next_request(struct work_struct *); -static void tape_long_busy_timeout(unsigned long data); +static void tape_long_busy_timeout(struct timer_list *t); /* * One list to contain all tape devices of all disciplines, so @@ -381,8 +381,7 @@ tape_generic_online(struct tape_device *device, return -EINVAL; } - init_timer(&device->lb_timeout); - device->lb_timeout.function = tape_long_busy_timeout; + timer_setup(&device->lb_timeout, tape_long_busy_timeout, 0); /* Let the discipline have a go at the device. 
*/ device->discipline = discipline; @@ -867,18 +866,16 @@ tape_delayed_next_request(struct work_struct *work) spin_unlock_irq(get_ccwdev_lock(device->cdev)); } -static void tape_long_busy_timeout(unsigned long data) +static void tape_long_busy_timeout(struct timer_list *t) { + struct tape_device *device = from_timer(device, t, lb_timeout); struct tape_request *request; - struct tape_device *device; - device = (struct tape_device *) data; spin_lock_irq(get_ccwdev_lock(device->cdev)); request = list_entry(device->req_queue.next, struct tape_request, list); BUG_ON(request->status != TAPE_REQUEST_LONG_BUSY); DBF_LH(6, "%08x: Long busy timeout.\n", device->cdev_id); __tape_start_next_request(device); - device->lb_timeout.data = 0UL; tape_put_device(device); spin_unlock_irq(get_ccwdev_lock(device->cdev)); } @@ -1157,7 +1154,6 @@ __tape_do_irq (struct ccw_device *cdev, unsigned long intparm, struct irb *irb) if (req->status == TAPE_REQUEST_LONG_BUSY) { DBF_EVENT(3, "(%08x): del timer\n", device->cdev_id); if (del_timer(&device->lb_timeout)) { - device->lb_timeout.data = 0UL; tape_put_device(device); __tape_start_next_request(device); } @@ -1212,8 +1208,6 @@ __tape_do_irq (struct ccw_device *cdev, unsigned long intparm, struct irb *irb) case TAPE_IO_PENDING: break; case TAPE_IO_LONG_BUSY: - device->lb_timeout.data = - (unsigned long) tape_get_device(device); device->lb_timeout.expires = jiffies + LONG_BUSY_TIMEOUT * HZ; DBF_EVENT(3, "(%08x): add timer\n", device->cdev_id); diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index e5ebe2fbee23..e417ccd9e299 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -118,7 +118,7 @@ struct tty3270 { #define TTY_UPDATE_STATUS 8 /* Update status line. */ #define TTY_UPDATE_ALL 16 /* Recreate screen. 
*/ -static void tty3270_update(struct tty3270 *); +static void tty3270_update(struct timer_list *); static void tty3270_resize_work(struct work_struct *work); /* @@ -361,8 +361,9 @@ tty3270_write_callback(struct raw3270_request *rq, void *data) * Update 3270 display. */ static void -tty3270_update(struct tty3270 *tp) +tty3270_update(struct timer_list *t) { + struct tty3270 *tp = from_timer(tp, t, timer); static char invalid_sba[2] = { 0xff, 0xff }; struct raw3270_request *wrq; unsigned long updated; @@ -748,8 +749,7 @@ tty3270_alloc_view(void) goto out_reset; tty_port_init(&tp->port); - setup_timer(&tp->timer, (void (*)(unsigned long)) tty3270_update, - (unsigned long) tp); + timer_setup(&tp->timer, tty3270_update, 0); tasklet_init(&tp->readlet, (void (*)(unsigned long)) tty3270_read_tasklet, (unsigned long) tp->read); From cefbeb5df56e1daf0adda8ca5eecee03c5084af6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Oct 2017 03:27:37 -0700 Subject: [PATCH 04/30] s390/ap_bus: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Harald Freudenberger Signed-off-by: Kees Cook Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- drivers/s390/crypto/ap_bus.c | 10 +++++----- drivers/s390/crypto/ap_bus.h | 2 +- drivers/s390/crypto/ap_queue.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 8b5658b0bec3..faeba9db3d95 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -374,13 +374,13 @@ void ap_wait(enum ap_wait wait) /** * ap_request_timeout(): Handling of request timeouts - * @data: Holds the AP device. + * @t: timer making this callback * * Handles request timeouts. 
*/ -void ap_request_timeout(unsigned long data) +void ap_request_timeout(struct timer_list *t) { - struct ap_queue *aq = (struct ap_queue *) data; + struct ap_queue *aq = from_timer(aq, t, timeout); if (ap_suspend_flag) return; @@ -1203,7 +1203,7 @@ static void ap_scan_bus(struct work_struct *unused) mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ); } -static void ap_config_timeout(unsigned long ptr) +static void ap_config_timeout(struct timer_list *unused) { if (ap_suspend_flag) return; @@ -1306,7 +1306,7 @@ int __init ap_module_init(void) goto out_bus; /* Setup the AP bus rescan timer. */ - setup_timer(&ap_config_timer, ap_config_timeout, 0); + timer_setup(&ap_config_timer, ap_config_timeout, 0); /* * Setup the high resultion poll timer. diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 3a0e19d87e7c..7e45c4d08cad 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -241,7 +241,7 @@ void ap_flush_queue(struct ap_queue *aq); void *ap_airq_ptr(void); void ap_wait(enum ap_wait wait); -void ap_request_timeout(unsigned long data); +void ap_request_timeout(struct timer_list *t); void ap_bus_force_rescan(void); void ap_queue_init_reply(struct ap_queue *aq, struct ap_message *ap_msg); diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index a550d40921e7..ba3a2e13b0eb 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -634,7 +634,7 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type) INIT_LIST_HEAD(&aq->list); INIT_LIST_HEAD(&aq->pendingq); INIT_LIST_HEAD(&aq->requestq); - setup_timer(&aq->timeout, ap_request_timeout, (unsigned long) aq); + timer_setup(&aq->timeout, ap_request_timeout, 0); return aq; } From c771320e9357c9b85634002daedfe5c8988f27a6 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 5 Oct 2017 08:44:26 +0200 Subject: [PATCH 05/30] s390/mm,kvm: improve detection of KVM guest faults The identification of guest 
fault currently relies on the PF_VCPU flag. This is set in guest_entry_irqoff and cleared in guest_exit_irqoff. Both functions are called by __vcpu_run, the PF_VCPU flag is set for quite a lot of kernel code outside of the guest execution. Replace the PF_VCPU scheme with the PIF_GUEST_FAULT in the pt_regs and make the program check handler code in entry.S set the bit only for exception that occurred between the .Lsie_gmap and .Lsie_done labels. Reviewed-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- arch/s390/include/asm/ptrace.h | 2 ++ arch/s390/kernel/entry.S | 7 +++++-- arch/s390/mm/fault.c | 6 +++--- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 2f84e77f1f1b..a3788dafc0e1 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -13,10 +13,12 @@ #define PIF_SYSCALL 0 /* inside a system call */ #define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */ #define PIF_SYSCALL_RESTART 2 /* restart the current system call */ +#define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */ #define _PIF_SYSCALL _BITUL(PIF_SYSCALL) #define _PIF_PER_TRAP _BITUL(PIF_PER_TRAP) #define _PIF_SYSCALL_RESTART _BITUL(PIF_SYSCALL_RESTART) +#define _PIF_GUEST_FAULT _BITUL(PIF_GUEST_FAULT) #ifndef __ASSEMBLY__ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index f498d201f98d..ee53ac7b1ab8 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -518,6 +518,7 @@ ENTRY(pgm_check_handler) stmg %r8,%r15,__LC_SAVE_AREA_SYNC lg %r10,__LC_LAST_BREAK lg %r12,__LC_CURRENT + lghi %r11,0 larl %r13,cleanup_critical lmg %r8,%r9,__LC_PGM_OLD_PSW tmhh %r8,0x0001 # test problem state bit @@ -532,6 +533,7 @@ ENTRY(pgm_check_handler) ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit + lghi %r11,_PIF_GUEST_FAULT 
#endif 0: tmhh %r8,0x4000 # PER bit set in old PSW ? jnz 1f # -> enabled, can't be a double fault @@ -549,13 +551,14 @@ ENTRY(pgm_check_handler) jz 3f mvc __THREAD_trap_tdb(256,%r14),0(%r13) 3: stg %r10,__THREAD_last_break(%r14) -4: la %r11,STACK_FRAME_OVERHEAD(%r15) +4: lgr %r13,%r11 + la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE - xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + stg %r13,__PT_FLAGS(%r11) stg %r10,__PT_ARGS(%r11) tm __LC_PGM_ILC+3,0x80 # check for per exception jz 5f diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 242b78c0a9ec..be974b3eb7e4 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -117,7 +117,7 @@ static inline int user_space_fault(struct pt_regs *regs) return 1; if (trans_exc_code == 2) /* secondary space -> set_fs */ return current->thread.mm_segment.ar4; - if (current->flags & PF_VCPU) + if (test_pt_regs_flag(regs, PIF_GUEST_FAULT)) return 1; return 0; } @@ -209,7 +209,7 @@ static void dump_fault_info(struct pt_regs *regs) pr_cont("kernel "); } #ifdef CONFIG_PGSTE - else if ((current->flags & PF_VCPU) && S390_lowcore.gmap) { + else if (test_pt_regs_flag(regs, PIF_GUEST_FAULT)) { struct gmap *gmap = (struct gmap *)S390_lowcore.gmap; asce = gmap->asce; pr_cont("gmap "); @@ -438,7 +438,7 @@ static inline int do_exception(struct pt_regs *regs, int access) down_read(&mm->mmap_sem); #ifdef CONFIG_PGSTE - gmap = (current->flags & PF_VCPU) ? + gmap = test_pt_regs_flag(regs, PIF_GUEST_FAULT) ? 
(struct gmap *) S390_lowcore.gmap : NULL; if (gmap) { current->thread.gmap_addr = address; From 0aaba41b58bc5f3074c0c0a6136b9500b5e29e19 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 22 Aug 2017 12:08:22 +0200 Subject: [PATCH 06/30] s390: remove all code using the access register mode With the current code, the vdso implementations of getcpu() and clock_gettime() use the access register mode to access the per-CPU vdso data page. An alternative to the complicated AR mode is to use the secondary space mode. This makes the vdso faster and quite a bit simpler. The downside is that the uaccess code has to be changed quite a bit. Which instructions are used depends on the machine and what kind of uaccess operation is requested. The instruction dictates which ASCE value needs to be loaded into %cr1 and %cr7. The different cases: * User copy with MVCOS for z10 and newer machines The MVCOS instruction can copy between the primary space (aka user) and the home space (aka kernel) directly. For set_fs(KERNEL_DS) the kernel ASCE is loaded into %cr1. For set_fs(USER_DS) the user ASCE is already loaded in %cr1. * User copy with MVCP/MVCS for older machines To be able to execute the MVCP/MVCS instructions the kernel needs to switch to primary mode. The control register %cr1 has to be set to the kernel ASCE and %cr7 to either the kernel ASCE or the user ASCE dependent on set_fs(KERNEL_DS) vs set_fs(USER_DS). * Data access in the user address space for strnlen / futex To use "normal" instructions with data from the user address space the secondary space mode is used. The kernel needs to switch to primary mode, %cr1 has to contain the kernel ASCE and %cr7 either the user ASCE or the kernel ASCE, dependent on set_fs. Loading a new value into %cr1 or %cr7 is an expensive operation, so the kernel tries to be lazy about it. E.g. for multiple user copies in a row with MVCP/MVCS the replacement of the vdso ASCE in %cr7 with the user ASCE is done only once.
On return to user space a CPU flag is checked and, if it is set, the vdso ASCE is loaded into %cr7 again.

To enable and disable the data access via the secondary space two new functions are added, enable_sacf_uaccess and disable_sacf_uaccess. The fact that a context is in secondary space uaccess mode is stored in the mm_segment_t value for the task. Interrupt code may use set_fs as long as it restores the previous state, obtained with get_fs, with another call to set_fs. The code in finish_arch_post_lock_switch simply has to do a set_fs with the current mm_segment_t value for the task.

For CPUs with MVCOS:

CPU running in                        | %cr1 ASCE | %cr7 ASCE |
--------------------------------------|-----------|-----------|
user space                            |  user     |  vdso     |
kernel, USER_DS, normal-mode          |  user     |  vdso     |
kernel, USER_DS, normal-mode, lazy    |  user     |  user     |
kernel, USER_DS, sacf-mode            |  kernel   |  user     |
kernel, KERNEL_DS, normal-mode        |  kernel   |  vdso     |
kernel, KERNEL_DS, normal-mode, lazy  |  kernel   |  kernel   |
kernel, KERNEL_DS, sacf-mode          |  kernel   |  kernel   |

For CPUs without MVCOS:

CPU running in                        | %cr1 ASCE | %cr7 ASCE |
--------------------------------------|-----------|-----------|
user space                            |  user     |  vdso     |
kernel, USER_DS, normal-mode          |  user     |  vdso     |
kernel, USER_DS, normal-mode, lazy    |  kernel   |  user     |
kernel, USER_DS, sacf-mode            |  kernel   |  user     |
kernel, KERNEL_DS, normal-mode        |  kernel   |  vdso     |
kernel, KERNEL_DS, normal-mode, lazy  |  kernel   |  kernel   |
kernel, KERNEL_DS, sacf-mode          |  kernel   |  kernel   |

The lines with "lazy" refer to the state after a copy via the secondary space with a delayed reload of %cr1 and %cr7.

There are three hardware address spaces that can cause a DAT exception: primary, secondary and home space. The exception can be related to four different fault types: user space fault, vdso fault, kernel fault, and gmap fault. Depending on the set_fs state and normal vs.
sacf mode there are a number of fault combinations:

1) user address space fault via the primary ASCE
2) gmap address space fault via the primary ASCE
3) kernel address space fault via the primary ASCE for machines with MVCOS and set_fs(KERNEL_DS)
4) vdso address space fault via the secondary ASCE with an invalid address while running in secondary space in problem state
5) user address space fault via the secondary ASCE for user-copy based on the secondary space mode, e.g. futex_ops or strnlen_user
6) kernel address space fault via the secondary ASCE for user-copy with secondary space mode with set_fs(KERNEL_DS)
7) kernel address space fault via the primary ASCE for user-copy with secondary space mode with set_fs(USER_DS) on machines without MVCOS
8) kernel address space fault via the home space ASCE

Replace user_space_fault() with a new function get_fault_type() that can distinguish all four fault types.

With these changes the futex atomic ops from the kernel and strnlen_user will get a little bit slower, as will the old style uaccess with MVCP/MVCS. All user accesses based on MVCOS will be as fast as before. On the positive side, the user space vdso code is a lot faster and Linux ceases to use the complicated AR mode.
Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- arch/s390/include/asm/futex.h | 9 +- arch/s390/include/asm/lowcore.h | 33 +++----- arch/s390/include/asm/mmu_context.h | 36 ++++---- arch/s390/include/asm/processor.h | 4 +- arch/s390/include/asm/uaccess.h | 29 ++----- arch/s390/kernel/asm-offsets.c | 2 +- arch/s390/kernel/entry.S | 26 ++++-- arch/s390/kernel/head64.S | 2 +- arch/s390/kernel/vdso.c | 44 +--------- arch/s390/kernel/vdso32/getcpu.S | 16 +--- arch/s390/kernel/vdso64/clock_gettime.S | 19 +---- arch/s390/kernel/vdso64/getcpu.S | 15 +--- arch/s390/lib/uaccess.c | 90 +++++++++++++++++--- arch/s390/mm/fault.c | 108 ++++++++++++++---------- arch/s390/mm/init.c | 1 + arch/s390/mm/pgalloc.c | 4 +- 16 files changed, 228 insertions(+), 210 deletions(-) diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 9b5a3469fed9..5e97a4353147 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -26,9 +26,9 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) { int oldval = 0, newval, ret; + mm_segment_t old_fs; - load_kernel_asce(); - + old_fs = enable_sacf_uaccess(); pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -55,6 +55,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, ret = -ENOSYS; } pagefault_enable(); + disable_sacf_uaccess(old_fs); if (!ret) *oval = oldval; @@ -65,9 +66,10 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { + mm_segment_t old_fs; int ret; - load_kernel_asce(); + old_fs = enable_sacf_uaccess(); asm volatile( " sacf 256\n" "0: cs %1,%4,0(%5)\n" @@ -77,6 +79,7 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "=d" (ret), "+d" (oldval), "=m" (*uaddr) : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) 
: "cc", "memory"); + disable_sacf_uaccess(old_fs); *uval = oldval; return ret; } diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 9eb36a1592c7..2306fa17f6cd 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -115,33 +115,28 @@ struct lowcore { /* Address space pointer. */ __u64 kernel_asce; /* 0x0378 */ __u64 user_asce; /* 0x0380 */ + __u64 vdso_asce; /* 0x0388 */ /* * The lpp and current_pid fields form a * 64-bit value that is set as program * parameter with the LPP instruction. */ - __u32 lpp; /* 0x0388 */ - __u32 current_pid; /* 0x038c */ + __u32 lpp; /* 0x0390 */ + __u32 current_pid; /* 0x0394 */ /* SMP info area */ - __u32 cpu_nr; /* 0x0390 */ - __u32 softirq_pending; /* 0x0394 */ - __u64 percpu_offset; /* 0x0398 */ - __u64 vdso_per_cpu_data; /* 0x03a0 */ - __u64 machine_flags; /* 0x03a8 */ - __u32 preempt_count; /* 0x03b0 */ - __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */ - __u64 gmap; /* 0x03b8 */ - __u32 spinlock_lockval; /* 0x03c0 */ - __u32 spinlock_index; /* 0x03c4 */ - __u32 fpu_flags; /* 0x03c8 */ - __u8 pad_0x03cc[0x0400-0x03cc]; /* 0x03cc */ - - /* Per cpu primary space access list */ - __u32 paste[16]; /* 0x0400 */ - - __u8 pad_0x04c0[0x0e00-0x0440]; /* 0x0440 */ + __u32 cpu_nr; /* 0x0398 */ + __u32 softirq_pending; /* 0x039c */ + __u32 preempt_count; /* 0x03a0 */ + __u32 spinlock_lockval; /* 0x03a4 */ + __u32 spinlock_index; /* 0x03a8 */ + __u32 fpu_flags; /* 0x03ac */ + __u64 percpu_offset; /* 0x03b0 */ + __u64 vdso_per_cpu_data; /* 0x03b8 */ + __u64 machine_flags; /* 0x03c0 */ + __u64 gmap; /* 0x03c8 */ + __u8 pad_0x03d0[0x0e00-0x03d0]; /* 0x03d0 */ /* * 0xe00 contains the address of the IPL Parameter Information diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 43607bb12cc2..6133aa376b7c 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -71,41 +71,38 @@ static inline int 
init_new_context(struct task_struct *tsk, static inline void set_user_asce(struct mm_struct *mm) { S390_lowcore.user_asce = mm->context.asce; - if (current->thread.mm_segment.ar4) - __ctl_load(S390_lowcore.user_asce, 7, 7); - set_cpu_flag(CIF_ASCE_PRIMARY); + __ctl_load(S390_lowcore.user_asce, 1, 1); + clear_cpu_flag(CIF_ASCE_PRIMARY); } static inline void clear_user_asce(void) { S390_lowcore.user_asce = S390_lowcore.kernel_asce; - - __ctl_load(S390_lowcore.user_asce, 1, 1); - __ctl_load(S390_lowcore.user_asce, 7, 7); -} - -static inline void load_kernel_asce(void) -{ - unsigned long asce; - - __ctl_store(asce, 1, 1); - if (asce != S390_lowcore.kernel_asce) - __ctl_load(S390_lowcore.kernel_asce, 1, 1); + __ctl_load(S390_lowcore.kernel_asce, 1, 1); set_cpu_flag(CIF_ASCE_PRIMARY); } +mm_segment_t enable_sacf_uaccess(void); +void disable_sacf_uaccess(mm_segment_t old_fs); + static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { int cpu = smp_processor_id(); - S390_lowcore.user_asce = next->context.asce; if (prev == next) return; + S390_lowcore.user_asce = next->context.asce; cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); - /* Clear old ASCE by loading the kernel ASCE. 
*/ - __ctl_load(S390_lowcore.kernel_asce, 1, 1); - __ctl_load(S390_lowcore.kernel_asce, 7, 7); + /* Clear previous user-ASCE from CR1 and CR7 */ + if (!test_cpu_flag(CIF_ASCE_PRIMARY)) { + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + set_cpu_flag(CIF_ASCE_PRIMARY); + } + if (test_cpu_flag(CIF_ASCE_SECONDARY)) { + __ctl_load(S390_lowcore.vdso_asce, 7, 7); + clear_cpu_flag(CIF_ASCE_SECONDARY); + } cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); } @@ -115,7 +112,6 @@ static inline void finish_arch_post_lock_switch(void) struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - load_kernel_asce(); if (mm) { preempt_disable(); while (atomic_read(&mm->context.flush_count)) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index f25bfe888933..709351bce80e 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -109,9 +109,7 @@ extern void execve_tail(void); #define HAVE_ARCH_PICK_MMAP_LAYOUT -typedef struct { - __u32 ar4; -} mm_segment_t; +typedef unsigned int mm_segment_t; /* * Thread structure diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index cdd0f0d999e2..ad6b91013a05 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -16,7 +16,7 @@ #include #include #include - +#include /* * The fs value determines whether argument validity checking should be @@ -26,27 +26,16 @@ * For historical reasons, these macros are grossly misnamed. 
*/ -#define MAKE_MM_SEG(a) ((mm_segment_t) { (a) }) - - -#define KERNEL_DS MAKE_MM_SEG(0) -#define USER_DS MAKE_MM_SEG(1) +#define KERNEL_DS (0) +#define KERNEL_DS_SACF (1) +#define USER_DS (2) +#define USER_DS_SACF (3) #define get_ds() (KERNEL_DS) #define get_fs() (current->thread.mm_segment) -#define segment_eq(a,b) ((a).ar4 == (b).ar4) +#define segment_eq(a,b) (((a) & 2) == ((b) & 2)) -static inline void set_fs(mm_segment_t fs) -{ - current->thread.mm_segment = fs; - if (uaccess_kernel()) { - set_cpu_flag(CIF_ASCE_SECONDARY); - __ctl_load(S390_lowcore.kernel_asce, 7, 7); - } else { - clear_cpu_flag(CIF_ASCE_SECONDARY); - __ctl_load(S390_lowcore.user_asce, 7, 7); - } -} +void set_fs(mm_segment_t fs); static inline int __range_ok(unsigned long addr, unsigned long size) { @@ -95,7 +84,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { - unsigned long spec = 0x810000UL; + unsigned long spec = 0x010000UL; int rc; switch (size) { @@ -125,7 +114,7 @@ static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) { - unsigned long spec = 0x81UL; + unsigned long spec = 0x01UL; int rc; switch (size) { diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 33ec80df7ed4..587b195b588d 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -171,6 +171,7 @@ int main(void) OFFSET(__LC_RESTART_DATA, lowcore, restart_data); OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source); OFFSET(__LC_USER_ASCE, lowcore, user_asce); + OFFSET(__LC_VDSO_ASCE, lowcore, vdso_asce); OFFSET(__LC_LPP, lowcore, lpp); OFFSET(__LC_CURRENT_PID, lowcore, current_pid); OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset); @@ -178,7 +179,6 @@ int main(void) OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags); OFFSET(__LC_PREEMPT_COUNT, lowcore, 
preempt_count); OFFSET(__LC_GMAP, lowcore, gmap); - OFFSET(__LC_PASTE, lowcore, paste); /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); /* hardware defined lowcore locations 0x1000 - 0x18ff */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index ee53ac7b1ab8..a316cd6999ad 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -379,13 +379,21 @@ ENTRY(system_call) jg s390_handle_mcck # TIF bit will be cleared by handler # -# _CIF_ASCE_PRIMARY and/or CIF_ASCE_SECONDARY set, load user space asce +# _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce # .Lsysc_asce: + ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY + lctlg %c7,%c7,__LC_VDSO_ASCE # load secondary asce + TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY + jz .Lsysc_return +#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES + tm __LC_STFLE_FAC_LIST+3,0x10 # has MVCOS ? + jnz .Lsysc_set_fs_fixup ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY lctlg %c1,%c1,__LC_USER_ASCE # load primary asce - TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_SECONDARY - jz .Lsysc_return + j .Lsysc_return +.Lsysc_set_fs_fixup: +#endif larl %r14,.Lsysc_return jg set_fs_fixup @@ -741,10 +749,18 @@ ENTRY(io_int_handler) # _CIF_ASCE_PRIMARY and/or CIF_ASCE_SECONDARY set, load user space asce # .Lio_asce: + ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY + lctlg %c7,%c7,__LC_VDSO_ASCE # load secondary asce + TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY + jz .Lio_return +#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES + tm __LC_STFLE_FAC_LIST+3,0x10 # has MVCOS ? 
+ jnz .Lio_set_fs_fixup ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY lctlg %c1,%c1,__LC_USER_ASCE # load primary asce - TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_SECONDARY - jz .Lio_return + j .Lio_return +.Lio_set_fs_fixup: +#endif larl %r14,.Lio_return jg set_fs_fixup diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 172002da7075..38a973ccf501 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -28,7 +28,7 @@ ENTRY(startup_continue) lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area # move IPL device to lowcore - lghi %r0,__LC_PASTE + larl %r0,boot_vdso_data stg %r0,__LC_VDSO_PER_CPU # # Setup stack diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 0520854a4dab..39a218703c50 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -158,16 +158,9 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; struct vdso_per_cpu_data *vd; - u32 *psal, *aste; - int i; - - lowcore->vdso_per_cpu_data = __LC_PASTE; - - if (!vdso_enabled) - return 0; segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); - page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA); + page_table = get_zeroed_page(GFP_KERNEL); page_frame = get_zeroed_page(GFP_KERNEL); if (!segment_table || !page_table || !page_frame) goto out; @@ -179,25 +172,15 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore) vd->cpu_nr = lowcore->cpu_nr; vd->node_id = cpu_to_node(vd->cpu_nr); - /* Set up access register mode page table */ + /* Set up page table for the vdso address space */ memset64((u64 *)segment_table, _SEGMENT_ENTRY_EMPTY, _CRST_ENTRIES); memset64((u64 *)page_table, _PAGE_INVALID, PTRS_PER_PTE); *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; *(unsigned long *) page_table = _PAGE_PROTECT + page_frame; - psal = (u32 *) (page_table + 256*sizeof(unsigned long)); - aste = psal + 32; - - for (i = 4; i < 32; i += 4) - 
psal[i] = 0x80000000; - - lowcore->paste[4] = (u32)(addr_t) psal; - psal[0] = 0x02000000; - psal[2] = (u32)(addr_t) aste; - *(unsigned long *) (aste + 2) = segment_table + + lowcore->vdso_asce = segment_table + _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; - aste[4] = (u32)(addr_t) psal; lowcore->vdso_per_cpu_data = page_frame; return 0; @@ -212,14 +195,8 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore) void vdso_free_per_cpu(struct lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; - u32 *psal, *aste; - if (!vdso_enabled) - return; - - psal = (u32 *)(addr_t) lowcore->paste[4]; - aste = (u32 *)(addr_t) psal[2]; - segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK; + segment_table = lowcore->vdso_asce & PAGE_MASK; page_table = *(unsigned long *) segment_table; page_frame = *(unsigned long *) page_table; @@ -228,16 +205,6 @@ void vdso_free_per_cpu(struct lowcore *lowcore) free_pages(segment_table, SEGMENT_ORDER); } -static void vdso_init_cr5(void) -{ - unsigned long cr5; - - if (!vdso_enabled) - return; - cr5 = offsetof(struct lowcore, paste); - __ctl_load(cr5, 5, 5); -} - /* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree @@ -314,8 +281,6 @@ static int __init vdso_init(void) { int i; - if (!vdso_enabled) - return 0; vdso_init_data(vdso_data); #ifdef CONFIG_COMPAT /* Calculate the size of the 32 bit vDSO */ @@ -354,7 +319,6 @@ static int __init vdso_init(void) vdso64_pagelist[vdso64_pages] = NULL; if (vdso_alloc_per_cpu(&S390_lowcore)) BUG(); - vdso_init_cr5(); get_page(virt_to_page(vdso_data)); diff --git a/arch/s390/kernel/vdso32/getcpu.S b/arch/s390/kernel/vdso32/getcpu.S index 6e30769dd017..5477a2c112fb 100644 --- a/arch/s390/kernel/vdso32/getcpu.S +++ b/arch/s390/kernel/vdso32/getcpu.S @@ -15,23 +15,11 @@ .type __kernel_getcpu,@function __kernel_getcpu: .cfi_startproc - ear %r1,%a4 - lhi %r4,1 - sll %r4,24 - sar %a4,%r4 la %r4,0 - epsw %r0,0 - sacf 512 + 
sacf 256 l %r5,__VDSO_CPU_NR(%r4) l %r4,__VDSO_NODE_ID(%r4) - tml %r0,0x4000 - jo 1f - tml %r0,0x8000 - jno 0f - sacf 256 - j 1f -0: sacf 0 -1: sar %a4,%r1 + sacf 0 ltr %r2,%r2 jz 2f st %r5,0(%r2) diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index 9c3b12626dba..5d7b56b49458 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -114,23 +114,12 @@ __kernel_clock_gettime: br %r14 /* CPUCLOCK_VIRT for this thread */ -9: icm %r0,15,__VDSO_ECTG_OK(%r5) +9: lghi %r4,0 + icm %r0,15,__VDSO_ECTG_OK(%r5) jz 12f - ear %r2,%a4 - llilh %r4,0x0100 - sar %a4,%r4 - lghi %r4,0 - epsw %r5,0 - sacf 512 /* Magic ectg instruction */ + sacf 256 /* Magic ectg instruction */ .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4 - tml %r5,0x4000 - jo 11f - tml %r5,0x8000 - jno 10f - sacf 256 - j 11f -10: sacf 0 -11: sar %a4,%r2 + sacf 0 algr %r1,%r0 /* r1 = cputime as TOD value */ mghi %r1,1000 /* convert to nanoseconds */ srlg %r1,%r1,12 /* r1 = cputime in nanosec */ diff --git a/arch/s390/kernel/vdso64/getcpu.S b/arch/s390/kernel/vdso64/getcpu.S index 43983764b959..e9c34364d97b 100644 --- a/arch/s390/kernel/vdso64/getcpu.S +++ b/arch/s390/kernel/vdso64/getcpu.S @@ -15,22 +15,11 @@ .type __kernel_getcpu,@function __kernel_getcpu: .cfi_startproc - ear %r1,%a4 - llilh %r4,0x0100 - sar %a4,%r4 la %r4,0 - epsw %r0,0 - sacf 512 + sacf 256 l %r5,__VDSO_CPU_NR(%r4) l %r4,__VDSO_NODE_ID(%r4) - tml %r0,0x4000 - jo 1f - tml %r0,0x8000 - jno 0f - sacf 256 - j 1f -0: sacf 0 -1: sar %a4,%r1 + sacf 0 ltgr %r2,%r2 jz 2f st %r5,0(%r2) diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 802903c50de1..cae5a1e16cbd 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -40,10 +40,67 @@ static inline int copy_with_mvcos(void) } #endif +void set_fs(mm_segment_t fs) +{ + current->thread.mm_segment = fs; + if (fs == USER_DS) { + __ctl_load(S390_lowcore.user_asce, 1, 1); + 
clear_cpu_flag(CIF_ASCE_PRIMARY); + } else { + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + set_cpu_flag(CIF_ASCE_PRIMARY); + } + if (fs & 1) { + if (fs == USER_DS_SACF) + __ctl_load(S390_lowcore.user_asce, 7, 7); + else + __ctl_load(S390_lowcore.kernel_asce, 7, 7); + set_cpu_flag(CIF_ASCE_SECONDARY); + } +} +EXPORT_SYMBOL(set_fs); + +mm_segment_t enable_sacf_uaccess(void) +{ + mm_segment_t old_fs; + unsigned long asce, cr; + + old_fs = current->thread.mm_segment; + if (old_fs & 1) + return old_fs; + current->thread.mm_segment |= 1; + asce = S390_lowcore.kernel_asce; + if (likely(old_fs == USER_DS)) { + __ctl_store(cr, 1, 1); + if (cr != S390_lowcore.kernel_asce) { + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + set_cpu_flag(CIF_ASCE_PRIMARY); + } + asce = S390_lowcore.user_asce; + } + __ctl_store(cr, 7, 7); + if (cr != asce) { + __ctl_load(asce, 7, 7); + set_cpu_flag(CIF_ASCE_SECONDARY); + } + return old_fs; +} +EXPORT_SYMBOL(enable_sacf_uaccess); + +void disable_sacf_uaccess(mm_segment_t old_fs) +{ + if (old_fs == USER_DS && test_facility(27)) { + __ctl_load(S390_lowcore.user_asce, 1, 1); + clear_cpu_flag(CIF_ASCE_PRIMARY); + } + current->thread.mm_segment = old_fs; +} +EXPORT_SYMBOL(disable_sacf_uaccess); + static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, unsigned long size) { - register unsigned long reg0 asm("0") = 0x81UL; + register unsigned long reg0 asm("0") = 0x01UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -74,8 +131,9 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, unsigned long size) { unsigned long tmp1, tmp2; + mm_segment_t old_fs; - load_kernel_asce(); + old_fs = enable_sacf_uaccess(); tmp1 = -256UL; asm volatile( " sacf 0\n" @@ -102,6 +160,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) : : "cc", "memory"); + 
disable_sacf_uaccess(old_fs); return size; } @@ -116,7 +175,7 @@ EXPORT_SYMBOL(raw_copy_from_user); static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, unsigned long size) { - register unsigned long reg0 asm("0") = 0x810000UL; + register unsigned long reg0 asm("0") = 0x010000UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -147,8 +206,9 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, unsigned long size) { unsigned long tmp1, tmp2; + mm_segment_t old_fs; - load_kernel_asce(); + old_fs = enable_sacf_uaccess(); tmp1 = -256UL; asm volatile( " sacf 0\n" @@ -175,6 +235,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) : : "cc", "memory"); + disable_sacf_uaccess(old_fs); return size; } @@ -189,7 +250,7 @@ EXPORT_SYMBOL(raw_copy_to_user); static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from, unsigned long size) { - register unsigned long reg0 asm("0") = 0x810081UL; + register unsigned long reg0 asm("0") = 0x010001UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -212,9 +273,10 @@ static inline unsigned long copy_in_user_mvcos(void __user *to, const void __use static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from, unsigned long size) { + mm_segment_t old_fs; unsigned long tmp1; - load_kernel_asce(); + old_fs = enable_sacf_uaccess(); asm volatile( " sacf 256\n" " aghi %0,-1\n" @@ -238,6 +300,7 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1) : : "cc", "memory"); + disable_sacf_uaccess(old_fs); return size; } @@ -251,7 +314,7 @@ EXPORT_SYMBOL(raw_copy_in_user); static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) { - register unsigned long reg0 
asm("0") = 0x810000UL; + register unsigned long reg0 asm("0") = 0x010000UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -279,9 +342,10 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size static inline unsigned long clear_user_xc(void __user *to, unsigned long size) { + mm_segment_t old_fs; unsigned long tmp1, tmp2; - load_kernel_asce(); + old_fs = enable_sacf_uaccess(); asm volatile( " sacf 256\n" " aghi %0,-1\n" @@ -310,6 +374,7 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size) EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2) : : "cc", "memory"); + disable_sacf_uaccess(old_fs); return size; } @@ -345,10 +410,15 @@ static inline unsigned long strnlen_user_srst(const char __user *src, unsigned long __strnlen_user(const char __user *src, unsigned long size) { + mm_segment_t old_fs; + unsigned long len; + if (unlikely(!size)) return 0; - load_kernel_asce(); - return strnlen_user_srst(src, size); + old_fs = enable_sacf_uaccess(); + len = strnlen_user_srst(src, size); + disable_sacf_uaccess(old_fs); + return len; } EXPORT_SYMBOL(__strnlen_user); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index be974b3eb7e4..14654007dce4 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -50,6 +50,13 @@ #define VM_FAULT_SIGNAL 0x080000 #define VM_FAULT_PFAULT 0x100000 +enum fault_type { + KERNEL_FAULT, + USER_FAULT, + VDSO_FAULT, + GMAP_FAULT, +}; + static unsigned long store_indication __read_mostly; static int __init fault_init(void) @@ -99,27 +106,34 @@ void bust_spinlocks(int yes) } /* - * Returns the address space associated with the fault. - * Returns 0 for kernel space and 1 for user space. + * Find out which address space caused the exception. + * Access register mode is impossible, ignore space == 3. 
*/ -static inline int user_space_fault(struct pt_regs *regs) +static inline enum fault_type get_fault_type(struct pt_regs *regs) { unsigned long trans_exc_code; - /* - * The lowest two bits of the translation exception - * identification indicate which paging table was used. - */ trans_exc_code = regs->int_parm_long & 3; - if (trans_exc_code == 3) /* home space -> kernel */ - return 0; - if (user_mode(regs)) - return 1; - if (trans_exc_code == 2) /* secondary space -> set_fs */ - return current->thread.mm_segment.ar4; - if (test_pt_regs_flag(regs, PIF_GUEST_FAULT)) - return 1; - return 0; + if (likely(trans_exc_code == 0)) { + /* primary space exception */ + if (IS_ENABLED(CONFIG_PGSTE) && + test_pt_regs_flag(regs, PIF_GUEST_FAULT)) + return GMAP_FAULT; + if (current->thread.mm_segment == USER_DS) + return USER_FAULT; + return KERNEL_FAULT; + } + if (trans_exc_code == 2) { + /* secondary space exception */ + if (current->thread.mm_segment & 1) { + if (current->thread.mm_segment == USER_DS_SACF) + return USER_FAULT; + return KERNEL_FAULT; + } + return VDSO_FAULT; + } + /* home space exception -> access via kernel ASCE */ + return KERNEL_FAULT; } static int bad_address(void *p) @@ -204,20 +218,23 @@ static void dump_fault_info(struct pt_regs *regs) break; } pr_cont("mode while using "); - if (!user_space_fault(regs)) { - asce = S390_lowcore.kernel_asce; - pr_cont("kernel "); - } -#ifdef CONFIG_PGSTE - else if (test_pt_regs_flag(regs, PIF_GUEST_FAULT)) { - struct gmap *gmap = (struct gmap *)S390_lowcore.gmap; - asce = gmap->asce; - pr_cont("gmap "); - } -#endif - else { + switch (get_fault_type(regs)) { + case USER_FAULT: asce = S390_lowcore.user_asce; pr_cont("user "); + break; + case VDSO_FAULT: + asce = S390_lowcore.vdso_asce; + pr_cont("vdso "); + break; + case GMAP_FAULT: + asce = ((struct gmap *) S390_lowcore.gmap)->asce; + pr_cont("gmap "); + break; + case KERNEL_FAULT: + asce = S390_lowcore.kernel_asce; + pr_cont("kernel "); + break; } pr_cont("ASCE.\n"); 
dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK); @@ -273,7 +290,7 @@ static noinline void do_no_context(struct pt_regs *regs) * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ - if (!user_space_fault(regs)) + if (get_fault_type(regs) == KERNEL_FAULT) printk(KERN_ALERT "Unable to handle kernel pointer dereference" " in virtual kernel address space\n"); else @@ -395,12 +412,11 @@ static noinline void do_fault_error(struct pt_regs *regs, int access, int fault) */ static inline int do_exception(struct pt_regs *regs, int access) { -#ifdef CONFIG_PGSTE struct gmap *gmap; -#endif struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; + enum fault_type type; unsigned long trans_exc_code; unsigned long address; unsigned int flags; @@ -425,8 +441,19 @@ static inline int do_exception(struct pt_regs *regs, int access) * user context. */ fault = VM_FAULT_BADCONTEXT; - if (unlikely(!user_space_fault(regs) || faulthandler_disabled() || !mm)) + type = get_fault_type(regs); + switch (type) { + case KERNEL_FAULT: goto out; + case VDSO_FAULT: + fault = VM_FAULT_BADMAP; + goto out; + case USER_FAULT: + case GMAP_FAULT: + if (faulthandler_disabled() || !mm) + goto out; + break; + } address = trans_exc_code & __FAIL_ADDR_MASK; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); @@ -437,10 +464,9 @@ static inline int do_exception(struct pt_regs *regs, int access) flags |= FAULT_FLAG_WRITE; down_read(&mm->mmap_sem); -#ifdef CONFIG_PGSTE - gmap = test_pt_regs_flag(regs, PIF_GUEST_FAULT) ? 
- (struct gmap *) S390_lowcore.gmap : NULL; - if (gmap) { + gmap = NULL; + if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) { + gmap = (struct gmap *) S390_lowcore.gmap; current->thread.gmap_addr = address; current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE); current->thread.gmap_int_code = regs->int_code & 0xffff; @@ -452,7 +478,6 @@ static inline int do_exception(struct pt_regs *regs, int access) if (gmap->pfault_enabled) flags |= FAULT_FLAG_RETRY_NOWAIT; } -#endif retry: fault = VM_FAULT_BADMAP; @@ -507,15 +532,14 @@ static inline int do_exception(struct pt_regs *regs, int access) regs, address); } if (fault & VM_FAULT_RETRY) { -#ifdef CONFIG_PGSTE - if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) { + if (IS_ENABLED(CONFIG_PGSTE) && gmap && + (flags & FAULT_FLAG_RETRY_NOWAIT)) { /* FAULT_FLAG_RETRY_NOWAIT has been set, * mmap_sem has not been released */ current->thread.gmap_pfault = 1; fault = VM_FAULT_PFAULT; goto out_up; } -#endif /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk * of starvation. 
*/ flags &= ~(FAULT_FLAG_ALLOW_RETRY | @@ -525,8 +549,7 @@ static inline int do_exception(struct pt_regs *regs, int access) goto retry; } } -#ifdef CONFIG_PGSTE - if (gmap) { + if (IS_ENABLED(CONFIG_PGSTE) && gmap) { address = __gmap_link(gmap, current->thread.gmap_addr, address); if (address == -EFAULT) { @@ -538,7 +561,6 @@ static inline int do_exception(struct pt_regs *regs, int access) goto out_up; } } -#endif fault = 0; out_up: up_read(&mm->mmap_sem); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 817c9e16e83e..671535e64aba 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -95,6 +95,7 @@ void __init paging_init(void) } init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; S390_lowcore.kernel_asce = init_mm.context.asce; + S390_lowcore.user_asce = S390_lowcore.kernel_asce; crst_table_init((unsigned long *) init_mm.pgd, pgd_type); vmem_map_init(); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 4ad4c4f77b4d..434a9564917b 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -71,10 +71,8 @@ static void __crst_table_upgrade(void *arg) { struct mm_struct *mm = arg; - if (current->active_mm == mm) { - clear_user_asce(); + if (current->active_mm == mm) set_user_asce(mm); - } __tlb_flush_local(); } From 11776eaa6568f5357542bf41b0c7bb90854137cc Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 13 Nov 2017 16:37:33 +0100 Subject: [PATCH 07/30] s390: correct some inline assembly constraints Inline assembly code changed in this patch should really use "Q" constraint "Memory reference without index register and with short displacement". The kernel does not compile with kasan support enabled otherwise (due to stack instrumentation). 
Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/include/asm/cpu_mf.h | 2 +- arch/s390/include/asm/lowcore.h | 4 ++-- arch/s390/include/asm/processor.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 05480e4cc5ca..7364130a29c8 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -167,7 +167,7 @@ static inline int lcctl(u64 ctl) " .insn s,0xb2840000,%1\n" " ipm %0\n" " srl %0,28\n" - : "=d" (cc) : "m" (ctl) : "cc"); + : "=d" (cc) : "Q" (ctl) : "cc"); return cc; } diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 2306fa17f6cd..ec6592e8ba36 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -188,14 +188,14 @@ extern struct lowcore *lowcore_ptr[]; static inline void set_prefix(__u32 address) { - asm volatile("spx %0" : : "m" (address) : "memory"); + asm volatile("spx %0" : : "Q" (address) : "memory"); } static inline __u32 store_prefix(void) { __u32 address; - asm volatile("stpx %0" : "=m" (address)); + asm volatile("stpx %0" : "=Q" (address)); return address; } diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 709351bce80e..bfbfad482289 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -245,7 +245,7 @@ static inline unsigned short stap(void) { unsigned short cpu_address; - asm volatile("stap %0" : "=m" (cpu_address)); + asm volatile("stap %0" : "=Q" (cpu_address)); return cpu_address; } From dfd4c4935de8ac39f22e0f65972140405fd27942 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 13 Nov 2017 16:58:34 +0100 Subject: [PATCH 08/30] s390/kbuild: get rid of a warning when compiling with KCOV This change fixes the following warning: warning: (KCOV) selects GCC_PLUGINS which has unmet direct dependencies (HAVE_GCC_PLUGINS && !COMPILE_TEST) Signed-off-by: Vasily Gorbik 
Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 863a62a6de3c..f5beccbe74d8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -148,6 +148,7 @@ config S390 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if FUTEX + select HAVE_GCC_PLUGINS select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 From 3c6153e8145f74870bad11fa4344fd20f1ad3aaf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 14 Nov 2017 13:35:14 +0100 Subject: [PATCH 09/30] s390/vdso: add missing boot_vdso_data declaration sparse says: arch/s390/kernel/vdso.c:150:18: warning: symbol 'boot_vdso_data' was not declared. Should it be static? Signed-off-by: Heiko Carstens --- arch/s390/include/asm/vdso.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index ae6261ef97d5..169d7604eb80 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -46,6 +46,7 @@ struct vdso_per_cpu_data { }; extern struct vdso_data *vdso_data; +extern struct vdso_data boot_vdso_data; void vdso_alloc_boot_cpu(struct lowcore *lowcore); int vdso_alloc_per_cpu(struct lowcore *lowcore); From 78ca4fe3bb166e913d278e504d93f09a8ba3139e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 14 Nov 2017 13:47:00 +0100 Subject: [PATCH 10/30] s390/spinlock: fix indentation checkpatch: WARNING: Statements should start on a tabstop #9499: FILE: arch/s390/lib/spinlock.c:231: + return; sparse: arch/s390/lib/spinlock.c:81 arch_load_niai4() warn: inconsistent indenting Signed-off-by: Heiko Carstens --- arch/s390/lib/spinlock.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 84c0faeaf7ea..30a7c8c29964 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -78,7 +78,7 @@ static inline int 
arch_load_niai4(int *lock) ALTERNATIVE("", ".long 0xb2fa0040", 49) /* NIAI 4 */ " l %0,%1\n" : "=d" (owner) : "Q" (*lock) : "memory"); - return owner; + return owner; } static inline int arch_cmpxchg_niai8(int *lock, int old, int new) @@ -226,9 +226,10 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp) /* Try to get the lock if it is free. */ if (!owner) { new = (old & _Q_TAIL_MASK) | lockval; - if (arch_cmpxchg_niai8(&lp->lock, old, new)) + if (arch_cmpxchg_niai8(&lp->lock, old, new)) { /* Got the lock */ - return; + return; + } continue; } if (count-- >= 0) From 2be1da8d4d3fd7b09f5c6ab952bff5cef0677ade Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 14 Nov 2017 14:50:08 +0100 Subject: [PATCH 11/30] s390/mm: remove unused code Signed-off-by: Heiko Carstens --- arch/s390/mm/gmap.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 2f66290c9b92..b2c140193b0a 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1187,12 +1187,11 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, unsigned long *sgt) { - unsigned long asce, *pgt; + unsigned long *pgt; struct page *page; int i; BUG_ON(!gmap_is_shadow(sg)); - asce = (unsigned long) sgt | _ASCE_TYPE_SEGMENT; for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) { if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN)) continue; @@ -1245,12 +1244,11 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, unsigned long *r3t) { - unsigned long asce, *sgt; + unsigned long *sgt; struct page *page; int i; BUG_ON(!gmap_is_shadow(sg)); - asce = (unsigned long) r3t | _ASCE_TYPE_REGION3; for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) { if (!(r3t[i] & _REGION_ENTRY_ORIGIN)) continue; @@ -1303,12 +1301,11 @@ static void gmap_unshadow_r3t(struct gmap *sg, 
unsigned long raddr) static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, unsigned long *r2t) { - unsigned long asce, *r3t; + unsigned long *r3t; struct page *page; int i; BUG_ON(!gmap_is_shadow(sg)); - asce = (unsigned long) r2t | _ASCE_TYPE_REGION2; for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) { if (!(r2t[i] & _REGION_ENTRY_ORIGIN)) continue; From a6de0a91d93a47f812cf43b96ba6e639de6df6d5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 14 Nov 2017 14:50:38 +0100 Subject: [PATCH 12/30] s390/nmi: remove unused code Signed-off-by: Heiko Carstens --- arch/s390/kernel/nmi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 3f3cda41f32a..6ff169253cae 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -191,7 +191,6 @@ static int notrace s390_check_registers(union mci mci, int umode) { union ctlreg2 cr2; int kill_task; - void *fpt_save_area; kill_task = 0; @@ -224,7 +223,6 @@ static int notrace s390_check_registers(union mci mci, int umode) if (!test_cpu_flag(CIF_FPU)) kill_task = 1; } - fpt_save_area = &S390_lowcore.floating_pt_save_area; if (!mci.fc) { /* * Floating point control register can't be restored. From 049a2c2d486e8cc82c5cd79fa479c5b105b109e9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 14 Nov 2017 15:20:24 +0100 Subject: [PATCH 13/30] s390: enable CPU alternatives unconditionally Remove the CPU_ALTERNATIVES config option and enable the code unconditionally. The config option was only added to avoid a conflict with the named saved segment support. Since that code is gone there is no reason to keep the CPU_ALTERNATIVES config option. Just enable it unconditionally to also reduce the number of config options and make it less likely that something breaks. 
Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 16 ---------------- arch/s390/include/asm/alternative.h | 20 +++----------------- arch/s390/kernel/Makefile | 3 +-- arch/s390/kernel/module.c | 15 ++++++--------- 4 files changed, 10 insertions(+), 44 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f5beccbe74d8..84767046daff 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -539,22 +539,6 @@ config ARCH_RANDOM If unsure, say Y. -config ALTERNATIVES - def_bool y - prompt "Patch optimized instructions for running CPU type" - help - When enabled the kernel code is compiled with additional - alternative instructions blocks optimized for newer CPU types. - These alternative instructions blocks are patched at kernel boot - time when running CPU supports them. This mechanism is used to - optimize some critical code paths (i.e. spinlocks) for newer CPUs - even if kernel is build to support older machine generations. - - This mechanism could be disabled by appending "noaltinstr" - option to the kernel command line. - - If unsure, say Y. 
- endmenu menu "Memory setup" diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index 6c268f6a51d3..a72002056b54 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -15,14 +15,9 @@ struct alt_instr { u8 replacementlen; /* length of new instruction */ } __packed; -#ifdef CONFIG_ALTERNATIVES -extern void apply_alternative_instructions(void); -extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); -#else -static inline void apply_alternative_instructions(void) {}; -static inline void apply_alternatives(struct alt_instr *start, - struct alt_instr *end) {}; -#endif +void apply_alternative_instructions(void); +void apply_alternatives(struct alt_instr *start, struct alt_instr *end); + /* * |661: |662: |6620 |663: * +-----------+---------------------+ @@ -109,7 +104,6 @@ static inline void apply_alternatives(struct alt_instr *start, b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n" \ INSTR_LEN_SANITY_CHECK(altinstr_len(num)) -#ifdef CONFIG_ALTERNATIVES /* alternative assembly primitive: */ #define ALTERNATIVE(oldinstr, altinstr, facility) \ ".pushsection .altinstr_replacement, \"ax\"\n" \ @@ -130,14 +124,6 @@ static inline void apply_alternatives(struct alt_instr *start, ALTINSTR_ENTRY(facility1, 1) \ ALTINSTR_ENTRY(facility2, 2) \ ".popsection\n" -#else -/* Alternative instructions are disabled, let's put just oldinstr in */ -#define ALTERNATIVE(oldinstr, altinstr, facility) \ - oldinstr "\n" - -#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \ - oldinstr "\n" -#endif /* * Alternative instructions for different CPU types or capabilities. 
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 83bc82001c06..0319f4e81ea4 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -59,7 +59,7 @@ obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o -obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o +obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o extra-y += head.o head64.o vmlinux.lds @@ -77,7 +77,6 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UPROBES) += uprobes.o -obj-$(CONFIG_ALTERNATIVES) += alternative.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 6d9f73bb4142..7b87991416fd 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -433,16 +433,13 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *s; char *secstrings; - if (IS_ENABLED(CONFIG_ALTERNATIVES)) { - secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - if (!strcmp(".altinstructions", - secstrings + s->sh_name)) { - /* patch .altinstructions */ - void *aseg = (void *)s->sh_addr; + secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { + if (!strcmp(".altinstructions", secstrings + s->sh_name)) { + /* patch .altinstructions */ + void *aseg = (void *)s->sh_addr; - apply_alternatives(aseg, aseg + s->sh_size); - } + apply_alternatives(aseg, aseg + s->sh_size); } } From 6470c0cc4840c229c6f1c270ebbcacb8e14f477d Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 15 
Nov 2017 17:06:30 +0100 Subject: [PATCH 14/30] s390: Remove CONFIG_HARDENED_USERCOPY When running the crash tool on a s390 live system we get a kernel panic for reading memory within the kernel image: # uname -a Linux r3545011 4.14.0-rc8-00066-g1c9dbd4615fd #45 SMP PREEMPT Fri Nov 10 16:16:22 CET 2017 s390x s390x s390x GNU/Linux # crash /boot/vmlinux-devel /dev/mem # crash> rd 0x100000 usercopy: kernel memory exposure attempt detected from 0000000000100000 () (8 bytes) ------------[ cut here ]------------ kernel BUG at mm/usercopy.c:72! illegal operation: 0001 ilc:1 [#1] PREEMPT SMP. Modules linked in: CPU: 0 PID: 1461 Comm: crash Not tainted 4.14.0-rc8-00066-g1c9dbd4615fd-dirty #46 Hardware name: IBM 2827 H66 706 (z/VM 6.3.0) task: 000000001ad10100 task.stack: 000000001df78000 Krnl PSW : 0704d00180000000 000000000038165c (__check_object_size+0x164/0x1d0) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 Krnl GPRS: 0000000012440e1d 0000000080000000 0000000000000061 00000000001cabc0 00000000001cc6d6 0000000000000000 0000000000cc4ed2 0000000000001000 000003ffc22fdd20 0000000000000008 0000000000100008 0000000000000001 0000000000000008 0000000000100000 0000000000381658 000000001df7bc90 Krnl Code: 000000000038164c: c020004a1c4a larl %r2,cc4ee0 0000000000381652: c0e5fff2581b brasl %r14,1cc688 #0000000000381658: a7f40001 brc 15,38165a >000000000038165c: eb42000c000c srlg %r4,%r2,12 0000000000381662: eb32001c000c srlg %r3,%r2,28 0000000000381668: c0110003ffff lgfi %r1,262143 000000000038166e: ec31ff752065 clgrj %r3,%r1,2,381558 0000000000381674: a7f4ff67 brc 15,381542 Call Trace: ([<0000000000381658>] __check_object_size+0x160/0x1d0) [<000000000082263a>] read_mem+0xaa/0x130. [<0000000000386182>] __vfs_read+0x42/0x168. [<000000000038632e>] vfs_read+0x86/0x140. [<0000000000386a26>] SyS_read+0x66/0xc0. [<0000000000ace6a4>] system_call+0xc4/0x2b0. INFO: lockdep is turned off. 
Last Breaking-Event-Address: [<0000000000381658>] __check_object_size+0x160/0x1d0 Kernel panic - not syncing: Fatal exception: panic_on_oops With CONFIG_HARDENED_USERCOPY copy_to_user() checks in __check_object_size() if the source address is within the kernel image. When the crash tool reads from 0x100000, this check leads to the kernel BUG(). So disable the kernel config option until this bug is fixed. Corresponding bug report on LKML: https://lkml.org/lkml/2017/11/10/341 Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/configs/default_defconfig | 5 +---- arch/s390/configs/gcov_defconfig | 4 ---- arch/s390/configs/performance_defconfig | 4 ---- 3 files changed, 1 insertion(+), 12 deletions(-) diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 84eccc88c065..5af8458951cf 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -629,6 +629,7 @@ CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y +CONFIG_DMA_API_DEBUG=y CONFIG_LKDTM=m CONFIG_TEST_LIST_SORT=y CONFIG_TEST_SORT=y @@ -637,14 +638,12 @@ CONFIG_RBTREE_TEST=y CONFIG_INTERVAL_TREE_TEST=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y -CONFIG_DMA_API_DEBUG=y CONFIG_TEST_BPF=m CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_S390_PTDUMP=y CONFIG_ENCRYPTED_KEYS=m CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y -CONFIG_HARDENED_USERCOPY=y CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y @@ -660,13 +659,11 @@ CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index f7202358e6d7..d52eafe57ae8 100644 
--- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -587,7 +587,6 @@ CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y -CONFIG_HARDENED_USERCOPY=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 @@ -605,13 +604,10 @@ CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 03100fe74ea8..20ed149e1137 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -585,7 +585,6 @@ CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y -CONFIG_HARDENED_USERCOPY=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 @@ -603,13 +602,10 @@ CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m From b192571d1ae375e0bbe0aa3ccfa1a3c3704454b9 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 15 Nov 2017 14:15:36 +0100 Subject: [PATCH 15/30] s390/disassembler: increase show_code buffer size Current buffer size of 64 is too small. objdump shows that there are instructions which would require a buffer of up to 75 bytes (with current formatting). 128 bytes "ought to be enough for anybody". Also replaces 8 spaces with a single tab to reduce the memory footprint.
Fixes the following KASAN finding: BUG: KASAN: stack-out-of-bounds in number+0x3fe/0x538 Write of size 1 at addr 000000005a4a75a0 by task bash/1282 CPU: 1 PID: 1282 Comm: bash Not tainted 4.14.0+ #215 Hardware name: IBM 2964 N96 702 (z/VM 6.4.0) Call Trace: ([<000000000011eeb6>] show_stack+0x56/0x88) [<0000000000e1ce1a>] dump_stack+0x15a/0x1b0 [<00000000004e2994>] print_address_description+0xf4/0x288 [<00000000004e2cf2>] kasan_report+0x13a/0x230 [<0000000000e38ae6>] number+0x3fe/0x538 [<0000000000e3dfe4>] vsnprintf+0x194/0x948 [<0000000000e3ea42>] sprintf+0xa2/0xb8 [<00000000001198dc>] print_insn+0x374/0x500 [<0000000000119346>] show_code+0x4ee/0x538 [<000000000011f234>] show_registers+0x34c/0x388 [<000000000011f2ae>] show_regs+0x3e/0xa8 [<000000000011f502>] die+0x1ea/0x2e8 [<0000000000138f0e>] do_no_context+0x106/0x168 [<0000000000139a1a>] do_protection_exception+0x4da/0x7d0 [<0000000000e55914>] pgm_check_handler+0x16c/0x1c0 [<000000000090639e>] sysrq_handle_crash+0x46/0x58 ([<0000000000000007>] 0x7) [<00000000009073fa>] __handle_sysrq+0x102/0x218 [<0000000000907c06>] write_sysrq_trigger+0xd6/0x100 [<000000000061d67a>] proc_reg_write+0xb2/0x128 [<0000000000520be6>] __vfs_write+0xee/0x368 [<0000000000521222>] vfs_write+0x21a/0x278 [<000000000052156a>] SyS_write+0xda/0x178 [<0000000000e555cc>] system_call+0xc4/0x270 The buggy address belongs to the page: page:000003d1016929c0 count:0 mapcount:0 mapping: (null) index:0x0 flags: 0x0() raw: 0000000000000000 0000000000000000 0000000000000000 ffffffff00000000 raw: 0000000000000100 0000000000000200 0000000000000000 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: 000000005a4a7480: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 000000005a4a7500: 00 00 00 00 00 00 00 00 f2 f2 f2 f2 00 00 00 00 >000000005a4a7580: 00 00 00 00 f3 f3 f3 f3 00 00 00 00 00 00 00 00 ^ 000000005a4a7600: 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 f8 f8 000000005a4a7680: f2 f2 f2 f2 f2 f2 f8 f8 
f2 f2 f3 f3 f3 f3 00 00 ================================================================== Cc: Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index b811d3a8417d..3be829721cf9 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -480,7 +480,7 @@ void show_code(struct pt_regs *regs) { char *mode = user_mode(regs) ? "User" : "Krnl"; unsigned char code[64]; - char buffer[64], *ptr; + char buffer[128], *ptr; mm_segment_t old_fs; unsigned long addr; int start, end, opsize, hops, i; @@ -543,7 +543,7 @@ void show_code(struct pt_regs *regs) start += opsize; pr_cont("%s", buffer); ptr = buffer; - ptr += sprintf(ptr, "\n "); + ptr += sprintf(ptr, "\n\t "); hops++; } pr_cont("\n"); From ca5955cdeae744edd3dcc65d677e833fc29658c2 Mon Sep 17 00:00:00 2001 From: Pu Hou Date: Fri, 11 Nov 2016 03:08:49 +0100 Subject: [PATCH 16/30] s390/cpumf: introduce AUX buffer for dump diagnostic sample data The current implementation uses a private buffer for cpumf to dump samples. Samples first go to this buffer and are then copied to the ring buffer allocated by perf core. With an AUX buffer, this copy is not needed. The AUX buffer is shared and zero-copy mapped to user space. The trailer information at the end of each SDB (sample data block) is also exported to user space. The AUX buffer is used when diagnostic sampling mode is enabled. This patch contains functions to set up/free the AUX buffer and to begin/end sampling per CPU. It also includes the function called in interrupt context to collect samples.
Signed-off-by: Pu Hou Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_sf.c | 446 ++++++++++++++++++++++++++++++++ 1 file changed, 446 insertions(+) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index bd4bbf61aaf3..51dbd8d90dbe 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -77,6 +77,15 @@ struct sf_buffer { unsigned long *tail; /* last sample-data-block-table */ }; +struct aux_buffer { + struct sf_buffer sfb; + unsigned long head; /* index of SDB of buffer head */ + unsigned long alert_mark; /* index of SDB of alert request position */ + unsigned long empty_mark; /* mark of SDB not marked full */ + unsigned long *sdb_index; /* SDB address for fast lookup */ + unsigned long *sdbt_index; /* SDBT address for fast lookup */ +}; + struct cpu_hw_sf { /* CPU-measurement sampling information block */ struct hws_qsi_info_block qsi; @@ -85,6 +94,7 @@ struct cpu_hw_sf { struct sf_buffer sfb; /* Sampling buffer */ unsigned int flags; /* Status flags */ struct perf_event *event; /* Scheduled perf event */ + struct perf_output_handle handle; /* AUX buffer output handle */ }; static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf); @@ -1291,6 +1301,439 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) sampl_overflow, event_overflow); } +#define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb) +#define AUX_SDB_NUM(aux, start, end) (end >= start ? end - start + 1 : 0) +#define AUX_SDB_NUM_ALERT(aux) AUX_SDB_NUM(aux, aux->head, aux->alert_mark) +#define AUX_SDB_NUM_EMPTY(aux) AUX_SDB_NUM(aux, aux->head, aux->empty_mark) + +/* + * Get trailer entry by index of SDB. 
+ */ +static struct hws_trailer_entry *aux_sdb_trailer(struct aux_buffer *aux, + unsigned long index) +{ + unsigned long sdb; + + index = AUX_SDB_INDEX(aux, index); + sdb = aux->sdb_index[index]; + return (struct hws_trailer_entry *)trailer_entry_ptr(sdb); +} + +/* + * Finish sampling on the cpu. Called by cpumsf_pmu_del() with pmu + * disabled. Collect the full SDBs in AUX buffer which have not reached + * the point of alert indicator. And ignore the SDBs which are not + * full. + * + * 1. Scan SDBs to see how much data is there and consume them. + * 2. Remove alert indicator in the buffer. + */ +static void aux_output_end(struct perf_output_handle *handle) +{ + unsigned long i, range_scan, idx; + struct aux_buffer *aux; + struct hws_trailer_entry *te; + + aux = perf_get_aux(handle); + if (!aux) + return; + + range_scan = AUX_SDB_NUM_ALERT(aux); + for (i = 0, idx = aux->head; i < range_scan; i++, idx++) { + te = aux_sdb_trailer(aux, idx); + if (!(te->flags & SDB_TE_BUFFER_FULL_MASK)) + break; + } + /* i is num of SDBs which are full */ + perf_aux_output_end(handle, i << PAGE_SHIFT); + + /* Remove alert indicators in the buffer */ + te = aux_sdb_trailer(aux, aux->alert_mark); + te->flags &= ~SDB_TE_ALERT_REQ_MASK; + + debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %lx SDBs\n", i); +} + +/* + * Start sampling on the CPU. Called by cpumsf_pmu_add() when an event + * is first added to the CPU or rescheduled again to the CPU. It is called + * with pmu disabled. + * + * 1. Reset the trailer of SDBs to get ready for new data. + * 2. Tell the hardware where to put the data by reset the SDBs buffer + * head(tear/dear). 
+ */ +static int aux_output_begin(struct perf_output_handle *handle, + struct aux_buffer *aux, + struct cpu_hw_sf *cpuhw) +{ + unsigned long range; + unsigned long i, range_scan, idx; + unsigned long head, base, offset; + struct hws_trailer_entry *te; + + if (WARN_ON_ONCE(handle->head & ~PAGE_MASK)) + return -EINVAL; + + aux->head = handle->head >> PAGE_SHIFT; + range = (handle->size + 1) >> PAGE_SHIFT; + if (range <= 1) + return -ENOMEM; + + /* + * SDBs between aux->head and aux->empty_mark are already ready + * for new data. range_scan is num of SDBs not within them. + */ + if (range > AUX_SDB_NUM_EMPTY(aux)) { + range_scan = range - AUX_SDB_NUM_EMPTY(aux); + idx = aux->empty_mark + 1; + for (i = 0; i < range_scan; i++, idx++) { + te = aux_sdb_trailer(aux, idx); + te->flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; + te->flags = te->flags & ~SDB_TE_ALERT_REQ_MASK; + te->overflow = 0; + } + /* Save the position of empty SDBs */ + aux->empty_mark = aux->head + range - 1; + } + + /* Set alert indicator */ + aux->alert_mark = aux->head + range/2 - 1; + te = aux_sdb_trailer(aux, aux->alert_mark); + te->flags = te->flags | SDB_TE_ALERT_REQ_MASK; + + /* Reset hardware buffer head */ + head = AUX_SDB_INDEX(aux, aux->head); + base = aux->sdbt_index[head / CPUM_SF_SDB_PER_TABLE]; + offset = head % CPUM_SF_SDB_PER_TABLE; + cpuhw->lsctl.tear = base + offset * sizeof(unsigned long); + cpuhw->lsctl.dear = aux->sdb_index[head]; + + debug_sprintf_event(sfdbg, 6, "aux_output_begin: " + "head->alert_mark->empty_mark (num_alert, range)" + "[%lx -> %lx -> %lx] (%lx, %lx) " + "tear index %lx, tear %lx dear %lx\n", + aux->head, aux->alert_mark, aux->empty_mark, + AUX_SDB_NUM_ALERT(aux), range, + head / CPUM_SF_SDB_PER_TABLE, + cpuhw->lsctl.tear, + cpuhw->lsctl.dear); + + return 0; +} + +/* + * Set alert indicator on SDB at index @alert_index while sampler is running. + * + * Return true if successfully. + * Return false if full indicator is already set by hardware sampler. 
+ */ +static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, + unsigned long long *overflow) +{ + unsigned long long orig_overflow, orig_flags, new_flags; + struct hws_trailer_entry *te; + + te = aux_sdb_trailer(aux, alert_index); + do { + orig_flags = te->flags; + orig_overflow = te->overflow; + *overflow = orig_overflow; + if (orig_flags & SDB_TE_BUFFER_FULL_MASK) { + /* + * SDB is already set by hardware. + * Abort and try to set somewhere + * behind. + */ + return false; + } + new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK; + } while (!cmpxchg_double(&te->flags, &te->overflow, + orig_flags, orig_overflow, + new_flags, 0ULL)); + return true; +} + +/* + * aux_reset_buffer() - Scan and setup SDBs for new samples + * @aux: The AUX buffer to set + * @range: The range of SDBs to scan started from aux->head + * @overflow: Set to overflow count + * + * Set alert indicator on the SDB at index of aux->alert_mark. If this SDB is + * marked as empty, check if it is already set full by the hardware sampler. + * If yes, that means new data is already there before we can set an alert + * indicator. Caller should try to set alert indicator to some position behind. + * + * Scan the SDBs in AUX buffer from behind aux->empty_mark. They were used + * previously and have already been consumed by user space. Reset these SDBs + * (clear full indicator and alert indicator) for new data. + * If aux->alert_mark falls in this area, just set it. Overflow count is + * recorded while scanning. + * + * SDBs between aux->head and aux->empty_mark were already reset last time + * and are ready for new samples. So scanning this area can be skipped. + * + * Return true if alert indicator is set successfully and false if not.
+ */ +static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, + unsigned long long *overflow) +{ + unsigned long long orig_overflow, orig_flags, new_flags; + unsigned long i, range_scan, idx; + struct hws_trailer_entry *te; + + if (range <= AUX_SDB_NUM_EMPTY(aux)) + /* + * No need to scan. All SDBs in range are marked as empty. + * Just set alert indicator. Should check race with hardware + * sampler. + */ + return aux_set_alert(aux, aux->alert_mark, overflow); + + if (aux->alert_mark <= aux->empty_mark) + /* + * Set alert indicator on empty SDB. Should check race + * with hardware sampler. + */ + if (!aux_set_alert(aux, aux->alert_mark, overflow)) + return false; + + /* + * Scan the SDBs to clear full and alert indicator used previously. + * Start scanning from one SDB behind empty_mark. If the new alert + * indicator fall into this range, set it. + */ + range_scan = range - AUX_SDB_NUM_EMPTY(aux); + idx = aux->empty_mark + 1; + for (i = 0; i < range_scan; i++, idx++) { + te = aux_sdb_trailer(aux, idx); + do { + orig_flags = te->flags; + orig_overflow = te->overflow; + new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK; + if (idx == aux->alert_mark) + new_flags |= SDB_TE_ALERT_REQ_MASK; + else + new_flags &= ~SDB_TE_ALERT_REQ_MASK; + } while (!cmpxchg_double(&te->flags, &te->overflow, + orig_flags, orig_overflow, + new_flags, 0ULL)); + *overflow += orig_overflow; + } + + /* Update empty_mark to new position */ + aux->empty_mark = aux->head + range - 1; + + return true; +} + +/* + * Measurement alert handler for diagnostic mode sampling. 
+ */ +static void hw_collect_aux(struct cpu_hw_sf *cpuhw) +{ + struct aux_buffer *aux; + int done = 0; + unsigned long range = 0, size; + unsigned long long overflow = 0; + struct perf_output_handle *handle = &cpuhw->handle; + unsigned long num_sdb; + + aux = perf_get_aux(handle); + if (WARN_ON_ONCE(!aux)) + return; + + /* Inform user space new data arrived */ + size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT; + perf_aux_output_end(handle, size); + num_sdb = aux->sfb.num_sdb; + + while (!done) { + /* Get an output handle */ + aux = perf_aux_output_begin(handle, cpuhw->event); + if (handle->size == 0) { + pr_err("The AUX buffer with %lu pages for the " + "diagnostic-sampling mode is full\n", + num_sdb); + debug_sprintf_event(sfdbg, 1, "AUX buffer used up\n"); + break; + } + if (WARN_ON_ONCE(!aux)) + return; + + /* Update head and alert_mark to new position */ + aux->head = handle->head >> PAGE_SHIFT; + range = (handle->size + 1) >> PAGE_SHIFT; + if (range == 1) + aux->alert_mark = aux->head; + else + aux->alert_mark = aux->head + range/2 - 1; + + if (aux_reset_buffer(aux, range, &overflow)) { + if (!overflow) { + done = 1; + break; + } + size = range << PAGE_SHIFT; + perf_aux_output_end(&cpuhw->handle, size); + pr_err("Sample data caused the AUX buffer with %lu " + "pages to overflow\n", num_sdb); + debug_sprintf_event(sfdbg, 1, "head %lx range %lx " + "overflow %llx\n", + aux->head, range, overflow); + } else { + size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT; + perf_aux_output_end(&cpuhw->handle, size); + debug_sprintf_event(sfdbg, 6, "head %lx alert %lx " + "already full, try another\n", + aux->head, aux->alert_mark); + } + } + + if (done) + debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: " + "[%lx -> %lx -> %lx] (%lx, %lx)\n", + aux->head, aux->alert_mark, aux->empty_mark, + AUX_SDB_NUM_ALERT(aux), range); +} + +/* + * Callback when freeing AUX buffers. 
+ */ +static void aux_buffer_free(void *data) +{ + struct aux_buffer *aux = data; + unsigned long i, num_sdbt; + + if (!aux) + return; + + /* Free SDBT. SDB is freed by the caller */ + num_sdbt = aux->sfb.num_sdbt; + for (i = 0; i < num_sdbt; i++) + free_page(aux->sdbt_index[i]); + + kfree(aux->sdbt_index); + kfree(aux->sdb_index); + kfree(aux); + + debug_sprintf_event(sfdbg, 4, "aux_buffer_free: free " + "%lu SDBTs\n", num_sdbt); +} + +/* + * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling + * @cpu: On which to allocate, -1 means current + * @pages: Array of pointers to buffer pages passed from perf core + * @nr_pages: Total pages + * @snapshot: Flag for snapshot mode + * + * This is the callback when setup an event using AUX buffer. Perf tool can + * trigger this by an additional mmap() call on the event. Unlike the buffer + * for basic samples, AUX buffer belongs to the event. It is scheduled with + * the task among online cpus when it is a per-thread event. + * + * Return the private AUX buffer structure if success or NULL if fails. 
+ */ +static void *aux_buffer_setup(int cpu, void **pages, int nr_pages, + bool snapshot) +{ + struct sf_buffer *sfb; + struct aux_buffer *aux; + unsigned long *new, *tail; + int i, n_sdbt; + + if (!nr_pages || !pages) + return NULL; + + if (nr_pages > CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR) { + pr_err("AUX buffer size (%i pages) is larger than the " + "maximum sampling buffer limit\n", + nr_pages); + return NULL; + } else if (nr_pages < CPUM_SF_MIN_SDB * CPUM_SF_SDB_DIAG_FACTOR) { + pr_err("AUX buffer size (%i pages) is less than the " + "minimum sampling buffer limit\n", + nr_pages); + return NULL; + } + + /* Allocate aux_buffer struct for the event */ + aux = kmalloc(sizeof(struct aux_buffer), GFP_KERNEL); + if (!aux) + goto no_aux; + sfb = &aux->sfb; + + /* Allocate sdbt_index for fast reference */ + n_sdbt = (nr_pages + CPUM_SF_SDB_PER_TABLE - 1) / CPUM_SF_SDB_PER_TABLE; + aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL); + if (!aux->sdbt_index) + goto no_sdbt_index; + + /* Allocate sdb_index for fast reference */ + aux->sdb_index = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL); + if (!aux->sdb_index) + goto no_sdb_index; + + /* Allocate the first SDBT */ + sfb->num_sdbt = 0; + sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); + if (!sfb->sdbt) + goto no_sdbt; + aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)sfb->sdbt; + tail = sfb->tail = sfb->sdbt; + + /* + * Link the provided pages of AUX buffer to SDBT. + * Allocate SDBT if needed. 
+ */ + for (i = 0; i < nr_pages; i++, tail++) { + if (require_table_link(tail)) { + new = (unsigned long *) get_zeroed_page(GFP_KERNEL); + if (!new) + goto no_sdbt; + aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)new; + /* Link current page to tail of chain */ + *tail = (unsigned long)(void *) new + 1; + tail = new; + } + /* Tail is the entry in a SDBT */ + *tail = (unsigned long)pages[i]; + aux->sdb_index[i] = (unsigned long)pages[i]; + } + sfb->num_sdb = nr_pages; + + /* Link the last entry in the SDBT to the first SDBT */ + *tail = (unsigned long) sfb->sdbt + 1; + sfb->tail = tail; + + /* + * Initially all SDBs are zeroed. Mark them as empty. + * So there is no need to clear the full indicator + * when this event is first added. + */ + aux->empty_mark = sfb->num_sdb - 1; + + debug_sprintf_event(sfdbg, 4, "aux_buffer_setup: setup %lu SDBTs" + " and %lu SDBs\n", + sfb->num_sdbt, sfb->num_sdb); + + return aux; + +no_sdbt: + /* SDBs (AUX buffer pages) are freed by caller */ + for (i = 0; i < sfb->num_sdbt; i++) + free_page(aux->sdbt_index[i]); + kfree(aux->sdb_index); +no_sdb_index: + kfree(aux->sdbt_index); +no_sdbt_index: + kfree(aux); +no_aux: + return NULL; +} + static void cpumsf_pmu_read(struct perf_event *event) { /* Nothing to do ... updates are interrupt-driven */ @@ -1448,6 +1891,9 @@ static struct pmu cpumf_sampling = { .read = cpumsf_pmu_read, .attr_groups = cpumsf_pmu_attr_groups, + + .setup_aux = aux_buffer_setup, + .free_aux = aux_buffer_free, }; static void cpumf_measurement_alert(struct ext_code ext_code, From cbf6948f36afcbeca61ed5c3d5e7d930567a200f Mon Sep 17 00:00:00 2001 From: Pu Hou Date: Fri, 11 Nov 2016 04:23:15 +0100 Subject: [PATCH 17/30] s390/cpumf: enable using AUX buffer Modify PMU callback to use AUX buffer for diagnostic mode sampling. Basic-mode sampling still uses the original way.
Signed-off-by: Pu Hou Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_sf.c | 55 ++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 51dbd8d90dbe..b9248a70b232 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -780,6 +780,10 @@ static int __hw_perf_event_init(struct perf_event *event) hwc->extra_reg.reg = REG_OVERFLOW; OVERFLOW_REG(hwc) = 0; + /* Use AUX buffer. No need to allocate it by ourself */ + if (attr->config == PERF_EVENT_CPUM_SF_DIAG) + return 0; + /* Allocate the per-CPU sampling buffer using the CPU information * from the event. If the event is not pinned to a particular * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling @@ -876,10 +880,15 @@ static void cpumsf_pmu_enable(struct pmu *pmu) */ if (cpuhw->event) { hwc = &cpuhw->event->hw; - /* Account number of overflow-designated buffer extents */ - sfb_account_overflows(cpuhw, hwc); - if (sfb_has_pending_allocs(&cpuhw->sfb, hwc)) - extend_sampling_buffer(&cpuhw->sfb, hwc); + if (!(SAMPL_DIAG_MODE(hwc))) { + /* + * Account number of overflow-designated + * buffer extents + */ + sfb_account_overflows(cpuhw, hwc); + if (sfb_has_pending_allocs(&cpuhw->sfb, hwc)) + extend_sampling_buffer(&cpuhw->sfb, hwc); + } } /* (Re)enable the PMU and sampling facility */ @@ -1225,6 +1234,13 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; int done; + /* + * AUX buffer is used when in diagnostic sampling mode. + * No perf events/samples are created. 
+ */ + if (SAMPL_DIAG_MODE(&event->hw)) + return; + if (flush_all && SDB_FULL_BLOCKS(hwc)) flush_all = 0; @@ -1785,12 +1801,13 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags) static int cpumsf_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); + struct aux_buffer *aux; int err; if (cpuhw->flags & PMU_F_IN_USE) return -EAGAIN; - if (!cpuhw->sfb.sdbt) + if (!SAMPL_DIAG_MODE(&event->hw) && !cpuhw->sfb.sdbt) return -EINVAL; err = 0; @@ -1805,10 +1822,12 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags) */ cpuhw->lsctl.s = 0; cpuhw->lsctl.h = 1; - cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt; - cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); - hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); + if (!SAMPL_DIAG_MODE(&event->hw)) { + cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt; + cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; + hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); + } /* Ensure sampling functions are in the disabled state. If disabled, * switch on sampling enable control. 
*/ @@ -1816,9 +1835,18 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags) err = -EAGAIN; goto out; } - cpuhw->lsctl.es = 1; - if (SAMPL_DIAG_MODE(&event->hw)) + if (SAMPL_DIAG_MODE(&event->hw)) { + aux = perf_aux_output_begin(&cpuhw->handle, event); + if (!aux) { + err = -EINVAL; + goto out; + } + err = aux_output_begin(&cpuhw->handle, aux, cpuhw); + if (err) + goto out; cpuhw->lsctl.ed = 1; + } + cpuhw->lsctl.es = 1; /* Set in_use flag and store event */ cpuhw->event = event; @@ -1844,6 +1872,8 @@ static void cpumsf_pmu_del(struct perf_event *event, int flags) cpuhw->flags &= ~PMU_F_IN_USE; cpuhw->event = NULL; + if (SAMPL_DIAG_MODE(&event->hw)) + aux_output_end(&cpuhw->handle); perf_event_update_userpage(event); perf_pmu_enable(event->pmu); } @@ -1917,7 +1947,10 @@ static void cpumf_measurement_alert(struct ext_code ext_code, /* Program alert request */ if (alert & CPU_MF_INT_SF_PRA) { if (cpuhw->flags & PMU_F_IN_USE) - hw_perf_event_update(cpuhw->event, 0); + if (SAMPL_DIAG_MODE(&cpuhw->event->hw)) + hw_collect_aux(cpuhw); + else + hw_perf_event_update(cpuhw->event, 0); else WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE)); } From a3f22d505f56bf3efd8eecd9b7adcc40a40a4a8e Mon Sep 17 00:00:00 2001 From: Pu Hou Date: Thu, 1 Sep 2016 10:48:22 +0200 Subject: [PATCH 18/30] s390/perf: add callback to perf to enable using AUX buffer The perf tool needs to implement a callback to enable using the AUX buffer. Perf will do another mmap() to trigger the setup of the AUX buffer in the kernel if there is such a callback. The default size of the AUX buffer is set properly according to the sampling frequency to avoid overflow. It could also be manually set by the -m option of perf. The interface of perf is not changed. Diagnostic mode sampling could be started by `perf record -e rBD000` like before.
Signed-off-by: Pu Hou Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- tools/perf/arch/s390/util/Build | 2 + tools/perf/arch/s390/util/auxtrace.c | 118 +++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 tools/perf/arch/s390/util/auxtrace.c diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index 5bd7b9260cc0..397084382b23 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build @@ -4,3 +4,5 @@ libperf-y += kvm-stat.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-y += machine.o + +libperf-$(CONFIG_AUXTRACE) += auxtrace.o diff --git a/tools/perf/arch/s390/util/auxtrace.c b/tools/perf/arch/s390/util/auxtrace.c new file mode 100644 index 000000000000..6cb48e4cffd9 --- /dev/null +++ b/tools/perf/arch/s390/util/auxtrace.c @@ -0,0 +1,118 @@ +#include +#include +#include +#include +#include + +#include "../../util/evlist.h" +#include "../../util/auxtrace.h" +#include "../../util/evsel.h" + +#define PERF_EVENT_CPUM_SF 0xB0000 /* Event: Basic-sampling */ +#define PERF_EVENT_CPUM_SF_DIAG 0xBD000 /* Event: Combined-sampling */ +#define DEFAULT_AUX_PAGES 128 +#define DEFAULT_FREQ 4000 + +static void cpumsf_free(struct auxtrace_record *itr) +{ + free(itr); +} + +static size_t cpumsf_info_priv_size(struct auxtrace_record *itr __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + return 0; +} + +static int +cpumsf_info_fill(struct auxtrace_record *itr __maybe_unused, + struct perf_session *session __maybe_unused, + struct auxtrace_info_event *auxtrace_info __maybe_unused, + size_t priv_size __maybe_unused) +{ + return 0; +} + +static unsigned long +cpumsf_reference(struct auxtrace_record *itr __maybe_unused) +{ + return 0; +} + +static int +cpumsf_recording_options(struct auxtrace_record *ar __maybe_unused, + struct perf_evlist *evlist __maybe_unused, + struct record_opts *opts) +{ + unsigned int factor = 1; + unsigned int pages; + + opts->full_auxtrace = true; + 
+ /* + * The AUX buffer size should be set properly to avoid + * overflow of samples if it is not set explicitly. + * DEFAULT_AUX_PAGES is a proper size when sampling frequency + * is DEFAULT_FREQ. It is expected to hold about 1/2 second + * of sampling data. The size used for AUX buffer will scale + * according to the specified frequency and DEFAULT_FREQ. + */ + if (!opts->auxtrace_mmap_pages) { + if (opts->user_freq != UINT_MAX) + factor = (opts->user_freq + DEFAULT_FREQ + - 1) / DEFAULT_FREQ; + pages = DEFAULT_AUX_PAGES * factor; + opts->auxtrace_mmap_pages = roundup_pow_of_two(pages); + } + + return 0; +} + +static int +cpumsf_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused, + struct record_opts *opts __maybe_unused, + const char *str __maybe_unused) +{ + return 0; +} + +/* + * auxtrace_record__init is called when perf record + * checks if the event really needs auxtrace + */ +struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist, + int *err) +{ + struct auxtrace_record *aux; + struct perf_evsel *pos; + int diagnose = 0; + + if (evlist->nr_entries == 0) + return NULL; + + evlist__for_each_entry(evlist, pos) { + if (pos->attr.config == PERF_EVENT_CPUM_SF_DIAG) { + diagnose = 1; + break; + } + } + + if (!diagnose) + return NULL; + + /* sampling in diagnose mode. alloc aux buffer */ + aux = zalloc(sizeof(*aux)); + if (aux == NULL) { + *err = -ENOMEM; + return NULL; + } + + aux->parse_snapshot_options = cpumsf_parse_snapshot_options; + aux->recording_options = cpumsf_recording_options; + aux->info_priv_size = cpumsf_info_priv_size; + aux->info_fill = cpumsf_info_fill; + aux->free = cpumsf_free; + aux->reference = cpumsf_reference; + + return aux; +} From 3d43b981eb841a9493717e6d509f59553dbe8c7a Mon Sep 17 00:00:00 2001 From: Pu Hou Date: Fri, 19 May 2017 11:16:55 +0200 Subject: [PATCH 19/30] s390/cpumf: remove raw event support in basic-only sampling mode Raw sample was implemented to export the diagnostic samples.
With having this achieved with AUX buffers, there is no requirement for basic samples to export raw data. In particular, most basic sampling information are consumed for creating the perf event sample. Signed-off-by: Pu Hou Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/perf_event.h | 17 --- arch/s390/kernel/perf_cpum_sf.c | 185 +++++------------------------ 2 files changed, 27 insertions(+), 175 deletions(-) diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 79aa6421fedb..d6c9d1e0dc2d 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -64,27 +64,10 @@ struct perf_sf_sde_regs { #define REG_OVERFLOW 1 #define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config) #define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc) -#define RAWSAMPLE_REG(hwc) ((hwc)->config) #define TEAR_REG(hwc) ((hwc)->last_tag) #define SAMPL_RATE(hwc) ((hwc)->event_base) #define SAMPL_FLAGS(hwc) ((hwc)->config_base) #define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE) #define SDB_FULL_BLOCKS(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS) -/* Structure for sampling data entries to be passed as perf raw sample data - * to user space. Note that raw sample data must be aligned and, thus, might - * be padded with zeros. 
- */ -struct sf_raw_sample { -#define SF_RAW_SAMPLE_BASIC PERF_CPUM_SF_BASIC_MODE -#define SF_RAW_SAMPLE_DIAG PERF_CPUM_SF_DIAG_MODE - u64 format; - u32 size; /* Size of sf_raw_sample */ - u16 bsdes; /* Basic-sampling data entry size */ - u16 dsdes; /* Diagnostic-sampling data entry size */ - struct hws_basic_entry basic; /* Basic-sampling data entry */ - struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ - u8 padding[]; /* Padding to next multiple of 8 */ -} __packed; - #endif /* _ASM_S390_PERF_EVENT_H */ diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index b9248a70b232..4d8ddd8bd9be 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -351,22 +351,6 @@ static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc) sfb_account_allocs(num, hwc); } -static size_t event_sample_size(struct hw_perf_event *hwc) -{ - struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); - size_t sample_size; - - /* The sample size depends on the sampling function: The basic-sampling - * function must be always enabled, diagnostic-sampling function is - * optional. - */ - sample_size = sfr->bsdes; - if (SAMPL_DIAG_MODE(hwc)) - sample_size += sfr->dsdes; - - return sample_size; -} - static void deallocate_buffers(struct cpu_hw_sf *cpuhw) { if (cpuhw->sfb.sdbt) @@ -376,35 +360,7 @@ static void deallocate_buffers(struct cpu_hw_sf *cpuhw) static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) { unsigned long n_sdb, freq, factor; - size_t sfr_size, sample_size; - struct sf_raw_sample *sfr; - - /* Allocate raw sample buffer - * - * The raw sample buffer is used to temporarily store sampling data - * entries for perf raw sample processing. The buffer size mainly - * depends on the size of diagnostic-sampling data entries which is - * machine-specific. The exact size calculation includes: - * 1. 
The first 4 bytes of diagnostic-sampling data entries are - * already reflected in the sf_raw_sample structure. Subtract - * these bytes. - * 2. The perf raw sample data must be 8-byte aligned (u64) and - * perf's internal data size must be considered too. So add - * an additional u32 for correct alignment and subtract before - * allocating the buffer. - * 3. Store the raw sample buffer pointer in the perf event - * hardware structure. - */ - sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) + - sizeof(u32), sizeof(u64)); - sfr_size -= sizeof(u32); - sfr = kzalloc(sfr_size, GFP_KERNEL); - if (!sfr) - return -ENOMEM; - sfr->size = sfr_size; - sfr->bsdes = cpuhw->qsi.bsdes; - sfr->dsdes = cpuhw->qsi.dsdes; - RAWSAMPLE_REG(hwc) = (unsigned long) sfr; + size_t sample_size; /* Calculate sampling buffers using 4K pages * @@ -430,7 +386,7 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up * to 511 SDBs). 
*/ - sample_size = event_sample_size(hwc); + sample_size = sizeof(struct hws_basic_entry); freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); factor = 1; n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size)); @@ -629,10 +585,6 @@ static int reserve_pmc_hardware(void) static void hw_perf_event_destroy(struct perf_event *event) { - /* Free raw sample buffer */ - if (RAWSAMPLE_REG(&event->hw)) - kfree((void *) RAWSAMPLE_REG(&event->hw)); - /* Release PMC if this is the last perf event */ if (!atomic_add_unless(&num_events, -1, 1)) { mutex_lock(&pmc_reserve_mutex); @@ -652,15 +604,8 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period) static void hw_reset_registers(struct hw_perf_event *hwc, unsigned long *sdbt_origin) { - struct sf_raw_sample *sfr; - /* (Re)set to first sample-data-block-table */ TEAR_REG(hwc) = (unsigned long) sdbt_origin; - - /* (Re)set raw sampling buffer register */ - sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); - memset(&sfr->basic, 0, sizeof(sfr->basic)); - memset(&sfr->diag, 0, sfr->dsdes); } static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, @@ -986,22 +931,16 @@ static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, * * Return non-zero if an event overflow occurred. */ -static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) +static int perf_push_sample(struct perf_event *event, + struct hws_basic_entry *basic) { int overflow; struct pt_regs regs; struct perf_sf_sde_regs *sde_regs; struct perf_sample_data data; - struct perf_raw_record raw = { - .frag = { - .size = sfr->size, - .data = sfr, - }, - }; /* Setup perf sample */ perf_sample_data_init(&data, 0, event->hw.last_period); - data.raw = &raw; /* Setup pt_regs to look like an CPU-measurement external interrupt * using the Program Request Alert code. 
The regs.int_parm_long @@ -1013,11 +952,11 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) regs.int_parm = CPU_MF_INT_SF_PRA; sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long; - psw_bits(regs.psw).ia = sfr->basic.ia; - psw_bits(regs.psw).dat = sfr->basic.T; - psw_bits(regs.psw).wait = sfr->basic.W; - psw_bits(regs.psw).pstate = sfr->basic.P; - psw_bits(regs.psw).as = sfr->basic.AS; + psw_bits(regs.psw).ia = basic->ia; + psw_bits(regs.psw).dat = basic->T; + psw_bits(regs.psw).wait = basic->W; + psw_bits(regs.psw).pstate = basic->P; + psw_bits(regs.psw).as = basic->AS; /* * Use the hardware provided configuration level to decide if the @@ -1030,7 +969,7 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) * If the value differs from 0xffff (the host value), we assume to * be a KVM guest. */ - switch (sfr->basic.CL) { + switch (basic->CL) { case 1: /* logical partition */ sde_regs->in_guest = 0; break; @@ -1038,7 +977,7 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) sde_regs->in_guest = 1; break; default: /* old machine, use heuristics */ - if (sfr->basic.gpp || sfr->basic.prim_asn != 0xffff) + if (basic->gpp || basic->prim_asn != 0xffff) sde_regs->in_guest = 1; break; } @@ -1060,75 +999,12 @@ static void perf_event_count_update(struct perf_event *event, u64 count) local64_add(count, &event->count); } -static int sample_format_is_valid(struct hws_combined_entry *sample, - unsigned int flags) -{ - if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) - /* Only basic-sampling data entries with data-entry-format - * version of 0x0001 can be processed. - */ - if (sample->basic.def != 0x0001) - return 0; - if (flags & PERF_CPUM_SF_DIAG_MODE) - /* The data-entry-format number of diagnostic-sampling data - * entries can vary. Because diagnostic data is just passed - * through, do only a sanity check on the DEF.
- */ - if (sample->diag.def < 0x8001) - return 0; - return 1; -} - -static int sample_is_consistent(struct hws_combined_entry *sample, - unsigned long flags) -{ - /* This check applies only to basic-sampling data entries of potentially - * combined-sampling data entries. Invalid entries cannot be processed - * by the PMU and, thus, do not deliver an associated - * diagnostic-sampling data entry. - */ - if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE))) - return 0; - /* - * Samples are skipped, if they are invalid or for which the - * instruction address is not predictable, i.e., the wait-state bit is - * set. - */ - if (sample->basic.I || sample->basic.W) - return 0; - return 1; -} - -static void reset_sample_slot(struct hws_combined_entry *sample, - unsigned long flags) -{ - if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) - sample->basic.def = 0; - if (flags & PERF_CPUM_SF_DIAG_MODE) - sample->diag.def = 0; -} - -static void sfr_store_sample(struct sf_raw_sample *sfr, - struct hws_combined_entry *sample) -{ - if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE)) - sfr->basic = sample->basic; - if (sfr->format & PERF_CPUM_SF_DIAG_MODE) - memcpy(&sfr->diag, &sample->diag, sfr->dsdes); -} - -static void debug_sample_entry(struct hws_combined_entry *sample, - struct hws_trailer_entry *te, - unsigned long flags) +static void debug_sample_entry(struct hws_basic_entry *sample, + struct hws_trailer_entry *te) { debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown " - "sampling data entry: te->f=%i basic.def=%04x (%p)" - " diag.def=%04x (%p)\n", te->f, - sample->basic.def, &sample->basic, - (flags & PERF_CPUM_SF_DIAG_MODE) - ? sample->diag.def : 0xFFFF, - (flags & PERF_CPUM_SF_DIAG_MODE) - ? 
&sample->diag : NULL); + "sampling data entry: te->f=%i basic.def=%04x (%p)\n", + te->f, sample->def, sample); } /* hw_collect_samples() - Walk through a sample-data-block and collect samples @@ -1154,44 +1030,37 @@ static void debug_sample_entry(struct hws_combined_entry *sample, static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, unsigned long long *overflow) { - unsigned long flags = SAMPL_FLAGS(&event->hw); - struct hws_combined_entry *sample; struct hws_trailer_entry *te; - struct sf_raw_sample *sfr; - size_t sample_size; + struct hws_basic_entry *sample; - /* Prepare and initialize raw sample data */ - sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw); - sfr->format = flags & PERF_CPUM_SF_MODE_MASK; - - sample_size = event_sample_size(&event->hw); te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); - sample = (struct hws_combined_entry *) *sdbt; + sample = (struct hws_basic_entry *) *sdbt; while ((unsigned long *) sample < (unsigned long *) te) { /* Check for an empty sample */ - if (!sample->basic.def) + if (!sample->def) break; /* Update perf event period */ perf_event_count_update(event, SAMPL_RATE(&event->hw)); - /* Check sampling data entry */ - if (sample_format_is_valid(sample, flags)) { + /* Check whether sample is valid */ + if (sample->def == 0x0001) { /* If an event overflow occurred, the PMU is stopped to * throttle event delivery. Remaining sample data is * discarded. */ if (!*overflow) { - if (sample_is_consistent(sample, flags)) { + /* Check whether sample is consistent */ + if (sample->I == 0 && sample->W == 0) { /* Deliver sample data to perf */ - sfr_store_sample(sfr, sample); - *overflow = perf_push_sample(event, sfr); + *overflow = perf_push_sample(event, + sample); } } else /* Count discarded samples */ *overflow += 1; } else { - debug_sample_entry(sample, te, flags); + debug_sample_entry(sample, te); /* Sample slot is not yet written or other record. 
* * This condition can occur if the buffer was reused @@ -1207,8 +1076,8 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, } /* Reset sample slot and advance to next sample */ - reset_sample_slot(sample, flags); - sample += sample_size; + sample->def = 0; + sample++; } } From 9232c3c741200167e44ae9d0e434092657ab4534 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 31 May 2017 15:22:19 +0200 Subject: [PATCH 20/30] s390/cpum_sf: do not register PMU if no sampling mode is authorized Previously, the cpum_sf PMU was registered even if there is no sampling mode authorized. Add a check and register cpum_sf only if at least one sampling mode is authorized. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_sf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 4d8ddd8bd9be..7e9b9e6ee821 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1938,6 +1938,9 @@ static int __init init_cpum_sampling_pmu(void) return -ENODEV; } + if (!si.as && !si.ad) + return -ENODEV; + if (si.bsdes != sizeof(struct hws_basic_entry)) { pr_cpumsf_err(RS_INIT_FAILURE_BSDES); return -EINVAL; From c33eff600584ed493adfb42e3f130a6335f97750 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 6 Jun 2015 12:44:25 +0200 Subject: [PATCH 21/30] s390/perf: add perf_regs support and user stack dump Add s390 support to dump user stack to user space for DWARF stack unwinding.
Signed-off-by: Heiko Carstens Reviewed-by: Hendrik Brueckner Reviewed-and-tested-by: Thomas Richter Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 2 ++ arch/s390/include/uapi/asm/perf_regs.h | 27 ++++++++++++++ arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/perf_regs.c | 49 ++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 arch/s390/include/uapi/asm/perf_regs.h create mode 100644 arch/s390/kernel/perf_regs.c diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 84767046daff..829c67986db7 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -159,6 +159,8 @@ config S390 select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_MEMBLOCK_PHYS_MAP diff --git a/arch/s390/include/uapi/asm/perf_regs.h b/arch/s390/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..f84ea6a181d3 --- /dev/null +++ b/arch/s390/include/uapi/asm/perf_regs.h @@ -0,0 +1,27 @@ +#ifndef _ASM_S390_PERF_REGS_H +#define _ASM_S390_PERF_REGS_H + +enum perf_event_s390_regs { + PERF_REG_S390_R0, + PERF_REG_S390_R1, + PERF_REG_S390_R2, + PERF_REG_S390_R3, + PERF_REG_S390_R4, + PERF_REG_S390_R5, + PERF_REG_S390_R6, + PERF_REG_S390_R7, + PERF_REG_S390_R8, + PERF_REG_S390_R9, + PERF_REG_S390_R10, + PERF_REG_S390_R11, + PERF_REG_S390_R12, + PERF_REG_S390_R13, + PERF_REG_S390_R14, + PERF_REG_S390_R15, + PERF_REG_S390_MASK, + PERF_REG_S390_PC, + + PERF_REG_S390_MAX +}; + +#endif /* _ASM_S390_PERF_REGS_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 0319f4e81ea4..909bce65cb2b 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -79,7 +79,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o -obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o 
+obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o obj-$(CONFIG_TRACEPOINTS) += trace.o diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c new file mode 100644 index 000000000000..e883e6a2146a --- /dev/null +++ b/arch/s390/kernel/perf_regs.c @@ -0,0 +1,49 @@ +#include +#include +#include +#include +#include +#include + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX)) + return 0; + + if (idx == PERF_REG_S390_MASK) + return regs->psw.mask; + if (idx == PERF_REG_S390_PC) + return regs->psw.addr; + + return regs->gprs[idx]; +} + +#define REG_RESERVED (~((1UL << PERF_REG_S390_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + if (test_tsk_thread_flag(task, TIF_31BIT)) + return PERF_SAMPLE_REGS_ABI_32; + + return PERF_SAMPLE_REGS_ABI_64; +} + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs, + struct pt_regs *regs_user_copy) +{ + /* + * Use the regs from the first interruption and let + * perf_sample_regs_intr() handle interrupts (regs == get_irq_regs()). + */ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} From f704ef44602fbf403e6722c7ed13f62d17e8cb20 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 19 Jan 2016 11:23:38 +0100 Subject: [PATCH 22/30] s390/perf: add support for perf_regs and libdw With support for perf_regs and libdw, you can record and report call graphs for user space programs. Simply invoke perf with the --call-graph=dwarf command line option. 
Signed-off-by: Heiko Carstens [brueckner: added dwfl_thread_state_register_pc() call] Signed-off-by: Hendrik Brueckner Reviewed-and-tested-by: Thomas Richter Signed-off-by: Martin Schwidefsky --- tools/perf/Makefile.config | 6 ++- tools/perf/arch/s390/include/perf_regs.h | 63 ++++++++++++++++++++++++ tools/perf/arch/s390/util/Build | 1 + tools/perf/arch/s390/util/dwarf-regs.c | 4 ++ tools/perf/arch/s390/util/unwind-libdw.c | 40 +++++++++++++++ 5 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 tools/perf/arch/s390/include/perf_regs.h create mode 100644 tools/perf/arch/s390/util/unwind-libdw.c diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 63f534a0902f..ed65e82f034e 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -53,6 +53,10 @@ ifeq ($(SRCARCH),arm64) LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif +ifeq ($(ARCH),s390) + NO_PERF_REGS := 0 +endif + ifeq ($(NO_PERF_REGS),0) $(call detected,CONFIG_PERF_REGS) endif @@ -61,7 +65,7 @@ endif # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures # to the check. 
-ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc)) +ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc s390)) NO_LIBDW_DWARF_UNWIND := 1 endif diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h new file mode 100644 index 000000000000..ac4b8c8c49e2 --- /dev/null +++ b/tools/perf/arch/s390/include/perf_regs.h @@ -0,0 +1,63 @@ +#ifndef ARCH_PERF_REGS_H +#define ARCH_PERF_REGS_H + +#include +#include +#include <../../../../arch/s390/include/uapi/asm/perf_regs.h> + +void perf_regs_load(u64 *regs); + +#define PERF_REGS_MASK ((1ULL << PERF_REG_S390_MAX) - 1) +#define PERF_REGS_MAX PERF_REG_S390_MAX +#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64 + +#define PERF_REG_IP PERF_REG_S390_PC +#define PERF_REG_SP PERF_REG_S390_R15 + +static inline const char *perf_reg_name(int id) +{ + switch (id) { + case PERF_REG_S390_R0: + return "R0"; + case PERF_REG_S390_R1: + return "R1"; + case PERF_REG_S390_R2: + return "R2"; + case PERF_REG_S390_R3: + return "R3"; + case PERF_REG_S390_R4: + return "R4"; + case PERF_REG_S390_R5: + return "R5"; + case PERF_REG_S390_R6: + return "R6"; + case PERF_REG_S390_R7: + return "R7"; + case PERF_REG_S390_R8: + return "R8"; + case PERF_REG_S390_R9: + return "R9"; + case PERF_REG_S390_R10: + return "R10"; + case PERF_REG_S390_R11: + return "R11"; + case PERF_REG_S390_R12: + return "R12"; + case PERF_REG_S390_R13: + return "R13"; + case PERF_REG_S390_R14: + return "R14"; + case PERF_REG_S390_R15: + return "R15"; + case PERF_REG_S390_MASK: + return "MASK"; + case PERF_REG_S390_PC: + return "PC"; + default: + return NULL; + } + + return NULL; +} + +#endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index 397084382b23..4a233683c684 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build @@ -2,6 +2,7 @@ libperf-y += header.o libperf-y += kvm-stat.o libperf-$(CONFIG_DWARF) += dwarf-regs.o 
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o libperf-y += machine.o diff --git a/tools/perf/arch/s390/util/dwarf-regs.c b/tools/perf/arch/s390/util/dwarf-regs.c index 0dff5b2ed1e5..0cd7cba5d6ee 100644 --- a/tools/perf/arch/s390/util/dwarf-regs.c +++ b/tools/perf/arch/s390/util/dwarf-regs.c @@ -19,5 +19,9 @@ static const char *gpr_names[NUM_GPRS] = { const char *get_arch_regstr(unsigned int n) { + if (n == 64) + return "mask"; + if (n == 65) + return "pc"; return (n >= NUM_GPRS) ? NULL : gpr_names[n]; } diff --git a/tools/perf/arch/s390/util/unwind-libdw.c b/tools/perf/arch/s390/util/unwind-libdw.c new file mode 100644 index 000000000000..281bbb82402a --- /dev/null +++ b/tools/perf/arch/s390/util/unwind-libdw.c @@ -0,0 +1,40 @@ +#include +#include "../../util/unwind-libdw.h" +#include "../../util/perf_regs.h" +#include "../../util/event.h" + + +bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) +{ + struct unwind_info *ui = arg; + struct regs_dump *user_regs = &ui->sample->user_regs; + Dwarf_Word dwarf_regs[PERF_REG_S390_MAX]; + +#define REG(r) ({ \ + Dwarf_Word val = 0; \ + perf_reg_value(&val, user_regs, PERF_REG_S390_##r); \ + val; \ +}) + + dwarf_regs[0] = REG(R0); + dwarf_regs[1] = REG(R1); + dwarf_regs[2] = REG(R2); + dwarf_regs[3] = REG(R3); + dwarf_regs[4] = REG(R4); + dwarf_regs[5] = REG(R5); + dwarf_regs[6] = REG(R6); + dwarf_regs[7] = REG(R7); + dwarf_regs[8] = REG(R8); + dwarf_regs[9] = REG(R9); + dwarf_regs[10] = REG(R10); + dwarf_regs[11] = REG(R11); + dwarf_regs[12] = REG(R12); + dwarf_regs[13] = REG(R13); + dwarf_regs[14] = REG(R14); + dwarf_regs[15] = REG(R15); + dwarf_regs[16] = REG(MASK); + dwarf_regs[17] = REG(PC); + + dwfl_thread_state_register_pc(thread, dwarf_regs[17]); + return dwfl_thread_state_registers(thread, 0, 16, dwarf_regs); +} From a9fc2db0a8abf51c81122799c5ae4808f2324b6d Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 8 Nov 2017 09:01:12 +0100 Subject: [PATCH 23/30] s390/perf: define common 
DWARF register string table Instead of defining DWARF register to string table in dwarf-regs-table.h and dwarf-regs.c, use a common table in dwarf-regs-table.h. Ensure that the DWARF register table is up-to-date with http://refspecs.linuxfoundation.org/ELF/zSeries/lzsabi0_s390/x1542.html. For unwinding with libdw, also ensure to correctly setup the DWARF register frame according to the register mappings. Currently, libdw supports up to 32 registers only. Suggested-by: Thomas Richter Signed-off-by: Hendrik Brueckner Reviewed-and-tested-by: Thomas Richter Signed-off-by: Martin Schwidefsky --- .../perf/arch/s390/include/dwarf-regs-table.h | 73 +++++++++++++++++-- tools/perf/arch/s390/util/dwarf-regs.c | 15 +--- tools/perf/arch/s390/util/unwind-libdw.c | 15 ++-- 3 files changed, 81 insertions(+), 22 deletions(-) diff --git a/tools/perf/arch/s390/include/dwarf-regs-table.h b/tools/perf/arch/s390/include/dwarf-regs-table.h index 792d4c277225..671553525f41 100644 --- a/tools/perf/arch/s390/include/dwarf-regs-table.h +++ b/tools/perf/arch/s390/include/dwarf-regs-table.h @@ -1,9 +1,72 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef S390_DWARF_REGS_TABLE_H +#define S390_DWARF_REGS_TABLE_H + +#define REG_DWARFNUM_NAME(reg, idx) [idx] = "%" #reg + +/* + * For reference, see DWARF register mapping: + * http://refspecs.linuxfoundation.org/ELF/zSeries/lzsabi0_s390/x1542.html + */ +static const char * const s390_dwarf_regs[] = { + "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", + REG_DWARFNUM_NAME(f0, 16), + REG_DWARFNUM_NAME(f1, 20), + REG_DWARFNUM_NAME(f2, 17), + REG_DWARFNUM_NAME(f3, 21), + REG_DWARFNUM_NAME(f4, 18), + REG_DWARFNUM_NAME(f5, 22), + REG_DWARFNUM_NAME(f6, 19), + REG_DWARFNUM_NAME(f7, 23), + REG_DWARFNUM_NAME(f8, 24), + REG_DWARFNUM_NAME(f9, 28), + REG_DWARFNUM_NAME(f10, 25), + REG_DWARFNUM_NAME(f11, 29), + REG_DWARFNUM_NAME(f12, 26), + REG_DWARFNUM_NAME(f13, 30), + REG_DWARFNUM_NAME(f14, 
27), + REG_DWARFNUM_NAME(f15, 31), + REG_DWARFNUM_NAME(c0, 32), + REG_DWARFNUM_NAME(c1, 33), + REG_DWARFNUM_NAME(c2, 34), + REG_DWARFNUM_NAME(c3, 35), + REG_DWARFNUM_NAME(c4, 36), + REG_DWARFNUM_NAME(c5, 37), + REG_DWARFNUM_NAME(c6, 38), + REG_DWARFNUM_NAME(c7, 39), + REG_DWARFNUM_NAME(c8, 40), + REG_DWARFNUM_NAME(c9, 41), + REG_DWARFNUM_NAME(c10, 42), + REG_DWARFNUM_NAME(c11, 43), + REG_DWARFNUM_NAME(c12, 44), + REG_DWARFNUM_NAME(c13, 45), + REG_DWARFNUM_NAME(c14, 46), + REG_DWARFNUM_NAME(c15, 47), + REG_DWARFNUM_NAME(a0, 48), + REG_DWARFNUM_NAME(a1, 49), + REG_DWARFNUM_NAME(a2, 50), + REG_DWARFNUM_NAME(a3, 51), + REG_DWARFNUM_NAME(a4, 52), + REG_DWARFNUM_NAME(a5, 53), + REG_DWARFNUM_NAME(a6, 54), + REG_DWARFNUM_NAME(a7, 55), + REG_DWARFNUM_NAME(a8, 56), + REG_DWARFNUM_NAME(a9, 57), + REG_DWARFNUM_NAME(a10, 58), + REG_DWARFNUM_NAME(a11, 59), + REG_DWARFNUM_NAME(a12, 60), + REG_DWARFNUM_NAME(a13, 61), + REG_DWARFNUM_NAME(a14, 62), + REG_DWARFNUM_NAME(a15, 63), + REG_DWARFNUM_NAME(pswm, 64), + REG_DWARFNUM_NAME(pswa, 65), +}; + #ifdef DEFINE_DWARF_REGSTR_TABLE /* This is included in perf/util/dwarf-regs.c */ -static const char * const s390_regstr_tbl[] = { - "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", - "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", -}; -#endif +#define s390_regstr_tbl s390_dwarf_regs + +#endif /* DEFINE_DWARF_REGSTR_TABLE */ +#endif /* S390_DWARF_REGS_TABLE_H */ diff --git a/tools/perf/arch/s390/util/dwarf-regs.c b/tools/perf/arch/s390/util/dwarf-regs.c index 0cd7cba5d6ee..f47576ce13ea 100644 --- a/tools/perf/arch/s390/util/dwarf-regs.c +++ b/tools/perf/arch/s390/util/dwarf-regs.c @@ -9,19 +9,10 @@ #include #include - -#define NUM_GPRS 16 - -static const char *gpr_names[NUM_GPRS] = { - "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", - "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", -}; +#include +#include "dwarf-regs-table.h" const char *get_arch_regstr(unsigned int n) { - if (n == 64) - return 
"mask"; - if (n == 65) - return "pc"; - return (n >= NUM_GPRS) ? NULL : gpr_names[n]; + return (n >= ARRAY_SIZE(s390_dwarf_regs)) ? NULL : s390_dwarf_regs[n]; } diff --git a/tools/perf/arch/s390/util/unwind-libdw.c b/tools/perf/arch/s390/util/unwind-libdw.c index 281bbb82402a..ba8d98b8b09e 100644 --- a/tools/perf/arch/s390/util/unwind-libdw.c +++ b/tools/perf/arch/s390/util/unwind-libdw.c @@ -1,21 +1,26 @@ +#include #include #include "../../util/unwind-libdw.h" #include "../../util/perf_regs.h" #include "../../util/event.h" +#include "dwarf-regs-table.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { struct unwind_info *ui = arg; struct regs_dump *user_regs = &ui->sample->user_regs; - Dwarf_Word dwarf_regs[PERF_REG_S390_MAX]; + Dwarf_Word dwarf_regs[ARRAY_SIZE(s390_dwarf_regs)]; #define REG(r) ({ \ Dwarf_Word val = 0; \ perf_reg_value(&val, user_regs, PERF_REG_S390_##r); \ val; \ }) - + /* + * For DWARF register mapping details, + * see also perf/arch/s390/include/dwarf-regs-table.h + */ dwarf_regs[0] = REG(R0); dwarf_regs[1] = REG(R1); dwarf_regs[2] = REG(R2); @@ -32,9 +37,9 @@ bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) dwarf_regs[13] = REG(R13); dwarf_regs[14] = REG(R14); dwarf_regs[15] = REG(R15); - dwarf_regs[16] = REG(MASK); - dwarf_regs[17] = REG(PC); + dwarf_regs[64] = REG(MASK); + dwarf_regs[65] = REG(PC); - dwfl_thread_state_register_pc(thread, dwarf_regs[17]); + dwfl_thread_state_register_pc(thread, dwarf_regs[65]); return dwfl_thread_state_registers(thread, 0, 16, dwarf_regs); } From 0da0017f72554c005c1a04c3adc5da9eb64fa7e5 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 8 Nov 2017 07:30:15 +0100 Subject: [PATCH 24/30] s390/perf: extend perf_regs support to include floating-point registers Extend the perf register support to also export floating-point register contents for user space tasks. Floating-point registers might be used in leaf functions to contain the return address. 
Hence, they are required for proper DWARF unwinding. Signed-off-by: Hendrik Brueckner Reviewed-and-tested-by: Thomas Richter Signed-off-by: Martin Schwidefsky --- arch/s390/include/uapi/asm/perf_regs.h | 16 ++++++++++++++++ arch/s390/kernel/perf_regs.c | 21 +++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/arch/s390/include/uapi/asm/perf_regs.h b/arch/s390/include/uapi/asm/perf_regs.h index f84ea6a181d3..7c8564f98205 100644 --- a/arch/s390/include/uapi/asm/perf_regs.h +++ b/arch/s390/include/uapi/asm/perf_regs.h @@ -18,6 +18,22 @@ enum perf_event_s390_regs { PERF_REG_S390_R13, PERF_REG_S390_R14, PERF_REG_S390_R15, + PERF_REG_S390_FP0, + PERF_REG_S390_FP1, + PERF_REG_S390_FP2, + PERF_REG_S390_FP3, + PERF_REG_S390_FP4, + PERF_REG_S390_FP5, + PERF_REG_S390_FP6, + PERF_REG_S390_FP7, + PERF_REG_S390_FP8, + PERF_REG_S390_FP9, + PERF_REG_S390_FP10, + PERF_REG_S390_FP11, + PERF_REG_S390_FP12, + PERF_REG_S390_FP13, + PERF_REG_S390_FP14, + PERF_REG_S390_FP15, PERF_REG_S390_MASK, PERF_REG_S390_PC, diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c index e883e6a2146a..f8603ebed669 100644 --- a/arch/s390/kernel/perf_regs.c +++ b/arch/s390/kernel/perf_regs.c @@ -4,12 +4,29 @@ #include #include #include +#include +#include u64 perf_reg_value(struct pt_regs *regs, int idx) { + freg_t fp; + if (WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX)) return 0; + if (idx >= PERF_REG_S390_R0 && idx <= PERF_REG_S390_R15) + return regs->gprs[idx]; + + if (idx >= PERF_REG_S390_FP0 && idx <= PERF_REG_S390_FP15) { + if (!user_mode(regs)) + return 0; + + idx -= PERF_REG_S390_FP0; + fp = MACHINE_HAS_VX ? 
*(freg_t *)(current->thread.fpu.vxrs + idx) + : current->thread.fpu.fprs[idx]; + return fp.ui; + } + if (idx == PERF_REG_S390_MASK) return regs->psw.mask; if (idx == PERF_REG_S390_PC) @@ -43,7 +60,11 @@ void perf_get_regs_user(struct perf_regs *regs_user, /* * Use the regs from the first interruption and let * perf_sample_regs_intr() handle interrupts (regs == get_irq_regs()). + * + * Also save FPU registers for user-space tasks only. */ regs_user->regs = task_pt_regs(current); + if (user_mode(regs_user->regs)) + save_fpu_regs(); regs_user->abi = perf_reg_abi(current); } From de9954b75e992d26a67d83990e6219dcb6edc3bf Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 8 Nov 2017 09:17:38 +0100 Subject: [PATCH 25/30] s390/perf: add perf register support for floating-point registers For correct unwinding of user space processes, the floating-point register contents are required. For example, leaf functions might use fp registers to temporarily store the return address. Signed-off-by: Hendrik Brueckner Reviewed-and-tested-by: Thomas Richter Signed-off-by: Martin Schwidefsky --- tools/perf/arch/s390/include/perf_regs.h | 32 ++++++++++++++++++++++++ tools/perf/arch/s390/util/unwind-libdw.c | 20 ++++++++++++++- 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h index ac4b8c8c49e2..d2df54a6bc5a 100644 --- a/tools/perf/arch/s390/include/perf_regs.h +++ b/tools/perf/arch/s390/include/perf_regs.h @@ -49,6 +49,38 @@ static inline const char *perf_reg_name(int id) return "R14"; case PERF_REG_S390_R15: return "R15"; + case PERF_REG_S390_FP0: + return "FP0"; + case PERF_REG_S390_FP1: + return "FP1"; + case PERF_REG_S390_FP2: + return "FP2"; + case PERF_REG_S390_FP3: + return "FP3"; + case PERF_REG_S390_FP4: + return "FP4"; + case PERF_REG_S390_FP5: + return "FP5"; + case PERF_REG_S390_FP6: + return "FP6"; + case PERF_REG_S390_FP7: + return "FP7"; + case PERF_REG_S390_FP8: + 
return "FP8"; + case PERF_REG_S390_FP9: + return "FP9"; + case PERF_REG_S390_FP10: + return "FP10"; + case PERF_REG_S390_FP11: + return "FP11"; + case PERF_REG_S390_FP12: + return "FP12"; + case PERF_REG_S390_FP13: + return "FP13"; + case PERF_REG_S390_FP14: + return "FP14"; + case PERF_REG_S390_FP15: + return "FP15"; case PERF_REG_S390_MASK: return "MASK"; case PERF_REG_S390_PC: diff --git a/tools/perf/arch/s390/util/unwind-libdw.c b/tools/perf/arch/s390/util/unwind-libdw.c index ba8d98b8b09e..387c698cdd1b 100644 --- a/tools/perf/arch/s390/util/unwind-libdw.c +++ b/tools/perf/arch/s390/util/unwind-libdw.c @@ -37,9 +37,27 @@ bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) dwarf_regs[13] = REG(R13); dwarf_regs[14] = REG(R14); dwarf_regs[15] = REG(R15); + + dwarf_regs[16] = REG(FP0); + dwarf_regs[17] = REG(FP2); + dwarf_regs[18] = REG(FP4); + dwarf_regs[19] = REG(FP6); + dwarf_regs[20] = REG(FP1); + dwarf_regs[21] = REG(FP3); + dwarf_regs[22] = REG(FP5); + dwarf_regs[23] = REG(FP7); + dwarf_regs[24] = REG(FP8); + dwarf_regs[25] = REG(FP10); + dwarf_regs[26] = REG(FP12); + dwarf_regs[27] = REG(FP14); + dwarf_regs[28] = REG(FP9); + dwarf_regs[29] = REG(FP11); + dwarf_regs[30] = REG(FP13); + dwarf_regs[31] = REG(FP15); + dwarf_regs[64] = REG(MASK); dwarf_regs[65] = REG(PC); dwfl_thread_state_register_pc(thread, dwarf_regs[65]); - return dwfl_thread_state_registers(thread, 0, 16, dwarf_regs); + return dwfl_thread_state_registers(thread, 0, 32, dwarf_regs); } From d4c7e649d7bf17792629dbeaf25945e26a32894f Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 27 Oct 2017 15:45:19 +0200 Subject: [PATCH 26/30] s390/cpum_sf: load program parameter at sampler enablement The lpp instruction is used to place the PID of the current task in the program-parameter (PP) register. The register contents is then included in the sampling data entries. The lpp instruction loads the PP register only when at least one sampling function is enabled. 
Otherwise it is executed as a no-op. Linux calls lpp at context switch. If the context switch happens before the sampler is enabled, the PP register is empty. That means, the PID of the task that is sampled is not stored in sampling data until the next context switch. Hence, always call lpp when enabling the sampler. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mf.h | 6 ++++++ arch/s390/kernel/perf_cpum_sf.c | 3 +++ 2 files changed, 9 insertions(+) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 7364130a29c8..792cda339af1 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -144,6 +144,12 @@ struct hws_trailer_entry { unsigned long long progusage2; /* */ } __packed; +/* Load program parameter */ +static inline void lpp(void *pp) +{ + asm volatile(".insn s,0xb2800000,0(%0)\n":: "a" (pp) : "memory"); +} + /* Query counter information */ static inline int qctri(struct cpumf_ctr_info *info) { diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 7e9b9e6ee821..dbb62c05805d 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -848,6 +848,9 @@ static void cpumsf_pmu_enable(struct pmu *pmu) return; } + /* Load current program parameter */ + lpp(&S390_lowcore.lpp); + debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i " "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed, cpuhw->lsctl.cd, From 544e8dd7a8e49d22b4315fc232479bc02b417b46 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 8 Mar 2016 14:00:23 +0100 Subject: [PATCH 27/30] s390/cpum_sf: correctly set the PID and TID in perf samples The hardware sampler creates samples that are processed at a later point in time. The PID and TID values of the perf samples that are created for hardware samples are initialized with values from the current task. 
Hence, the PID and TID values are not correct and perf samples are associated with wrong processes. The PID and TID values are obtained from the Host Program Parameter (HPP) field in the basic-sampling data entries. These PIDs are valid in the init PID namespace. Ensure that the PIDs in the perf samples are resolved considering the PID namespace in which the perf event was created. To correct the PID and TID values in the created perf samples, a special overflow handler is installed. It replaces the default overflow handler and does not become effective if any other overflow handler is used. With the special overflow handler most of the perf samples are associated with the right processes. For processes that no longer exist, the association might still be wrong. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/setup.h | 2 +- arch/s390/kernel/perf_cpum_sf.c | 76 +++++++++++++++++++++++++++++++++ arch/s390/mm/fault.c | 2 +- 3 files changed, 78 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 8bc87dcb10eb..2eb0c8a7b664 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -36,7 +36,7 @@ #define MACHINE_FLAG_SCC _BITUL(17) #define LPP_MAGIC _BITUL(31) -#define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) +#define LPP_PID_MASK _AC(0xffffffff, UL) #ifndef __ASSEMBLY__ diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index dbb62c05805d..227b38bd82c9 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -615,6 +616,67 @@ static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, si->min_sampl_rate, si->max_sampl_rate); } +static u32 cpumsf_pid_type(struct perf_event *event, + u32 pid, enum pid_type type) +{ + struct task_struct *tsk; + + /* Idle process */ + if (!pid) + goto out; + + tsk
= find_task_by_pid_ns(pid, &init_pid_ns); + pid = -1; + if (tsk) { + /* + * Only top level events contain the pid namespace in which + * they are created. + */ + if (event->parent) + event = event->parent; + pid = __task_pid_nr_ns(tsk, type, event->ns); + /* + * See also 1d953111b648 + * "perf/core: Don't report zero PIDs for exiting tasks". + */ + if (!pid && !pid_alive(tsk)) + pid = -1; + } +out: + return pid; +} + +static void cpumsf_output_event_pid(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + u32 pid; + struct perf_event_header header; + struct perf_output_handle handle; + + /* + * Obtain the PID from the basic-sampling data entry and + * correct the data->tid_entry.pid value. + */ + pid = data->tid_entry.pid; + + /* Protect callchain buffers, tasks */ + rcu_read_lock(); + + perf_prepare_sample(&header, data, event, regs); + if (perf_output_begin(&handle, event, header.size)) + goto out; + + /* Update the process ID (see also kernel/events/core.c) */ + data->tid_entry.pid = cpumsf_pid_type(event, pid, __PIDTYPE_TGID); + data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID); + + perf_output_sample(&handle, &header, data, event); + perf_output_end(&handle); +out: + rcu_read_unlock(); +} + static int __hw_perf_event_init(struct perf_event *event) { struct cpu_hw_sf *cpuhw; @@ -748,6 +810,14 @@ static int __hw_perf_event_init(struct perf_event *event) break; } } + + /* If PID/TID sampling is active, replace the default overflow + * handler to extract and resolve the PIDs from the basic-sampling + * data entries. + */ + if (event->attr.sample_type & PERF_SAMPLE_TID) + if (is_default_overflow_handler(event)) + event->overflow_handler = cpumsf_output_event_pid; out: return err; } @@ -985,6 +1055,12 @@ static int perf_push_sample(struct perf_event *event, break; } + /* + * Store the PID value from the sample-data-entry to be + * processed and resolved by cpumsf_output_event_pid(). 
+ */ + data.tid_entry.pid = basic->hpp & LPP_PID_MASK; + overflow = 0; if (perf_exclude_event(event, &regs, sde_regs)) goto out; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 14654007dce4..93faeca52284 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -728,7 +728,7 @@ static void pfault_interrupt(struct ext_code ext_code, return; inc_irq_stat(IRQEXT_PFL); /* Get the token (= pid of the affected task). */ - pid = param64 & LPP_PFAULT_PID_MASK; + pid = param64 & LPP_PID_MASK; rcu_read_lock(); tsk = find_task_by_pid_ns(pid, &init_pid_ns); if (tsk) From 75492a51568b943e903e1d7de10a962fbc001ba6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 16:44:34 -0700 Subject: [PATCH 28/30] s390/scsi: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Steffen Maier Cc: Benjamin Block Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Martin Schwidefsky --- drivers/s390/scsi/zfcp_erp.c | 16 ++++++++++------ drivers/s390/scsi/zfcp_ext.h | 2 +- drivers/s390/scsi/zfcp_fsf.c | 13 ++++++------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index cbb8156bf5e0..822a852d578e 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -56,6 +56,8 @@ enum zfcp_erp_act_result { ZFCP_ERP_NOMEM = 5, }; +static void zfcp_erp_memwait_handler(struct timer_list *t); + static void zfcp_erp_adapter_block(struct zfcp_adapter *adapter, int mask) { zfcp_erp_clear_adapter_status(adapter, @@ -237,6 +239,7 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status, erp_action->fsf_req_id = 0; erp_action->action = need; erp_action->status = act_status; + timer_setup(&erp_action->timer, zfcp_erp_memwait_handler, 0); return erp_action; }
@@ -564,21 +567,22 @@ void zfcp_erp_notify(struct zfcp_erp_action *erp_action, unsigned long set_mask) * zfcp_erp_timeout_handler - Trigger ERP action from timed out ERP request * @data: ERP action (from timer data) */ -void zfcp_erp_timeout_handler(unsigned long data) +void zfcp_erp_timeout_handler(struct timer_list *t) { - struct zfcp_erp_action *act = (struct zfcp_erp_action *) data; + struct zfcp_fsf_req *fsf_req = from_timer(fsf_req, t, timer); + struct zfcp_erp_action *act = fsf_req->erp_action; zfcp_erp_notify(act, ZFCP_STATUS_ERP_TIMEDOUT); } -static void zfcp_erp_memwait_handler(unsigned long data) +static void zfcp_erp_memwait_handler(struct timer_list *t) { - zfcp_erp_notify((struct zfcp_erp_action *)data, 0); + struct zfcp_erp_action *act = from_timer(act, t, timer); + + zfcp_erp_notify(act, 0); } static void zfcp_erp_strategy_memwait(struct zfcp_erp_action *erp_action) { - setup_timer(&erp_action->timer, zfcp_erp_memwait_handler, - (unsigned long) erp_action); erp_action->timer.expires = jiffies + HZ; add_timer(&erp_action->timer); } diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 8ca2ab7deaa9..978a0d596f68 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -69,7 +69,7 @@ extern int zfcp_erp_thread_setup(struct zfcp_adapter *); extern void zfcp_erp_thread_kill(struct zfcp_adapter *); extern void zfcp_erp_wait(struct zfcp_adapter *); extern void zfcp_erp_notify(struct zfcp_erp_action *, unsigned long); -extern void zfcp_erp_timeout_handler(unsigned long); +extern void zfcp_erp_timeout_handler(struct timer_list *); /* zfcp_fc.c */ extern struct kmem_cache *zfcp_fc_req_cache; diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 00fb98f7b2cd..6f437df1995f 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -21,9 +21,10 @@ struct kmem_cache *zfcp_fsf_qtcb_cache; -static void zfcp_fsf_request_timeout_handler(unsigned long data) +static void 
zfcp_fsf_request_timeout_handler(struct timer_list *t) { - struct zfcp_adapter *adapter = (struct zfcp_adapter *) data; + struct zfcp_fsf_req *fsf_req = from_timer(fsf_req, t, timer); + struct zfcp_adapter *adapter = fsf_req->adapter; zfcp_qdio_siosl(adapter); zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED, "fsrth_1"); @@ -32,8 +33,7 @@ static void zfcp_fsf_request_timeout_handler(unsigned long data) static void zfcp_fsf_start_timer(struct zfcp_fsf_req *fsf_req, unsigned long timeout) { - fsf_req->timer.function = zfcp_fsf_request_timeout_handler; - fsf_req->timer.data = (unsigned long) fsf_req->adapter; + fsf_req->timer.function = (TIMER_FUNC_TYPE)zfcp_fsf_request_timeout_handler; fsf_req->timer.expires = jiffies + timeout; add_timer(&fsf_req->timer); } @@ -41,8 +41,7 @@ static void zfcp_fsf_start_timer(struct zfcp_fsf_req *fsf_req, static void zfcp_fsf_start_erp_timer(struct zfcp_fsf_req *fsf_req) { BUG_ON(!fsf_req->erp_action); - fsf_req->timer.function = zfcp_erp_timeout_handler; - fsf_req->timer.data = (unsigned long) fsf_req->erp_action; + fsf_req->timer.function = (TIMER_FUNC_TYPE)zfcp_erp_timeout_handler; fsf_req->timer.expires = jiffies + 30 * HZ; add_timer(&fsf_req->timer); } @@ -692,7 +691,7 @@ static struct zfcp_fsf_req *zfcp_fsf_req_create(struct zfcp_qdio *qdio, adapter->req_no++; INIT_LIST_HEAD(&req->list); - init_timer(&req->timer); + timer_setup(&req->timer, NULL, 0); init_completion(&req->completion); req->adapter = adapter; From 5c13db9b5df2dc97b0d4ce9d442bbcd80b5194b7 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 17 Oct 2017 18:40:51 +0200 Subject: [PATCH 29/30] zfcp: purely mechanical update using timer API, plus blank lines erp_memwait only occurs in seldom memory pressure situations. The typical case never uses the associated timer and thus also does not need to initialize the timer. 
Also, we don't want to re-initialize the timer each time we re-use an erp_action in zfcp_erp_setup_act() [see also v4.14-rc7 commit ab31fd0ce65e ("scsi: zfcp: fix erp_action use-before-initialize in REC action trace") for erp_action life cycle]. Hence, retain the lazy initialization of zfcp_erp_action.timer in zfcp_erp_strategy_memwait(). Add an empty line after declarations in zfcp_erp_timeout_handler() and zfcp_fsf_request_timeout_handler() even though it was also missing before the timer conversion. Fix checkpatch warning: WARNING: function definition argument 'struct timer_list *' should also have an identifier name +extern void zfcp_erp_timeout_handler(struct timer_list *); Depends-on: v4.14-rc3 commit 686fef928bba ("timer: Prepare to change timer callback argument type") Signed-off-by: Steffen Maier Reviewed-by: Jens Remus Signed-off-by: Martin Schwidefsky --- drivers/s390/scsi/zfcp_erp.c | 5 ++--- drivers/s390/scsi/zfcp_ext.h | 2 +- drivers/s390/scsi/zfcp_fsf.c | 1 + 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 822a852d578e..1d91a32db08e 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -56,8 +56,6 @@ enum zfcp_erp_act_result { ZFCP_ERP_NOMEM = 5, }; -static void zfcp_erp_memwait_handler(struct timer_list *t); - static void zfcp_erp_adapter_block(struct zfcp_adapter *adapter, int mask) { zfcp_erp_clear_adapter_status(adapter, @@ -239,7 +237,6 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status, erp_action->fsf_req_id = 0; erp_action->action = need; erp_action->status = act_status; - timer_setup(&erp_action->timer, zfcp_erp_memwait_handler, 0); return erp_action; } @@ -571,6 +568,7 @@ void zfcp_erp_timeout_handler(struct timer_list *t) { struct zfcp_fsf_req *fsf_req = from_timer(fsf_req, t, timer); struct zfcp_erp_action *act = fsf_req->erp_action; + zfcp_erp_notify(act, ZFCP_STATUS_ERP_TIMEDOUT); } @@ -583,6 +581,7 @@
static void zfcp_erp_memwait_handler(struct timer_list *t) static void zfcp_erp_strategy_memwait(struct zfcp_erp_action *erp_action) { + timer_setup(&erp_action->timer, zfcp_erp_memwait_handler, 0); erp_action->timer.expires = jiffies + HZ; add_timer(&erp_action->timer); } diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 978a0d596f68..bf8ea4df2bb8 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -69,7 +69,7 @@ extern int zfcp_erp_thread_setup(struct zfcp_adapter *); extern void zfcp_erp_thread_kill(struct zfcp_adapter *); extern void zfcp_erp_wait(struct zfcp_adapter *); extern void zfcp_erp_notify(struct zfcp_erp_action *, unsigned long); -extern void zfcp_erp_timeout_handler(struct timer_list *); +extern void zfcp_erp_timeout_handler(struct timer_list *t); /* zfcp_fc.c */ extern struct kmem_cache *zfcp_fc_req_cache; diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 6f437df1995f..51b81c0a0652 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -25,6 +25,7 @@ static void zfcp_fsf_request_timeout_handler(struct timer_list *t) { struct zfcp_fsf_req *fsf_req = from_timer(fsf_req, t, timer); struct zfcp_adapter *adapter = fsf_req->adapter; + zfcp_qdio_siosl(adapter); zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED, "fsrth_1"); From ab35727eb879ccc9487c6df0a3796be124ed39d3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 16 Nov 2017 13:49:50 +0100 Subject: [PATCH 30/30] s390: remove unused parameter from Makefile Remove unused parameter from the call function, which I accidentally added. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/tools/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile index 2ebf2872cc16..2e70e25de07a 100644 --- a/arch/s390/tools/Makefile +++ b/arch/s390/tools/Makefile @@ -21,4 +21,4 @@ include/generated/facilities.h: $(obj)/gen_facilities FORCE $(call filechk,facilities.h) include/generated/dis.h: $(obj)/gen_opcode_table FORCE - $(call filechk,dis.h,__FUN) + $(call filechk,dis.h)