829a999625
The ".acq" semantics of the load only apply w.r.t. other data access. Reading the clock (ar.itc) isn't a data access so strange things can happen here. Specifically the read of ar.itc can be launched as soon as the read of xtime_lock.sequence is ISSUED. Since this may cache miss, and that might cause a thread switch, and there may be cache contention for the line containing xtime_lock, it may be a long time before the actual value is returned, so the ar.itc value may be very stale. Move the consumption of r28 up before the read of ar.itc to make sure that we really have got the current value of xtime_lock.sequence before we look at ar.itc. Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
991 lines
29 KiB
ArmAsm
991 lines
29 KiB
ArmAsm
/*
|
|
* This file contains the light-weight system call handlers (fsyscall-handlers).
|
|
*
|
|
* Copyright (C) 2003 Hewlett-Packard Co
|
|
* David Mosberger-Tang <davidm@hpl.hp.com>
|
|
*
|
|
* 25-Sep-03 davidm Implement fsys_rt_sigprocmask().
|
|
* 18-Feb-03 louisk Implement fsys_gettimeofday().
|
|
* 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more,
|
|
* probably broke it along the way... ;-)
|
|
* 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make
|
|
* it capable of using memory based clocks without falling back to C code.
|
|
* 08-Feb-07 Fenghua Yu Implement fsys_getcpu.
|
|
*
|
|
*/
|
|
|
|
#include <asm/asmmacro.h>
|
|
#include <asm/errno.h>
|
|
#include <asm/asm-offsets.h>
|
|
#include <asm/percpu.h>
|
|
#include <asm/thread_info.h>
|
|
#include <asm/sal.h>
|
|
#include <asm/signal.h>
|
|
#include <asm/system.h>
|
|
#include <asm/unistd.h>
|
|
|
|
#include "entry.h"
|
|
|
|
/*
|
|
* See Documentation/ia64/fsys.txt for details on fsyscalls.
|
|
*
|
|
* On entry to an fsyscall handler:
|
|
* r10 = 0 (i.e., defaults to "successful syscall return")
|
|
* r11 = saved ar.pfs (a user-level value)
|
|
* r15 = system call number
|
|
* r16 = "current" task pointer (in normal kernel-mode, this is in r13)
|
|
* r32-r39 = system call arguments
|
|
* b6 = return address (a user-level value)
|
|
* ar.pfs = previous frame-state (a user-level value)
|
|
* PSR.be = cleared to zero (i.e., little-endian byte order is in effect)
|
|
* all other registers may contain values passed in from user-mode
|
|
*
|
|
* On return from an fsyscall handler:
|
|
* r11 = saved ar.pfs (as passed into the fsyscall handler)
|
|
* r15 = system call number (as passed into the fsyscall handler)
|
|
* r32-r39 = system call arguments (as passed into the fsyscall handler)
|
|
* b6 = return address (as passed into the fsyscall handler)
|
|
* ar.pfs = previous frame-state (as passed into the fsyscall handler)
|
|
*/
|
|
|
|
// fsys_ni_syscall: handler for a syscall that has no implementation.
// Returns immediately with r8 = ENOSYS and r10 = -1.  On entry r10 is 0,
// which means "successful syscall return", so -1 marks the failure.
ENTRY(fsys_ni_syscall)
|
|
.prologue
|
|
.altrp b6 // the return address is held in b6
|
|
.body
|
|
mov r8=ENOSYS // r8 = error code
|
|
mov r10=-1 // r10 = -1: override the "success" default
|
|
FSYS_RETURN
|
|
END(fsys_ni_syscall)
|
|
|
|
// fsys_getpid: light-weight getpid().
// Returns current->tgid in r8.  If any TIF_ALLWORK_MASK bit is set in the
// thread_info flags, control goes to fsys_fallback_syscall instead.
// In: r16 = current task pointer.  Scratch: r8, r9, p8.
ENTRY(fsys_getpid)
|
|
.prologue
|
|
.altrp b6
|
|
.body
|
|
add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread_info flags word
|
|
;;
|
|
ld4 r9=[r9] // r9 = thread_info flags
|
|
add r8=IA64_TASK_TGID_OFFSET,r16 // r8 = &current->tgid
|
|
;;
|
|
and r9=TIF_ALLWORK_MASK,r9 // keep only the pending-work bits
|
|
ld4 r8=[r8] // r8 = current->tgid
|
|
;;
|
|
cmp.ne p8,p0=0,r9 // p8 <- some work flag is set
|
|
(p8) br.spnt.many fsys_fallback_syscall
|
|
FSYS_RETURN
|
|
END(fsys_getpid)
|
|
|
|
/*
 * fsys_getppid: light-weight getppid().
 *
 * Returns current->group_leader->real_parent->tgid in r8.
 *
 * In:      r16 = current task pointer
 * Scratch: r8, r9, r17, r18, r19, p6, p8 (r17-r19 are zeroed before the
 *          light-weight return so that no kernel pointers reach user level)
 *
 * If any TIF_ALLWORK_MASK bit is set in the thread_info flags, the handler
 * branches to fsys_fallback_syscall.  This check is made on both SMP and
 * uniprocessor builds.
 */
ENTRY(fsys_getppid)
	.prologue
	.altrp b6
	.body
	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
	;;
	ld8 r17=[r17]				// r17 = current->group_leader
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16	// r9 = address of the thread_info flags word
	;;

	ld4 r9=[r9]				// r9 = thread_info flags
	add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
	;;
	and r9=TIF_ALLWORK_MASK,r9		// keep only the pending-work bits

1:	ld8 r18=[r17]				// r18 = current->group_leader->real_parent
	;;
	cmp.ne p8,p0=0,r9			// p8 <- some work flag is set
	add r8=IA64_TASK_TGID_OFFSET,r18	// r8 = &current->group_leader->real_parent->tgid
	;;

	/*
	 * The .acq is needed to ensure that the read of tgid has returned its data before
	 * we re-check "real_parent".
	 */
	ld4.acq r8=[r8]				// r8 = current->group_leader->real_parent->tgid
#ifdef CONFIG_SMP
	/*
	 * Re-read current->group_leader->real_parent.
	 */
	ld8 r19=[r17]				// r19 = current->group_leader->real_parent
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	cmp.ne p6,p0=r18,r19			// did real_parent change?
	mov r19=0				// i must not leak kernel bits...
(p6)	br.cond.spnt.few 1b			// yes -> redo the read of tgid and the check
	;;
	mov r17=0				// i must not leak kernel bits...
	mov r18=0				// i must not leak kernel bits...
#else
	/*
	 * No second CPU can change real_parent under us, so no re-check is
	 * needed here, but pending work must still force the heavy-weight path.
	 */
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	mov r17=0				// i must not leak kernel bits...
	mov r18=0				// i must not leak kernel bits...
	mov r19=0				// i must not leak kernel bits...
#endif
	FSYS_RETURN
END(fsys_getppid)
|
|
|
|
// fsys_set_tid_address: light-weight set_tid_address().
// In:  r16 = current task pointer, r32 = new clear_child_tid value.
// Out: r8 = 32-bit value loaded from the task at IA64_TASK_PID_OFFSET.
// Stores r32 at IA64_TASK_CLEAR_CHILD_TID_OFFSET in the task, or -1 if r32
// is NaT.  If any TIF_ALLWORK_MASK bit is set, control goes to
// fsys_fallback_syscall; the store sits ahead of that branch in the same
// instruction group, so it is issued on the fallback path as well.
// Scratch: r9, r17, r18, p6-p8 (r17/r18 are zeroed before returning).
ENTRY(fsys_set_tid_address)
|
|
.prologue
|
|
.altrp b6
|
|
.body
|
|
add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread_info flags word
|
|
;;
|
|
ld4 r9=[r9] // r9 = thread_info flags
|
|
tnat.z p6,p7=r32 // check argument register for being NaT
|
|
;;
|
|
and r9=TIF_ALLWORK_MASK,r9 // keep only the pending-work bits
|
|
add r8=IA64_TASK_PID_OFFSET,r16
|
|
add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
|
|
;;
|
|
ld4 r8=[r8] // r8 = return value
|
|
cmp.ne p8,p0=0,r9 // p8 <- some work flag is set
|
|
mov r17=-1 // value stored when r32 is NaT
|
|
;;
|
|
(p6) st8 [r18]=r32 // r32 is a real value: store it
|
|
(p7) st8 [r18]=r17 // r32 is NaT: store -1 instead
|
|
(p8) br.spnt.many fsys_fallback_syscall
|
|
;;
|
|
mov r17=0 // i must not leak kernel bits...
|
|
mov r18=0 // i must not leak kernel bits...
|
|
FSYS_RETURN
|
|
END(fsys_set_tid_address)
|
|
|
|
/*
|
|
* Ensure that the time interpolator structure is compatible with the asm code
|
|
*/
|
|
#if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \
|
|
|| IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4
|
|
#error fsys_gettimeofday incompatible with changes to struct time_interpolator
|
|
#endif
|
|
#define CLOCK_REALTIME 0
|
|
#define CLOCK_MONOTONIC 1
|
|
#define CLOCK_DIVIDE_BY_1000 0x4000
|
|
#define CLOCK_ADD_MONOTONIC 0x8000
|
|
|
|
// fsys_gettimeofday: light-weight gettimeofday(), plus the shared .gettime
// body that fsys_clock_gettime branches into.
//
// In:  r32 = pointer to the result, r33 = second argument.  Within this
//      block r33 is only checked for NaT; nothing is written through it.
// Out: r8 = 0 and r10 = 0 on success, with seconds stored at [r31] and the
//      converted sub-second value at [r31 + IA64_TIMESPEC_TV_NSEC_OFFSET];
//      r8 = EINVAL or EFAULT with r10 = -1 on failure.
//
// The entry sets r31 = r32 and r30 = CLOCK_DIVIDE_BY_1000 and falls into
// .gettime.  Bits 14 and 15 of r30 become predicates p14 (divide the
// nanoseconds by 1000) and p15 (add wall_to_monotonic).
//
// .fail_einval and .fail_efault are defined here and are also used by
// fsys_rt_sigprocmask and fsys_getcpu.
ENTRY(fsys_gettimeofday)
|
|
.prologue
|
|
.altrp b6
|
|
.body
|
|
mov r31 = r32
|
|
tnat.nz p6,p0 = r33 // guard against NaT argument
|
|
(p6) br.cond.spnt.few .fail_einval
|
|
mov r30 = CLOCK_DIVIDE_BY_1000
|
|
;;
|
|
.gettime:
|
|
// Register map
|
|
// Incoming r31 = pointer to address where to place result
|
|
// r30 = flags determining how time is processed
|
|
// r2,r3 = temp r4-r7 preserved
|
|
// r8 = result nanoseconds
|
|
// r9 = result seconds
|
|
// r10 = temporary storage for clock difference
|
|
// r11 = preserved: saved ar.pfs
|
|
// r12 = preserved: memory stack
|
|
// r13 = preserved: thread pointer
|
|
// r14 = address of mask / mask
|
|
// r15 = preserved: system call number
|
|
// r16 = preserved: current task pointer
|
|
// r17 = wall to monotonic use
|
|
// r18 = time_interpolator->offset
|
|
// r19 = address of wall_to_monotonic
|
|
// r20 = pointer to struct time_interpolator / pointer to time_interpolator->address
|
|
// r21 = shift factor
|
|
// r22 = address of time interpolator->last_counter
|
|
// r23 = address of time_interpolator->last_cycle
|
|
// r24 = address of time_interpolator->offset
|
|
// r25 = last_cycle value
|
|
// r26 = last_counter value
|
|
// r27 = pointer to xtime
|
|
// r28 = sequence number at the beginning of critical section
|
|
// r29 = address of seqlock
|
|
// r30 = time processing flags / memory address
|
|
// r31 = pointer to result
|
|
// Predicates
|
|
// p6,p7 short term use
|
|
// p8 = timesource ar.itc
|
|
// p9 = timesource mmio64
|
|
// p10 = timesource mmio32
|
|
// p11 = timesource not to be handled by asm code
|
|
// p12 = memory time source ( = p9 | p10)
|
|
// p13 = do cmpxchg with time_interpolator_last_cycle
|
|
// p14 = Divide by 1000
|
|
// p15 = Add monotonic
|
|
//
|
|
// Note that instructions are optimized for McKinley. McKinley can process two
|
|
// bundles simultaneously and therefore we continuously try to feed the CPU
|
|
// two bundles and then a stop.
|
|
tnat.nz p6,p0 = r31 // branch deferred since it does not fit into bundle structure
|
|
mov pr = r30,0xc000 // Set predicates according to function
|
|
add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
|
|
movl r20 = time_interpolator
|
|
;;
|
|
ld8 r20 = [r20] // get pointer to time_interpolator structure
|
|
movl r29 = xtime_lock
|
|
ld4 r2 = [r2] // process work pending flags
|
|
movl r27 = xtime
|
|
;; // only one bundle here
|
|
ld8 r21 = [r20] // first quad with control information
|
|
and r2 = TIF_ALLWORK_MASK,r2
|
|
(p6) br.cond.spnt.few .fail_einval // deferred branch
|
|
;;
|
|
// The first quadword of struct time_interpolator is unpacked field by field
|
|
// below; the #if check ahead of this function pins the byte offsets used.
|
|
add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20
|
|
extr r3 = r21,32,32 // time_interpolator->nsec_per_cyc
|
|
extr r8 = r21,0,16 // time_interpolator->source
|
|
cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
|
|
(p6) br.cond.spnt.many fsys_fallback_syscall
|
|
;;
|
|
cmp.eq p8,p12 = 0,r8 // Check for cpu timer
|
|
cmp.eq p9,p0 = 1,r8 // MMIO64 ?
|
|
extr r2 = r21,24,8 // time_interpolator->jitter
|
|
cmp.eq p10,p0 = 2,r8 // MMIO32 ?
|
|
cmp.ltu p11,p0 = 2,r8 // function or other clock
|
|
(p11) br.cond.spnt.many fsys_fallback_syscall
|
|
;;
|
|
setf.sig f7 = r3 // Setup for scaling of counter
|
|
(p15) movl r19 = wall_to_monotonic
|
|
(p12) ld8 r30 = [r10]
|
|
cmp.ne p13,p0 = r2,r0 // need jitter compensation?
|
|
extr r21 = r21,16,8 // shift factor
|
|
;;
|
|
// Start of the critical section.  Everything from here to the sequence
|
|
// re-check is repeated if xtime_lock.sequence changed or was odd.
|
|
.time_redo:
|
|
.pred.rel.mutex p8,p9,p10
|
|
ld4.acq r28 = [r29] // xtime_lock.sequence. Must come first for locking purposes
|
|
;;
|
|
// r28 is consumed here, ahead of the clock read, on purpose: .acq orders only
|
|
// data accesses and reading ar.itc is not one, so without this use the clock
|
|
// could be sampled before the sequence load has returned its value.
|
|
and r28 = ~1,r28 // Make sequence even to force retry if odd
|
|
;;
|
|
(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
|
|
add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20
|
|
(p9) ld8 r2 = [r30] // readq(ti->address). Could also have latency issues..
|
|
(p10) ld4 r2 = [r30] // 32-bit read of ti->address (MMIO32 source)
|
|
(p13) add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20
|
|
;; // could be removed by moving the last add upward
|
|
ld8 r26 = [r22] // time_interpolator->last_counter
|
|
(p13) ld8 r25 = [r23] // time interpolator->last_cycle
|
|
add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20
|
|
(p15) ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET
|
|
ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET
|
|
add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20
|
|
;;
|
|
ld8 r18 = [r24] // time_interpolator->offset
|
|
ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET // xtime.tv_nsec
|
|
(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm)
|
|
;;
|
|
ld8 r14 = [r14] // time_interpolator->mask
|
|
(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
|
|
sub r10 = r2,r26 // current_counter - last_counter
|
|
;;
|
|
(p6) sub r10 = r25,r26 // time we got was less than last_cycle
|
|
(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg
|
|
;;
|
|
and r10 = r10,r14 // Apply mask
|
|
;;
|
|
setf.sig f8 = r10
|
|
nop.i 123
|
|
;;
|
|
(p7) cmpxchg8.rel r3 = [r23],r2,ar.ccv
|
|
EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have spare time
|
|
xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
|
|
(p15) add r9 = r9,r17 // Add wall to monotonic.secs to result secs
|
|
;;
|
|
(p15) ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET
|
|
(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful redo
|
|
// simulate tbit.nz.or p7,p0 = r28,0
|
|
getf.sig r2 = f8
|
|
mf
|
|
add r8 = r8,r18 // Add time interpolator offset
|
|
;;
|
|
ld4 r10 = [r29] // xtime_lock.sequence
|
|
(p15) add r8 = r8, r17 // Add monotonic.nsecs to nsecs
|
|
shr.u r2 = r2,r21
|
|
;; // overloaded 3 bundles!
|
|
// End critical section.
|
|
add r8 = r8,r2 // Add scaled counter delta (r8 already holds xtime.tv_nsec + offset)
|
|
cmp4.ne.or p7,p0 = r28,r10
|
|
(p7) br.cond.dpnt.few .time_redo // sequence number changed ?
|
|
// Now r8=tv->tv_nsec and r9=tv->tv_sec
|
|
mov r10 = r0
|
|
movl r2 = 1000000000
|
|
add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
|
|
// 2361183241434822607 is 2^68/125 rounded up: xmpy.hu keeps the high 64 bits
|
|
// of the product and the later shift right by 4 completes the divide by 125.
|
|
(p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack
|
|
;;
|
|
// Carry whole seconds out of r8 into r9 until r8 < 1000000000.
|
|
.time_normalize:
|
|
mov r21 = r8
|
|
cmp.ge p6,p0 = r8,r2
|
|
(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting some time
|
|
;;
|
|
(p14) setf.sig f8 = r20
|
|
(p6) sub r8 = r8,r2
|
|
(p6) add r9 = 1,r9 // two nops before the branch.
|
|
(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod
|
|
(p6) br.cond.dpnt.few .time_normalize
|
|
;;
|
|
// Divided by 8 through shift. Now divide by 125
|
|
// The compiler was able to do that with a multiply
|
|
// and a shift and we do the same
|
|
EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
|
|
(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it...
|
|
;;
|
|
mov r8 = r0
|
|
(p14) getf.sig r2 = f8
|
|
;;
|
|
(p14) shr.u r21 = r2, 4
|
|
;;
|
|
// r21 is the nanosecond count, or that count divided by 1000 when p14 is set.
|
|
EX(.fail_efault, st8 [r31] = r9)
|
|
EX(.fail_efault, st8 [r23] = r21)
|
|
FSYS_RETURN
|
|
// Shared failure exit: r8 = EINVAL, r10 = -1.
|
|
.fail_einval:
|
|
mov r8 = EINVAL
|
|
mov r10 = -1
|
|
FSYS_RETURN
|
|
// Shared failure exit, also named as the fixup label in the EX() lines: r8 = EFAULT, r10 = -1.
|
|
.fail_efault:
|
|
mov r8 = EFAULT
|
|
mov r10 = -1
|
|
FSYS_RETURN
|
|
END(fsys_gettimeofday)
|
|
|
|
/*
 * fsys_clock_gettime: light-weight clock_gettime().
 *
 * In:  r32 = clock id, r33 = pointer to the result
 *
 * Only CLOCK_REALTIME (0) and CLOCK_MONOTONIC (1) are handled here; any
 * other clock id goes to fsys_fallback_syscall.  The clock id is shifted
 * left by 15 to form the .gettime flag word, so CLOCK_MONOTONIC becomes
 * CLOCK_ADD_MONOTONIC (0x8000) and CLOCK_REALTIME becomes 0.  Control then
 * joins the shared .gettime body inside fsys_gettimeofday with r31 = result
 * pointer and r30 = flags.
 *
 * r32 comes straight from user level and may be NaT.  It is rejected with
 * EINVAL up front, like the argument checks in the sibling handlers, because
 * otherwise the NaT would be carried into r30 and reach the "mov pr = r30"
 * at .gettime.  r33 needs no check here: .gettime tests r31 for NaT itself.
 */
ENTRY(fsys_clock_gettime)
	.prologue
	.altrp b6
	.body
	tnat.nz p7,p0 = r32			// guard against NaT clock id
	cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32
(p7)	br.cond.spnt.few .fail_einval
	// Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
(p6)	br.spnt.few fsys_fallback_syscall
	mov r31 = r33				// r31 = pointer to result
	shl r30 = r32,15			// r30 = time processing flags
	br.many .gettime
END(fsys_clock_gettime)
|
|
|
|
/*
|
|
* long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
|
|
*/
|
|
#if _NSIG_WORDS != 1
|
|
# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
|
|
#endif
|
|
// fsys_rt_sigprocmask: light-weight rt_sigprocmask().
// In:  r32 = how, r33 = set, r34 = oset, r35 = sigsetsize; r16 = current task.
// Out: r8 = 0 on success; r8 = EINVAL or EFAULT with r10 = -1 through the
//      shared .fail_einval / .fail_efault exits.
// Falls back to fsys_fallback_syscall when thread_info work flags are set,
// when the siglock is already held (SMP), or when a signal would be pending
// under the new mask.  Scratch registers that held kernel data are zeroed
// before the light-weight return.
ENTRY(fsys_rt_sigprocmask)
|
|
.prologue
|
|
.altrp b6
|
|
.body
|
|
|
|
add r2=IA64_TASK_BLOCKED_OFFSET,r16 // r2 = &current->blocked
|
|
add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread_info flags word
|
|
cmp4.ltu p6,p0=SIG_SETMASK,r32 // p6 <- how > SIG_SETMASK (unsigned)
|
|
|
|
cmp.ne p15,p0=r0,r34 // oset != NULL?
|
|
tnat.nz p8,p0=r34 // p8 <- oset is NaT
|
|
add r31=IA64_TASK_SIGHAND_OFFSET,r16
|
|
;;
|
|
ld8 r3=[r2] // read/prefetch current->blocked
|
|
ld4 r9=[r9]
|
|
tnat.nz.or p6,p0=r35
|
|
|
|
cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
|
|
tnat.nz.or p6,p0=r32
|
|
(p6) br.spnt.few .fail_einval // fail with EINVAL
|
|
;;
|
|
#ifdef CONFIG_SMP
|
|
ld8 r31=[r31] // r31 <- current->sighand
|
|
#endif
|
|
and r9=TIF_ALLWORK_MASK,r9
|
|
tnat.nz.or p8,p0=r33 // p8 <- oset or set is NaT
|
|
;;
|
|
cmp.ne p7,p0=0,r9 // p7 <- some work flag is set
|
|
cmp.eq p6,p0=r0,r33 // set == NULL?
|
|
add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock
|
|
(p8) br.spnt.few .fail_efault // fail with EFAULT
|
|
(p7) br.spnt.many fsys_fallback_syscall // got pending kernel work...
|
|
(p6) br.dpnt.many .store_mask // -> short-circuit to just reading the signal mask
|
|
|
|
/* Argh, we actually have to do some work and _update_ the signal mask: */
|
|
|
|
EX(.fail_efault, probe.r.fault r33, 3) // verify user has read-access to *set
|
|
EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set
|
|
mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
|
|
;;
|
|
|
|
rsm psr.i // mask interrupt delivery
|
|
mov ar.ccv=0 // cmpxchg below expects the lock word to be 0
|
|
andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP
|
|
|
|
#ifdef CONFIG_SMP
|
|
mov r17=1
|
|
;;
|
|
cmpxchg4.acq r18=[r31],r17,ar.ccv // try to acquire the lock
|
|
mov r8=EINVAL // default to EINVAL
|
|
;;
|
|
ld8 r3=[r2] // re-read current->blocked now that we hold the lock
|
|
cmp4.ne p6,p0=r18,r0 // old lock value non-zero -> we did not get it
|
|
(p6) br.cond.spnt.many .lock_contention
|
|
;;
|
|
#else
|
|
ld8 r3=[r2] // re-read current->blocked now that we hold the lock
|
|
mov r8=EINVAL // default to EINVAL
|
|
#endif
|
|
add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
|
|
add r19=IA64_TASK_SIGNAL_OFFSET,r16
|
|
cmp4.eq p6,p0=SIG_BLOCK,r32
|
|
;;
|
|
ld8 r19=[r19] // r19 <- current->signal
|
|
cmp4.eq p7,p0=SIG_UNBLOCK,r32
|
|
cmp4.eq p8,p0=SIG_SETMASK,r32
|
|
;;
|
|
ld8 r18=[r18] // r18 <- current->pending.signal
|
|
.pred.rel.mutex p6,p7,p8
|
|
(p6) or r14=r3,r14 // SIG_BLOCK
|
|
(p7) andcm r14=r3,r14 // SIG_UNBLOCK
|
|
|
|
(p8) mov r14=r14 // SIG_SETMASK
|
|
(p6) mov r8=0 // clear error code
|
|
// recalc_sigpending()
|
|
add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
|
|
|
|
add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
|
|
;;
|
|
ld4 r17=[r17] // r17 <- current->signal->group_stop_count
|
|
(p7) mov r8=0 // clear error code
|
|
|
|
ld8 r19=[r19] // r19 <- current->signal->shared_pending
|
|
;;
|
|
cmp4.gt p6,p7=r17,r0 // p6/p7 <- (current->signal->group_stop_count > 0)?
|
|
(p8) mov r8=0 // clear error code
|
|
|
|
or r18=r18,r19 // r18 <- current->pending | current->signal->shared_pending
|
|
;;
|
|
// r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
|
|
andcm r18=r18,r14
|
|
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
|
|
;;
|
|
|
|
(p7) cmp.ne.or.andcm p6,p7=r18,r0 // p6/p7 <- signal pending
|
|
mov r19=0 // i must not leak kernel bits...
|
|
(p6) br.cond.dpnt.many .sig_pending
|
|
;;
|
|
|
|
// Nothing is pending under the new mask: install it and clear
|
|
// _TIF_SIGPENDING with a compare-and-exchange, retrying if the flags word
|
|
// changed in between.
|
|
1: ld4 r17=[r9] // r17 <- current->thread_info->flags
|
|
;;
|
|
mov ar.ccv=r17
|
|
and r18=~_TIF_SIGPENDING,r17 // r18 <- r17 & ~(1 << TIF_SIGPENDING)
|
|
;;
|
|
|
|
st8 [r2]=r14 // update current->blocked with new mask
|
|
cmpxchg4.acq r8=[r9],r18,ar.ccv // current->thread_info->flags <- r18
|
|
;;
|
|
cmp.ne p6,p0=r17,r8 // update failed?
|
|
(p6) br.cond.spnt.few 1b // yes -> retry
|
|
|
|
#ifdef CONFIG_SMP
|
|
st4.rel [r31]=r0 // release the lock
|
|
#endif
|
|
ssm psr.i
|
|
;;
|
|
|
|
srlz.d // ensure psr.i is set again
|
|
mov r18=0 // i must not leak kernel bits...
|
|
|
|
// Common exit.  r3 holds the value of current->blocked read before any
|
|
// update; it is written to *oset only when oset != NULL (p15).
|
|
.store_mask:
|
|
EX(.fail_efault, (p15) probe.w.fault r34, 3) // verify user has write-access to *oset
|
|
EX(.fail_efault, (p15) st8 [r34]=r3)
|
|
mov r2=0 // i must not leak kernel bits...
|
|
mov r3=0 // i must not leak kernel bits...
|
|
mov r8=0 // return 0
|
|
mov r9=0 // i must not leak kernel bits...
|
|
mov r14=0 // i must not leak kernel bits...
|
|
mov r17=0 // i must not leak kernel bits...
|
|
mov r31=0 // i must not leak kernel bits...
|
|
FSYS_RETURN
|
|
|
|
// A signal would be pending under the new mask (current->blocked has not
|
|
// been written yet): drop the lock, re-enable interrupts and redo the call
|
|
// on the heavy-weight path.
|
|
.sig_pending:
|
|
#ifdef CONFIG_SMP
|
|
st4.rel [r31]=r0 // release the lock
|
|
#endif
|
|
ssm psr.i
|
|
;;
|
|
srlz.d
|
|
br.sptk.many fsys_fallback_syscall // with signal pending, do the heavy-weight syscall
|
|
|
|
#ifdef CONFIG_SMP
|
|
.lock_contention:
|
|
/* Rather than spinning here, fall back on doing a heavy-weight syscall. */
|
|
ssm psr.i
|
|
;;
|
|
srlz.d
|
|
br.sptk.many fsys_fallback_syscall
|
|
#endif
|
|
END(fsys_rt_sigprocmask)
|
|
|
|
/*
|
|
* fsys_getcpu doesn't use the third parameter in this implementation. It reads
|
|
* current_thread_info()->cpu and corresponding node in cpu_to_node_map.
|
|
*/
|
|
/*
 * fsys_getcpu: light-weight getcpu().
 *
 * In:  r32 = pointer that receives the CPU number (may be NULL)
 *      r33 = pointer that receives the node number (may be NULL)
 *      r16 = current task pointer
 * Out: r8 = 0 on success; r8 = EINVAL (NaT argument) or EFAULT (pointer not
 *      writable) with r10 = -1 through the shared .fail_einval/.fail_efault
 *      exits.
 *
 * getcpu() allows either pointer to be NULL, in which case nothing is
 * written for it.  p6 and p7 therefore record "pointer is non-NULL" and
 * guard both the write probe and the store for each argument.
 *
 * If any TIF_ALLWORK_MASK bit is set in the thread_info flags, the handler
 * branches to fsys_fallback_syscall before storing anything.
 */
ENTRY(fsys_getcpu)
	.prologue
	.altrp b6
	.body
	;;
	add r2=TI_FLAGS+IA64_TASK_SIZE,r16
	tnat.nz p6,p0 = r32			// guard against NaT argument
	add r3=TI_CPU+IA64_TASK_SIZE,r16
	;;
	ld4 r3=[r3]				// M r3 = thread_info->cpu
	ld4 r2=[r2]				// M r2 = thread_info->flags
(p6)	br.cond.spnt.few .fail_einval		// B
	;;
	tnat.nz p7,p0 = r33			// I guard against NaT argument
(p7)	br.cond.spnt.few .fail_einval		// B
	;;
	cmp.ne p6,p0=r32,r0			// p6 <- (cpu pointer != NULL)
	cmp.ne p7,p0=r33,r0			// p7 <- (node pointer != NULL)
	;;
#ifdef CONFIG_NUMA
	movl r17=cpu_to_node_map
	;;
EX(.fail_efault, (p6) probe.w.fault r32, 3)		// M This takes 5 cycles
EX(.fail_efault, (p7) probe.w.fault r33, 3)		// M This takes 5 cycles
	shladd r18=r3,1,r17			// r18 = &cpu_to_node_map[cpu] (2-byte entries)
	;;
	ld2 r20=[r18]				// r20 = cpu_to_node_map[cpu]
	and r2 = TIF_ALLWORK_MASK,r2
	;;
	cmp.ne p8,p0=0,r2
(p8)	br.spnt.many fsys_fallback_syscall
	;;
	;;
	// NOTE(review): the node is stored as 2 bytes while the CPU number is
	// stored as 4; confirm the intended width of *node against the C syscall.
EX(.fail_efault, (p6) st4 [r32] = r3)
EX(.fail_efault, (p7) st2 [r33] = r20)
	mov r8=0
	mov r17=0				// i must not leak kernel bits...
	mov r18=0				// i must not leak kernel bits...
	;;
#else
EX(.fail_efault, (p6) probe.w.fault r32, 3)		// M This takes 5 cycles
EX(.fail_efault, (p7) probe.w.fault r33, 3)		// M This takes 5 cycles
	and r2 = TIF_ALLWORK_MASK,r2
	;;
	cmp.ne p8,p0=0,r2
(p8)	br.spnt.many fsys_fallback_syscall
	;;
EX(.fail_efault, (p6) st4 [r32] = r3)
EX(.fail_efault, (p7) st2 [r33] = r0)		// without NUMA the node is always 0
	mov r8=0
	;;
#endif
	FSYS_RETURN
END(fsys_getcpu)
|
|
|
|
// fsys_fallback_syscall: taken by a light-weight handler that cannot finish
// the call itself.  It looks up the heavy-weight entry point for the syscall
// number in r15, masks interrupts, loads the registers that
// fsys_bubble_down expects on entry (r18, r21, r26, r27, r29) and then falls
// through into fsys_bubble_down.
ENTRY(fsys_fallback_syscall)
|
|
.prologue
|
|
.altrp b6
|
|
.body
|
|
/*
|
|
* We only get here from light-weight syscall handlers. Thus, we already
|
|
* know that r15 contains a valid syscall number. No need to re-check.
|
|
*/
|
|
adds r17=-1024,r15 // r17 = syscall number - 1024 = table index
|
|
movl r14=sys_call_table
|
|
;;
|
|
rsm psr.i // fsys_bubble_down expects PSR.I to be off already
|
|
shladd r18=r17,3,r14 // r18 = &sys_call_table[r17] (8-byte entries)
|
|
;;
|
|
ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point
|
|
mov r29=psr // read psr (12 cyc load latency)
|
|
mov r27=ar.rsc
|
|
mov r21=ar.fpsr
|
|
mov r26=ar.pfs
|
|
END(fsys_fallback_syscall)
|
|
/* FALL THROUGH */
|
|
// fsys_bubble_down: convert an fsyscall into a normal heavy-weight system
// call.  fsys_fallback_syscall falls through into this code after loading
// r18, r21, r26, r27 and r29; the block comment below gives the full entry
// contract.
GLOBAL_ENTRY(fsys_bubble_down)
|
|
.prologue
|
|
.altrp b6
|
|
.body
|
|
/*
|
|
* We get here for syscalls that don't have a lightweight
|
|
* handler. For those, we need to bubble down into the kernel
|
|
* and that requires setting up a minimal pt_regs structure,
|
|
* and initializing the CPU state more or less as if an
|
|
* interruption had occurred. To make syscall-restarts work,
|
|
* we setup pt_regs such that cr_iip points to the second
|
|
* instruction in syscall_via_break. Decrementing the IP
|
|
* hence will restart the syscall via break and not
|
|
* decrementing IP will return us to the caller, as usual.
|
|
* Note that we preserve the value of psr.pp rather than
|
|
* initializing it from dcr.pp. This makes it possible to
|
|
* distinguish fsyscall execution from other privileged
|
|
* execution.
|
|
*
|
|
* On entry:
|
|
* - normal fsyscall handler register usage, except
|
|
* that we also have:
|
|
* - r18: address of syscall entry point
|
|
* - r21: ar.fpsr
|
|
* - r26: ar.pfs
|
|
* - r27: ar.rsc
|
|
* - r29: psr
|
|
*
|
|
* We used to clear some PSR bits here but that requires slow
|
|
* serialization. Fortunately, that isn't really necessary.
|
|
* The rationale is as follows: we used to clear bits
|
|
* ~PSR_PRESERVED_BITS in PSR.L. Since
|
|
* PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
|
|
* ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
|
|
* However,
|
|
*
|
|
* PSR.BE : already is turned off in __kernel_syscall_via_epc()
|
|
* PSR.AC : don't care (kernel normally turns PSR.AC on)
|
|
* PSR.I : already turned off by the time fsys_bubble_down gets
|
|
* invoked
|
|
* PSR.DFL: always 0 (kernel never turns it on)
|
|
* PSR.DFH: don't care --- kernel never touches f32-f127 on its own
|
|
* initiative
|
|
* PSR.DI : always 0 (kernel never turns it on)
|
|
* PSR.SI : always 0 (kernel never turns it on)
|
|
* PSR.DB : don't care --- kernel never enables kernel-level
|
|
* breakpoints
|
|
* PSR.TB : must be 0 already; if it wasn't zero on entry to
|
|
* __kernel_syscall_via_epc, the branch to fsys_bubble_down
|
|
* will trigger a taken branch; the taken-trap-handler then
|
|
* converts the syscall into a break-based system-call.
|
|
*/
|
|
/*
|
|
* Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
|
|
* The rest we have to synthesize.
|
|
*/
|
|
# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) \
|
|
| (0x1 << IA64_PSR_RI_BIT) \
|
|
| IA64_PSR_BN | IA64_PSR_I)
|
|
|
|
invala // M0|1
|
|
movl r14=ia64_ret_from_syscall // X
|
|
|
|
nop.m 0
|
|
movl r28=__kernel_syscall_via_break // X create cr.iip
|
|
;;
|
|
|
|
mov r2=r16 // A get task addr to addl-addressable register
|
|
adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
|
|
mov r31=pr // I0 save pr (2 cyc)
|
|
;;
|
|
st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
|
|
addl r22=IA64_RBS_OFFSET,r2 // A compute base of RBS
|
|
add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // A
|
|
;;
|
|
ld4 r3=[r3] // M0|1 r3 = current_thread_info()->flags
|
|
lfetch.fault.excl.nt1 [r22] // M0|1 prefetch register backing-store
|
|
nop.i 0
|
|
;;
|
|
mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
|
|
nop.m 0
|
|
nop.i 0
|
|
;;
|
|
mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore
|
|
mov.m r24=ar.rnat // M2 (5 cyc) read ar.rnat (dual-issues!)
|
|
nop.i 0
|
|
;;
|
|
mov ar.bspstore=r22 // M2 (6 cyc) switch to kernel RBS
|
|
movl r8=PSR_ONE_BITS // X
|
|
;;
|
|
mov r25=ar.unat // M2 (5 cyc) save ar.unat
|
|
mov r19=b6 // I0 save b6 (2 cyc)
|
|
mov r20=r1 // A save caller's gp in r20
|
|
;;
|
|
or r29=r8,r29 // A construct cr.ipsr value to save
|
|
mov b6=r18 // I0 copy syscall entry-point to b6 (7 cyc)
|
|
addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
|
|
|
|
mov r18=ar.bsp // M2 save (kernel) ar.bsp (12 cyc)
|
|
cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
|
|
br.call.sptk.many b7=ia64_syscall_setup // B
|
|
;;
|
|
mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
|
|
mov rp=r14 // I0 set the real return addr
|
|
and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A
|
|
;;
|
|
ssm psr.i // M2 we're on kernel stacks now, reenable irqs
|
|
cmp.eq p8,p0=r3,r0 // A p8 <- no trace/audit flag is set
|
|
(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT
|
|
|
|
nop.m 0
|
|
(p8) br.call.sptk.many b6=b6 // B (ignore return address)
|
|
br.cond.spnt ia64_trace_syscall // B
|
|
END(fsys_bubble_down)
|
|
|
|
// fsyscall_table: one 8-byte slot per system call.  The numeric comments on
// the entries show that slot k belongs to syscall number 1024 + k, the same
// bias fsys_fallback_syscall applies when it indexes sys_call_table.
// The quadword just before the label holds the address of fsys_bubble_down.
// NOTE(review): a zero slot presumably means "no light-weight handler for
// this syscall"; the code that reads this table is not in this file, so
// confirm how zero slots and the fsyscall_table[-1] slot are consumed.
.rodata
|
|
.align 8
|
|
.globl fsyscall_table
|
|
|
|
data8 fsys_bubble_down
|
|
fsyscall_table:
|
|
data8 fsys_ni_syscall
|
|
data8 0 // exit // 1025
|
|
data8 0 // read
|
|
data8 0 // write
|
|
data8 0 // open
|
|
data8 0 // close
|
|
data8 0 // creat // 1030
|
|
data8 0 // link
|
|
data8 0 // unlink
|
|
data8 0 // execve
|
|
data8 0 // chdir
|
|
data8 0 // fchdir // 1035
|
|
data8 0 // utimes
|
|
data8 0 // mknod
|
|
data8 0 // chmod
|
|
data8 0 // chown
|
|
data8 0 // lseek // 1040
|
|
data8 fsys_getpid // getpid
|
|
data8 fsys_getppid // getppid
|
|
data8 0 // mount
|
|
data8 0 // umount
|
|
data8 0 // setuid // 1045
|
|
data8 0 // getuid
|
|
data8 0 // geteuid
|
|
data8 0 // ptrace
|
|
data8 0 // access
|
|
data8 0 // sync // 1050
|
|
data8 0 // fsync
|
|
data8 0 // fdatasync
|
|
data8 0 // kill
|
|
data8 0 // rename
|
|
data8 0 // mkdir // 1055
|
|
data8 0 // rmdir
|
|
data8 0 // dup
|
|
data8 0 // pipe
|
|
data8 0 // times
|
|
data8 0 // brk // 1060
|
|
data8 0 // setgid
|
|
data8 0 // getgid
|
|
data8 0 // getegid
|
|
data8 0 // acct
|
|
data8 0 // ioctl // 1065
|
|
data8 0 // fcntl
|
|
data8 0 // umask
|
|
data8 0 // chroot
|
|
data8 0 // ustat
|
|
data8 0 // dup2 // 1070
|
|
data8 0 // setreuid
|
|
data8 0 // setregid
|
|
data8 0 // getresuid
|
|
data8 0 // setresuid
|
|
data8 0 // getresgid // 1075
|
|
data8 0 // setresgid
|
|
data8 0 // getgroups
|
|
data8 0 // setgroups
|
|
data8 0 // getpgid
|
|
data8 0 // setpgid // 1080
|
|
data8 0 // setsid
|
|
data8 0 // getsid
|
|
data8 0 // sethostname
|
|
data8 0 // setrlimit
|
|
data8 0 // getrlimit // 1085
|
|
data8 0 // getrusage
|
|
data8 fsys_gettimeofday // gettimeofday
|
|
data8 0 // settimeofday
|
|
data8 0 // select
|
|
data8 0 // poll // 1090
|
|
data8 0 // symlink
|
|
data8 0 // readlink
|
|
data8 0 // uselib
|
|
data8 0 // swapon
|
|
data8 0 // swapoff // 1095
|
|
data8 0 // reboot
|
|
data8 0 // truncate
|
|
data8 0 // ftruncate
|
|
data8 0 // fchmod
|
|
data8 0 // fchown // 1100
|
|
data8 0 // getpriority
|
|
data8 0 // setpriority
|
|
data8 0 // statfs
|
|
data8 0 // fstatfs
|
|
data8 0 // gettid // 1105
|
|
data8 0 // semget
|
|
data8 0 // semop
|
|
data8 0 // semctl
|
|
data8 0 // msgget
|
|
data8 0 // msgsnd // 1110
|
|
data8 0 // msgrcv
|
|
data8 0 // msgctl
|
|
data8 0 // shmget
|
|
data8 0 // shmat
|
|
data8 0 // shmdt // 1115
|
|
data8 0 // shmctl
|
|
data8 0 // syslog
|
|
data8 0 // setitimer
|
|
data8 0 // getitimer
|
|
data8 0 // 1120
|
|
data8 0
|
|
data8 0
|
|
data8 0 // vhangup
|
|
data8 0 // lchown
|
|
data8 0 // remap_file_pages // 1125
|
|
data8 0 // wait4
|
|
data8 0 // sysinfo
|
|
data8 0 // clone
|
|
data8 0 // setdomainname
|
|
data8 0 // newuname // 1130
|
|
data8 0 // adjtimex
|
|
data8 0
|
|
data8 0 // init_module
|
|
data8 0 // delete_module
|
|
data8 0 // 1135
|
|
data8 0
|
|
data8 0 // quotactl
|
|
data8 0 // bdflush
|
|
data8 0 // sysfs
|
|
data8 0 // personality // 1140
|
|
data8 0 // afs_syscall
|
|
data8 0 // setfsuid
|
|
data8 0 // setfsgid
|
|
data8 0 // getdents
|
|
data8 0 // flock // 1145
|
|
data8 0 // readv
|
|
data8 0 // writev
|
|
data8 0 // pread64
|
|
data8 0 // pwrite64
|
|
data8 0 // sysctl // 1150
|
|
data8 0 // mmap
|
|
data8 0 // munmap
|
|
data8 0 // mlock
|
|
data8 0 // mlockall
|
|
data8 0 // mprotect // 1155
|
|
data8 0 // mremap
|
|
data8 0 // msync
|
|
data8 0 // munlock
|
|
data8 0 // munlockall
|
|
data8 0 // sched_getparam // 1160
|
|
data8 0 // sched_setparam
|
|
data8 0 // sched_getscheduler
|
|
data8 0 // sched_setscheduler
|
|
data8 0 // sched_yield
|
|
data8 0 // sched_get_priority_max // 1165
|
|
data8 0 // sched_get_priority_min
|
|
data8 0 // sched_rr_get_interval
|
|
data8 0 // nanosleep
|
|
data8 0 // nfsservctl
|
|
data8 0 // prctl // 1170
|
|
data8 0 // getpagesize
|
|
data8 0 // mmap2
|
|
data8 0 // pciconfig_read
|
|
data8 0 // pciconfig_write
|
|
data8 0 // perfmonctl // 1175
|
|
data8 0 // sigaltstack
|
|
data8 0 // rt_sigaction
|
|
data8 0 // rt_sigpending
|
|
data8 fsys_rt_sigprocmask // rt_sigprocmask
|
|
data8 0 // rt_sigqueueinfo // 1180
|
|
data8 0 // rt_sigreturn
|
|
data8 0 // rt_sigsuspend
|
|
data8 0 // rt_sigtimedwait
|
|
data8 0 // getcwd
|
|
data8 0 // capget // 1185
|
|
data8 0 // capset
|
|
data8 0 // sendfile
|
|
data8 0
|
|
data8 0
|
|
data8 0 // socket // 1190
|
|
data8 0 // bind
|
|
data8 0 // connect
|
|
data8 0 // listen
|
|
data8 0 // accept
|
|
data8 0 // getsockname // 1195
|
|
data8 0 // getpeername
|
|
data8 0 // socketpair
|
|
data8 0 // send
|
|
data8 0 // sendto
|
|
data8 0 // recv // 1200
|
|
data8 0 // recvfrom
|
|
data8 0 // shutdown
|
|
data8 0 // setsockopt
|
|
data8 0 // getsockopt
|
|
data8 0 // sendmsg // 1205
|
|
data8 0 // recvmsg
|
|
data8 0 // pivot_root
|
|
data8 0 // mincore
|
|
data8 0 // madvise
|
|
data8 0 // newstat // 1210
|
|
data8 0 // newlstat
|
|
data8 0 // newfstat
|
|
data8 0 // clone2
|
|
data8 0 // getdents64
|
|
data8 0 // getunwind // 1215
|
|
data8 0 // readahead
|
|
data8 0 // setxattr
|
|
data8 0 // lsetxattr
|
|
data8 0 // fsetxattr
|
|
data8 0 // getxattr // 1220
|
|
data8 0 // lgetxattr
|
|
data8 0 // fgetxattr
|
|
data8 0 // listxattr
|
|
data8 0 // llistxattr
|
|
data8 0 // flistxattr // 1225
|
|
data8 0 // removexattr
|
|
data8 0 // lremovexattr
|
|
data8 0 // fremovexattr
|
|
data8 0 // tkill
|
|
data8 0 // futex // 1230
|
|
data8 0 // sched_setaffinity
|
|
data8 0 // sched_getaffinity
|
|
data8 fsys_set_tid_address // set_tid_address
|
|
data8 0 // fadvise64_64
|
|
data8 0 // tgkill // 1235
|
|
data8 0 // exit_group
|
|
data8 0 // lookup_dcookie
|
|
data8 0 // io_setup
|
|
data8 0 // io_destroy
|
|
data8 0 // io_getevents // 1240
|
|
data8 0 // io_submit
|
|
data8 0 // io_cancel
|
|
data8 0 // epoll_create
|
|
data8 0 // epoll_ctl
|
|
data8 0 // epoll_wait // 1245
|
|
data8 0 // restart_syscall
|
|
data8 0 // semtimedop
|
|
data8 0 // timer_create
|
|
data8 0 // timer_settime
|
|
data8 0 // timer_gettime // 1250
|
|
data8 0 // timer_getoverrun
|
|
data8 0 // timer_delete
|
|
data8 0 // clock_settime
|
|
data8 fsys_clock_gettime // clock_gettime
|
|
data8 0 // clock_getres // 1255
|
|
data8 0 // clock_nanosleep
|
|
data8 0 // fstatfs64
|
|
data8 0 // statfs64
|
|
data8 0 // mbind
|
|
data8 0 // get_mempolicy // 1260
|
|
data8 0 // set_mempolicy
|
|
data8 0 // mq_open
|
|
data8 0 // mq_unlink
|
|
data8 0 // mq_timedsend
|
|
data8 0 // mq_timedreceive // 1265
|
|
data8 0 // mq_notify
|
|
data8 0 // mq_getsetattr
|
|
data8 0 // kexec_load
|
|
data8 0 // vserver
|
|
data8 0 // waitid // 1270
|
|
data8 0 // add_key
|
|
data8 0 // request_key
|
|
data8 0 // keyctl
|
|
data8 0 // ioprio_set
|
|
data8 0 // ioprio_get // 1275
|
|
data8 0 // move_pages
|
|
data8 0 // inotify_init
|
|
data8 0 // inotify_add_watch
|
|
data8 0 // inotify_rm_watch
|
|
data8 0 // migrate_pages // 1280
|
|
data8 0 // openat
|
|
data8 0 // mkdirat
|
|
data8 0 // mknodat
|
|
data8 0 // fchownat
|
|
data8 0 // futimesat // 1285
|
|
data8 0 // newfstatat
|
|
data8 0 // unlinkat
|
|
data8 0 // renameat
|
|
data8 0 // linkat
|
|
data8 0 // symlinkat // 1290
|
|
data8 0 // readlinkat
|
|
data8 0 // fchmodat
|
|
data8 0 // faccessat
|
|
data8 0
|
|
data8 0 // 1295
|
|
data8 0 // unshare
|
|
data8 0 // splice
|
|
data8 0 // set_robust_list
|
|
data8 0 // get_robust_list
|
|
data8 0 // sync_file_range // 1300
|
|
data8 0 // tee
|
|
data8 0 // vmsplice
|
|
data8 0
|
|
data8 fsys_getcpu // getcpu // 1304
|
|
|
|
// fill in zeros for the remaining entries
|
|
.zero:
|
|
.space fsyscall_table + 8*NR_syscalls - .zero, 0
|