Merge branch 'sh/stable-updates'

This commit is contained in:
Paul Mundt 2009-08-15 12:59:42 +09:00
commit 60e0a4c7ad
45 changed files with 1051 additions and 420 deletions

View file

@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 31
EXTRAVERSION = -rc5
EXTRAVERSION = -rc6
NAME = Man-Eating Seals of Antiquity
# *DOCUMENTATION*

View file

@ -246,7 +246,7 @@ static struct platform_device ceu1_device = {
},
};
/* KEYSC */
/* KEYSC in SoC (Needs SW33-2 set to ON) */
static struct sh_keysc_info keysc_info = {
.mode = SH_KEYSC_MODE_1,
.scan_timing = 10,
@ -263,12 +263,13 @@ static struct sh_keysc_info keysc_info = {
static struct resource keysc_resources[] = {
[0] = {
.start = 0x1a204000,
.end = 0x1a20400f,
.name = "KEYSC",
.start = 0x044b0000,
.end = 0x044b000f,
.flags = IORESOURCE_MEM,
},
[1] = {
.start = IRQ0_KEY,
.start = 79,
.flags = IORESOURCE_IRQ,
},
};

View file

@ -26,8 +26,30 @@ ENTRY(sh_mobile_standby)
tst #SUSP_SH_SF, r0
bt skip_set_sf
#ifdef CONFIG_CPU_SUBTYPE_SH7724
/* DBSC: put memory in self-refresh mode */
/* SDRAM: disable power down and put in self-refresh mode */
mov.l dben_reg, r4
mov.l dben_data0, r1
mov.l r1, @r4
mov.l dbrfpdn0_reg, r4
mov.l dbrfpdn0_data0, r1
mov.l r1, @r4
mov.l dbcmdcnt_reg, r4
mov.l dbcmdcnt_data0, r1
mov.l r1, @r4
mov.l dbcmdcnt_reg, r4
mov.l dbcmdcnt_data1, r1
mov.l r1, @r4
mov.l dbrfpdn0_reg, r4
mov.l dbrfpdn0_data1, r1
mov.l r1, @r4
#else
/* SBSC: disable power down and put in self-refresh mode */
mov.l 1f, r4
mov.l 2f, r1
mov.l @r4, r2
@ -35,6 +57,7 @@ ENTRY(sh_mobile_standby)
mov.l 3f, r3
and r3, r2
mov.l r2, @r4
#endif
skip_set_sf:
tst #SUSP_SH_SLEEP, r0
@ -84,7 +107,36 @@ done_sleep:
tst #SUSP_SH_SF, r0
bt skip_restore_sf
/* SDRAM: set auto-refresh mode */
#ifdef CONFIG_CPU_SUBTYPE_SH7724
/* DBSC: put memory in auto-refresh mode */
mov.l dbrfpdn0_reg, r4
mov.l dbrfpdn0_data0, r1
mov.l r1, @r4
/* sleep 140 ns */
nop
nop
nop
nop
mov.l dbcmdcnt_reg, r4
mov.l dbcmdcnt_data0, r1
mov.l r1, @r4
mov.l dbcmdcnt_reg, r4
mov.l dbcmdcnt_data1, r1
mov.l r1, @r4
mov.l dben_reg, r4
mov.l dben_data1, r1
mov.l r1, @r4
mov.l dbrfpdn0_reg, r4
mov.l dbrfpdn0_data2, r1
mov.l r1, @r4
#else
/* SBSC: set auto-refresh mode */
mov.l 1f, r4
mov.l @r4, r2
mov.l 4f, r3
@ -98,15 +150,29 @@ done_sleep:
add r4, r3
or r2, r3
mov.l r3, @r1
#endif
skip_restore_sf:
rts
nop
.balign 4
#ifdef CONFIG_CPU_SUBTYPE_SH7724
dben_reg: .long 0xfd000010 /* DBEN */
dben_data0: .long 0
dben_data1: .long 1
dbrfpdn0_reg: .long 0xfd000040 /* DBRFPDN0 */
dbrfpdn0_data0: .long 0
dbrfpdn0_data1: .long 1
dbrfpdn0_data2: .long 0x00010000
dbcmdcnt_reg: .long 0xfd000014 /* DBCMDCNT */
dbcmdcnt_data0: .long 2
dbcmdcnt_data1: .long 4
#else
1: .long 0xfe400008 /* SDCR0 */
2: .long 0x00000400
3: .long 0xffff7fff
4: .long 0xfffffbff
#endif
5: .long 0xa4150020 /* STBCR */
6: .long 0xfe40001c /* RTCOR */
7: .long 0xfe400018 /* RTCNT */

View file

@ -24,6 +24,7 @@ config X86
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_PERF_COUNTERS if (!M386 && !M486)
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
select ARCH_WANT_OPTIONAL_GPIOLIB
@ -742,7 +743,6 @@ config X86_UP_IOAPIC
config X86_LOCAL_APIC
def_bool y
depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
select HAVE_PERF_COUNTERS if (!M386 && !M486)
config X86_IO_APIC
def_bool y

View file

@ -400,6 +400,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
level = cpuid_eax(1);
if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
/*
* Some BIOSes incorrectly force this feature, but only K8
* revision D (model = 0x14) and later actually support it.
*/
if (c->x86_model < 0x14)
clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
}
if (c->x86 == 0x10 || c->x86 == 0x11)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);

View file

@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void)
alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
}
static const struct cpu_dev *this_cpu __cpuinitdata;
static void __cpuinit default_init(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_64
display_cacheinfo(c);
#else
/* Not much we can do here... */
/* Check if at least it has cpuid */
if (c->cpuid_level == -1) {
/* No cpuid. It must be an ancient CPU */
if (c->x86 == 4)
strcpy(c->x86_model_id, "486");
else if (c->x86 == 3)
strcpy(c->x86_model_id, "386");
}
#endif
}
static const struct cpu_dev __cpuinitconst default_cpu = {
.c_init = default_init,
.c_vendor = "Unknown",
.c_x86_vendor = X86_VENDOR_UNKNOWN,
};
static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
#ifdef CONFIG_X86_64
@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu)
static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
static void __cpuinit default_init(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_64
display_cacheinfo(c);
#else
/* Not much we can do here... */
/* Check if at least it has cpuid */
if (c->cpuid_level == -1) {
/* No cpuid. It must be an ancient CPU */
if (c->x86 == 4)
strcpy(c->x86_model_id, "486");
else if (c->x86 == 3)
strcpy(c->x86_model_id, "386");
}
#endif
}
static const struct cpu_dev __cpuinitconst default_cpu = {
.c_init = default_init,
.c_vendor = "Unknown",
.c_x86_vendor = X86_VENDOR_UNKNOWN,
};
static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
{
unsigned int *v;

View file

@ -36,6 +36,7 @@
static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
static DEFINE_PER_CPU(bool, thermal_throttle_active);
static atomic_t therm_throt_en = ATOMIC_INIT(0);
@ -96,24 +97,27 @@ static int therm_throt_process(int curr)
{
unsigned int cpu = smp_processor_id();
__u64 tmp_jiffs = get_jiffies_64();
bool was_throttled = __get_cpu_var(thermal_throttle_active);
bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;
if (curr)
if (is_throttled)
__get_cpu_var(thermal_throttle_count)++;
if (time_before64(tmp_jiffs, __get_cpu_var(next_check)))
if (!(was_throttled ^ is_throttled) &&
time_before64(tmp_jiffs, __get_cpu_var(next_check)))
return 0;
__get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
/* if we just entered the thermal event */
if (curr) {
if (is_throttled) {
printk(KERN_CRIT "CPU%d: Temperature above threshold, "
"cpu clock throttled (total events = %lu)\n", cpu,
__get_cpu_var(thermal_throttle_count));
"cpu clock throttled (total events = %lu)\n",
cpu, __get_cpu_var(thermal_throttle_count));
add_taint(TAINT_MACHINE_CHECK);
} else {
printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu);
} else if (was_throttled) {
printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
}
return 1;

View file

@ -55,6 +55,7 @@ struct x86_pmu {
int num_counters_fixed;
int counter_bits;
u64 counter_mask;
int apic;
u64 max_period;
u64 intel_ctrl;
};
@ -72,8 +73,8 @@ static const u64 p6_perfmon_event_map[] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000,
[PERF_COUNT_HW_CACHE_MISSES] = 0x0000,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
[PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
[PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
@ -613,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
static bool reserve_pmc_hardware(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
int i;
if (nmi_watchdog == NMI_LOCAL_APIC)
@ -627,9 +629,11 @@ static bool reserve_pmc_hardware(void)
if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
goto eventsel_fail;
}
#endif
return true;
#ifdef CONFIG_X86_LOCAL_APIC
eventsel_fail:
for (i--; i >= 0; i--)
release_evntsel_nmi(x86_pmu.eventsel + i);
@ -644,10 +648,12 @@ static bool reserve_pmc_hardware(void)
enable_lapic_nmi_watchdog();
return false;
#endif
}
static void release_pmc_hardware(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
int i;
for (i = 0; i < x86_pmu.num_counters; i++) {
@ -657,6 +663,7 @@ static void release_pmc_hardware(void)
if (nmi_watchdog == NMI_LOCAL_APIC)
enable_lapic_nmi_watchdog();
#endif
}
static void hw_perf_counter_destroy(struct perf_counter *counter)
@ -748,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
hwc->sample_period = x86_pmu.max_period;
hwc->last_period = hwc->sample_period;
atomic64_set(&hwc->period_left, hwc->sample_period);
} else {
/*
* If we have a PMU initialized but no APIC
* interrupts, we cannot sample hardware
* counters (user-space has to fall back and
* sample via a hrtimer based software counter):
*/
if (!x86_pmu.apic)
return -EOPNOTSUPP;
}
counter->destroy = hw_perf_counter_destroy;
@ -1449,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
void set_perf_counter_pending(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
apic->send_IPI_self(LOCAL_PENDING_VECTOR);
#endif
}
void perf_counters_lapic_init(void)
{
if (!x86_pmu_initialized())
#ifdef CONFIG_X86_LOCAL_APIC
if (!x86_pmu.apic || !x86_pmu_initialized())
return;
/*
* Always use NMI for PMU
*/
apic_write(APIC_LVTPC, APIC_DM_NMI);
#endif
}
static int __kprobes
@ -1484,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self,
regs = args->regs;
#ifdef CONFIG_X86_LOCAL_APIC
apic_write(APIC_LVTPC, APIC_DM_NMI);
#endif
/*
* Can't rely on the handled return value to say it was our NMI, two
* counters could trigger 'simultaneously' raising two back-to-back NMIs.
@ -1515,6 +1537,7 @@ static struct x86_pmu p6_pmu = {
.event_map = p6_pmu_event_map,
.raw_event = p6_pmu_raw_event,
.max_events = ARRAY_SIZE(p6_perfmon_event_map),
.apic = 1,
.max_period = (1ULL << 31) - 1,
.version = 0,
.num_counters = 2,
@ -1541,6 +1564,7 @@ static struct x86_pmu intel_pmu = {
.event_map = intel_pmu_event_map,
.raw_event = intel_pmu_raw_event,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
/*
* Intel PMCs cannot be accessed sanely above 32 bit width,
* so we install an artificial 1<<31 period regardless of
@ -1564,6 +1588,7 @@ static struct x86_pmu amd_pmu = {
.num_counters = 4,
.counter_bits = 48,
.counter_mask = (1ULL << 48) - 1,
.apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
};
@ -1589,12 +1614,13 @@ static int p6_pmu_init(void)
return -ENODEV;
}
if (!cpu_has_apic) {
pr_info("no Local APIC, try rebooting with lapic");
return -ENODEV;
}
x86_pmu = p6_pmu;
x86_pmu = p6_pmu;
if (!cpu_has_apic) {
pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
pr_info("no hardware sampling interrupt available.\n");
x86_pmu.apic = 0;
}
return 0;
}

View file

@ -418,20 +418,20 @@ static int __init set_pci_reboot(const struct dmi_system_id *d)
}
static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
{ /* Handle problems with rebooting on Apple MacBook5,2 */
{ /* Handle problems with rebooting on Apple MacBook5 */
.callback = set_pci_reboot,
.ident = "Apple MacBook",
.ident = "Apple MacBook5",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5,2"),
DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
},
},
{ /* Handle problems with rebooting on Apple MacBookPro5,1 */
{ /* Handle problems with rebooting on Apple MacBookPro5 */
.callback = set_pci_reboot,
.ident = "Apple MacBookPro5,1",
.ident = "Apple MacBookPro5",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5,1"),
DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
},
},
{ }

View file

@ -40,6 +40,7 @@ struct sh_cmt_priv {
struct platform_device *pdev;
unsigned long flags;
unsigned long flags_suspend;
unsigned long match_value;
unsigned long next_match_value;
unsigned long max_match_value;
@ -667,11 +668,38 @@ static int __devexit sh_cmt_remove(struct platform_device *pdev)
return -EBUSY; /* cannot unregister clockevent and clocksource */
}
static int sh_cmt_suspend(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
struct sh_cmt_priv *p = platform_get_drvdata(pdev);
/* save flag state and stop CMT channel */
p->flags_suspend = p->flags;
sh_cmt_stop(p, p->flags);
return 0;
}
static int sh_cmt_resume(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
struct sh_cmt_priv *p = platform_get_drvdata(pdev);
/* start CMT channel from saved state */
sh_cmt_start(p, p->flags_suspend);
return 0;
}
static struct dev_pm_ops sh_cmt_dev_pm_ops = {
.suspend = sh_cmt_suspend,
.resume = sh_cmt_resume,
};
static struct platform_driver sh_cmt_device_driver = {
.probe = sh_cmt_probe,
.remove = __devexit_p(sh_cmt_remove),
.driver = {
.name = "sh_cmt",
.pm = &sh_cmt_dev_pm_ops,
}
};

View file

@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
else
new->md_minor = MINOR(unit) >> MdpMinorShift;
mutex_init(&new->open_mutex);
mutex_init(&new->reconfig_mutex);
INIT_LIST_HEAD(&new->disks);
INIT_LIST_HEAD(&new->all_mddevs);
@ -1974,17 +1975,14 @@ static void md_update_sb(mddev_t * mddev, int force_change)
/* otherwise we have to go forward and ... */
mddev->events ++;
if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
/* .. if the array isn't clean, insist on an odd 'events' */
if ((mddev->events&1)==0) {
mddev->events++;
/* .. if the array isn't clean, an 'even' event must also go
* to spares. */
if ((mddev->events&1)==0)
nospares = 0;
}
} else {
/* otherwise insist on an even 'events' (for clean states) */
if ((mddev->events&1)) {
mddev->events++;
/* otherwise an 'odd' event must go to spares */
if ((mddev->events&1))
nospares = 0;
}
}
}
@ -3601,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
if (max < mddev->resync_min)
return -EINVAL;
if (max < mddev->resync_max &&
mddev->ro == 0 &&
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
return -EBUSY;
@ -4304,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
struct gendisk *disk = mddev->gendisk;
mdk_rdev_t *rdev;
mutex_lock(&mddev->open_mutex);
if (atomic_read(&mddev->openers) > is_open) {
printk("md: %s still in use.\n",mdname(mddev));
return -EBUSY;
}
if (mddev->pers) {
err = -EBUSY;
} else if (mddev->pers) {
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@ -4367,7 +4365,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
set_disk_ro(disk, 1);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
out:
mutex_unlock(&mddev->open_mutex);
if (err)
return err;
/*
* Free resources if final stop
*/
@ -4433,7 +4434,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
blk_integrity_unregister(disk);
md_new_event(mddev);
sysfs_notify_dirent(mddev->sysfs_state);
out:
return err;
}
@ -5518,12 +5518,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
}
BUG_ON(mddev != bdev->bd_disk->private_data);
if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
goto out;
err = 0;
atomic_inc(&mddev->openers);
mddev_unlock(mddev);
mutex_unlock(&mddev->open_mutex);
check_disk_change(bdev);
out:

View file

@ -223,6 +223,16 @@ struct mddev_s
* so we don't loop trying */
int in_sync; /* know to not need resync */
/* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
* that we are never stopping an array while it is open.
* 'reconfig_mutex' protects all other reconfiguration.
* These locks are separate due to conflicting interactions
* with bdev->bd_mutex.
* Lock ordering is:
* reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
* bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open
*/
struct mutex open_mutex;
struct mutex reconfig_mutex;
atomic_t active; /* general refcount */
atomic_t openers; /* number of active opens */

View file

@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
conf->reshape_progress < raid5_size(mddev, 0, 0)) {
sector_nr = raid5_size(mddev, 0, 0)
- conf->reshape_progress;
} else if (mddev->delta_disks > 0 &&
} else if (mddev->delta_disks >= 0 &&
conf->reshape_progress > 0)
sector_nr = conf->reshape_progress;
sector_div(sector_nr, new_data_disks);
@ -4509,7 +4509,26 @@ static int run(mddev_t *mddev)
(old_disks-max_degraded));
/* here_old is the first stripe that we might need to read
* from */
if (here_new >= here_old) {
if (mddev->delta_disks == 0) {
/* We cannot be sure it is safe to start an in-place
* reshape. It is only safe if user-space if monitoring
* and taking constant backups.
* mdadm always starts a situation like this in
* readonly mode so it can take control before
* allowing any writes. So just check for that.
*/
if ((here_new * mddev->new_chunk_sectors !=
here_old * mddev->chunk_sectors) ||
mddev->ro == 0) {
printk(KERN_ERR "raid5: in-place reshape must be started"
" in read-only mode - aborting\n");
return -EINVAL;
}
} else if (mddev->delta_disks < 0
? (here_new * mddev->new_chunk_sectors <=
here_old * mddev->chunk_sectors)
: (here_new * mddev->new_chunk_sectors >=
here_old * mddev->chunk_sectors)) {
/* Reading from the same stripe as writing to - bad */
printk(KERN_ERR "raid5: reshape_position too early for "
"auto-recovery - aborting.\n");
@ -5078,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev)
mddev->degraded--;
for (d = conf->raid_disks ;
d < conf->raid_disks - mddev->delta_disks;
d++)
raid5_remove_disk(mddev, d);
d++) {
mdk_rdev_t *rdev = conf->disks[d].rdev;
if (rdev && raid5_remove_disk(mddev, d) == 0) {
char nm[20];
sprintf(nm, "rd%d", rdev->raid_disk);
sysfs_remove_link(&mddev->kobj, nm);
rdev->raid_disk = -1;
}
}
}
mddev->layout = conf->algorithm;
mddev->chunk_sectors = conf->chunk_sectors;

View file

@ -477,6 +477,9 @@ static int sh_mobile_lcdc_start(struct sh_mobile_lcdc_priv *priv)
/* tell the board code to enable the panel */
for (k = 0; k < ARRAY_SIZE(priv->ch); k++) {
ch = &priv->ch[k];
if (!ch->enabled)
continue;
board_cfg = &ch->cfg.board_cfg;
if (board_cfg->display_on)
board_cfg->display_on(board_cfg->board_data);
@ -494,6 +497,8 @@ static void sh_mobile_lcdc_stop(struct sh_mobile_lcdc_priv *priv)
/* clean up deferred io and ask board code to disable panel */
for (k = 0; k < ARRAY_SIZE(priv->ch); k++) {
ch = &priv->ch[k];
if (!ch->enabled)
continue;
/* deferred io mode:
* flush frame, and wait for frame end interrupt

View file

@ -1914,7 +1914,8 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
* immediately to their right.
*/
left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) {
if (!ocfs2_rec_clusters(right_child_el, &right_child_el->l_recs[0])) {
BUG_ON(right_child_el->l_tree_depth);
BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
}
@ -2476,15 +2477,37 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
return ret;
}
static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
struct ocfs2_path *path)
static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
int subtree_index, struct ocfs2_path *path)
{
int i, idx;
int i, idx, ret;
struct ocfs2_extent_rec *rec;
struct ocfs2_extent_list *el;
struct ocfs2_extent_block *eb;
u32 range;
/*
* In normal tree rotation process, we will never touch the
* tree branch above subtree_index and ocfs2_extend_rotate_transaction
* doesn't reserve the credits for them either.
*
* But we do have a special case here which will update the rightmost
* records for all the bh in the path.
* So we have to allocate extra credits and access them.
*/
ret = ocfs2_extend_trans(handle,
handle->h_buffer_credits + subtree_index);
if (ret) {
mlog_errno(ret);
goto out;
}
ret = ocfs2_journal_access_path(inode, handle, path);
if (ret) {
mlog_errno(ret);
goto out;
}
/* Path should always be rightmost. */
eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
BUG_ON(eb->h_next_leaf_blk != 0ULL);
@ -2505,6 +2528,8 @@ static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
ocfs2_journal_dirty(handle, path->p_node[i].bh);
}
out:
return ret;
}
static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
@ -2717,7 +2742,12 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
if (del_right_subtree) {
ocfs2_unlink_subtree(inode, handle, left_path, right_path,
subtree_index, dealloc);
ocfs2_update_edge_lengths(inode, handle, left_path);
ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
left_path);
if (ret) {
mlog_errno(ret);
goto out;
}
eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
@ -3034,7 +3064,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
ocfs2_unlink_subtree(inode, handle, left_path, path,
subtree_index, dealloc);
ocfs2_update_edge_lengths(inode, handle, left_path);
ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
left_path);
if (ret) {
mlog_errno(ret);
goto out;
}
eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));

View file

@ -193,6 +193,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
(unsigned long long)OCFS2_I(inode)->ip_blkno);
mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters);
dump_stack();
goto bail;
}
past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
@ -894,18 +895,17 @@ struct ocfs2_write_cluster_desc {
*/
unsigned c_new;
unsigned c_unwritten;
unsigned c_needs_zero;
};
static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
{
return d->c_new || d->c_unwritten;
}
struct ocfs2_write_ctxt {
/* Logical cluster position / len of write */
u32 w_cpos;
u32 w_clen;
/* First cluster allocated in a nonsparse extend */
u32 w_first_new_cpos;
struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
/*
@ -983,6 +983,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
return -ENOMEM;
wc->w_cpos = pos >> osb->s_clustersize_bits;
wc->w_first_new_cpos = UINT_MAX;
cend = (pos + len - 1) >> osb->s_clustersize_bits;
wc->w_clen = cend - wc->w_cpos + 1;
get_bh(di_bh);
@ -1217,20 +1218,18 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
*/
static int ocfs2_write_cluster(struct address_space *mapping,
u32 phys, unsigned int unwritten,
unsigned int should_zero,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_write_ctxt *wc, u32 cpos,
loff_t user_pos, unsigned user_len)
{
int ret, i, new, should_zero = 0;
int ret, i, new;
u64 v_blkno, p_blkno;
struct inode *inode = mapping->host;
struct ocfs2_extent_tree et;
new = phys == 0 ? 1 : 0;
if (new || unwritten)
should_zero = 1;
if (new) {
u32 tmp_pos;
@ -1301,7 +1300,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
if (tmpret) {
mlog_errno(tmpret);
if (ret == 0)
tmpret = ret;
ret = tmpret;
}
}
@ -1341,7 +1340,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
local_len = osb->s_clustersize - cluster_off;
ret = ocfs2_write_cluster(mapping, desc->c_phys,
desc->c_unwritten, data_ac, meta_ac,
desc->c_unwritten,
desc->c_needs_zero,
data_ac, meta_ac,
wc, desc->c_cpos, pos, local_len);
if (ret) {
mlog_errno(ret);
@ -1391,14 +1392,14 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
* newly allocated cluster.
*/
desc = &wc->w_desc[0];
if (ocfs2_should_zero_cluster(desc))
if (desc->c_needs_zero)
ocfs2_figure_cluster_boundaries(osb,
desc->c_cpos,
&wc->w_target_from,
NULL);
desc = &wc->w_desc[wc->w_clen - 1];
if (ocfs2_should_zero_cluster(desc))
if (desc->c_needs_zero)
ocfs2_figure_cluster_boundaries(osb,
desc->c_cpos,
NULL,
@ -1466,13 +1467,28 @@ static int ocfs2_populate_write_desc(struct inode *inode,
phys++;
}
/*
* If w_first_new_cpos is < UINT_MAX, we have a non-sparse
* file that got extended. w_first_new_cpos tells us
* where the newly allocated clusters are so we can
* zero them.
*/
if (desc->c_cpos >= wc->w_first_new_cpos) {
BUG_ON(phys == 0);
desc->c_needs_zero = 1;
}
desc->c_phys = phys;
if (phys == 0) {
desc->c_new = 1;
desc->c_needs_zero = 1;
*clusters_to_alloc = *clusters_to_alloc + 1;
}
if (ext_flags & OCFS2_EXT_UNWRITTEN)
if (ext_flags & OCFS2_EXT_UNWRITTEN) {
desc->c_unwritten = 1;
desc->c_needs_zero = 1;
}
num_clusters--;
}
@ -1632,10 +1648,13 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
if (newsize <= i_size_read(inode))
return 0;
ret = ocfs2_extend_no_holes(inode, newsize, newsize - len);
ret = ocfs2_extend_no_holes(inode, newsize, pos);
if (ret)
mlog_errno(ret);
wc->w_first_new_cpos =
ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
return ret;
}
@ -1644,7 +1663,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
struct page **pagep, void **fsdata,
struct buffer_head *di_bh, struct page *mmap_page)
{
int ret, credits = OCFS2_INODE_UPDATE_CREDITS;
int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
unsigned int clusters_to_alloc, extents_to_split;
struct ocfs2_write_ctxt *wc;
struct inode *inode = mapping->host;
@ -1722,8 +1741,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
}
ocfs2_set_target_boundaries(osb, wc, pos, len,
clusters_to_alloc + extents_to_split);
/*
* We have to zero sparse allocated clusters, unwritten extent clusters,
* and non-sparse clusters we just extended. For non-sparse writes,
* we know zeros will only be needed in the first and/or last cluster.
*/
if (clusters_to_alloc || extents_to_split ||
wc->w_desc[0].c_needs_zero ||
wc->w_desc[wc->w_clen - 1].c_needs_zero)
cluster_of_pages = 1;
else
cluster_of_pages = 0;
ocfs2_set_target_boundaries(osb, wc, pos, len, cluster_of_pages);
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
@ -1756,8 +1786,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
* extent.
*/
ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
clusters_to_alloc + extents_to_split,
mmap_page);
cluster_of_pages, mmap_page);
if (ret) {
mlog_errno(ret);
goto out_quota;

View file

@ -310,22 +310,19 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
return ret;
}
static DEFINE_SPINLOCK(dentry_list_lock);
DEFINE_SPINLOCK(dentry_list_lock);
/* We limit the number of dentry locks to drop in one go. We have
* this limit so that we don't starve other users of ocfs2_wq. */
#define DL_INODE_DROP_COUNT 64
/* Drop inode references from dentry locks */
void ocfs2_drop_dl_inodes(struct work_struct *work)
static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count)
{
struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
dentry_lock_work);
struct ocfs2_dentry_lock *dl;
int drop_count = DL_INODE_DROP_COUNT;
spin_lock(&dentry_list_lock);
while (osb->dentry_lock_list && drop_count--) {
while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) {
dl = osb->dentry_lock_list;
osb->dentry_lock_list = dl->dl_next;
spin_unlock(&dentry_list_lock);
@ -333,11 +330,32 @@ void ocfs2_drop_dl_inodes(struct work_struct *work)
kfree(dl);
spin_lock(&dentry_list_lock);
}
if (osb->dentry_lock_list)
spin_unlock(&dentry_list_lock);
}
void ocfs2_drop_dl_inodes(struct work_struct *work)
{
struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
dentry_lock_work);
__ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT);
/*
* Don't queue dropping if umount is in progress. We flush the
* list in ocfs2_dismount_volume
*/
spin_lock(&dentry_list_lock);
if (osb->dentry_lock_list &&
!ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
queue_work(ocfs2_wq, &osb->dentry_lock_work);
spin_unlock(&dentry_list_lock);
}
/* Flush the whole work queue */
void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
{
__ocfs2_drop_dl_inodes(osb, -1);
}
/*
* ocfs2_dentry_iput() and friends.
*
@ -368,7 +386,8 @@ static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
/* We leave dropping of inode reference to ocfs2_wq as that can
* possibly lead to inode deletion which gets tricky */
spin_lock(&dentry_list_lock);
if (!osb->dentry_lock_list)
if (!osb->dentry_lock_list &&
!ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
queue_work(ocfs2_wq, &osb->dentry_lock_work);
dl->dl_next = osb->dentry_lock_list;
osb->dentry_lock_list = dl;

View file

@ -49,10 +49,13 @@ struct ocfs2_dentry_lock {
int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
u64 parent_blkno);
extern spinlock_t dentry_list_lock;
void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
struct ocfs2_dentry_lock *dl);
void ocfs2_drop_dl_inodes(struct work_struct *work);
void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb);
struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
int skip_unhashed);

View file

@ -103,7 +103,6 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
lock->ast_pending, lock->ml.type);
BUG();
}
BUG_ON(!list_empty(&lock->ast_list));
if (lock->ast_pending)
mlog(0, "lock has an ast getting flushed right now\n");

View file

@ -1118,7 +1118,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n",
dlm->name, res->lockname.len, res->lockname.name,
orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery",
orig_flags & DLM_MRES_MIGRATION ? "migration" : "recovery",
send_to);
/* send it */

View file

@ -1851,6 +1851,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
if (ret)
goto out_dio;
count = ocount;
ret = generic_write_checks(file, ppos, &count,
S_ISBLK(inode->i_mode));
if (ret)
@ -1918,8 +1919,10 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
mutex_unlock(&inode->i_mutex);
if (written)
ret = written;
mlog_exit(ret);
return written ? written : ret;
return ret;
}
static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,

View file

@ -1954,10 +1954,16 @@ void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
os->os_osb = osb;
os->os_count = 0;
os->os_seqno = 0;
os->os_scantime = CURRENT_TIME;
mutex_init(&os->os_lock);
INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
}
void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
{
struct ocfs2_orphan_scan *os;
os = &osb->osb_orphan_scan;
os->os_scantime = CURRENT_TIME;
if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
else {

View file

@ -145,6 +145,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
/* Exported only for the journal struct init code in super.c. Do not call. */
void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
void ocfs2_orphan_scan_start(struct ocfs2_super *osb);
void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
@ -329,20 +330,27 @@ int ocfs2_journal_dirty(handle_t *handle,
/* extended attribute block update */
#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
/* global quotafile inode update, data block */
#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
/* Update of a single quota block */
#define OCFS2_QUOTA_BLOCK_UPDATE_CREDITS 1
/* global quotafile inode update, data block */
#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \
OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
#define OCFS2_LOCAL_QINFO_WRITE_CREDITS OCFS2_QUOTA_BLOCK_UPDATE_CREDITS
/*
* The two writes below can accidentally see global info dirty due
* to set_info() quotactl so make them prepared for the writes.
*/
/* quota data block, global info */
/* Write to local quota file */
#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1)
#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
/* global quota data block, local quota data block, global quota inode,
* global quota info */
#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3)
#define OCFS2_QSYNC_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
static inline int ocfs2_quota_trans_credits(struct super_block *sb)
{
@ -355,11 +363,6 @@ static inline int ocfs2_quota_trans_credits(struct super_block *sb)
return credits;
}
/* Number of credits needed for removing quota structure from file */
int ocfs2_calc_qdel_credits(struct super_block *sb, int type);
/* Number of credits needed for initialization of new quota structure */
int ocfs2_calc_qinit_credits(struct super_block *sb, int type);
/* group extend. inode update and last group update. */
#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)

View file

@ -224,10 +224,12 @@ enum ocfs2_mount_options
OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
};
#define OCFS2_OSB_SOFT_RO 0x0001
#define OCFS2_OSB_HARD_RO 0x0002
#define OCFS2_OSB_ERROR_FS 0x0004
#define OCFS2_DEFAULT_ATIME_QUANTUM 60
#define OCFS2_OSB_SOFT_RO 0x0001
#define OCFS2_OSB_HARD_RO 0x0002
#define OCFS2_OSB_ERROR_FS 0x0004
#define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008
#define OCFS2_DEFAULT_ATIME_QUANTUM 60
struct ocfs2_journal;
struct ocfs2_slot_info;
@ -490,6 +492,18 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb,
spin_unlock(&osb->osb_lock);
}
static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb,
unsigned long flag)
{
unsigned long ret;
spin_lock(&osb->osb_lock);
ret = osb->osb_flags & flag;
spin_unlock(&osb->osb_lock);
return ret;
}
static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb,
int hard)
{

View file

@ -50,7 +50,6 @@ struct ocfs2_mem_dqinfo {
unsigned int dqi_chunks; /* Number of chunks in local quota file */
unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */
unsigned int dqi_syncms; /* How often should we sync with other nodes */
unsigned int dqi_syncjiff; /* Precomputed dqi_syncms in jiffies */
struct list_head dqi_chunk; /* List of chunks */
struct inode *dqi_gqinode; /* Global quota file inode */
struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */

View file

@ -69,6 +69,7 @@ static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
d->dqb_btime = cpu_to_le64(m->dqb_btime);
d->dqb_itime = cpu_to_le64(m->dqb_itime);
d->dqb_pad1 = d->dqb_pad2 = 0;
}
static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
@ -211,14 +212,13 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
if (gqinode->i_size < off + len) {
down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
err = ocfs2_extend_no_holes(gqinode, off + len, off);
up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
if (err < 0)
goto out;
loff_t rounded_end =
ocfs2_align_bytes_to_blocks(sb, off + len);
/* Space is already allocated in ocfs2_global_read_dquot() */
err = ocfs2_simple_size_update(gqinode,
oinfo->dqi_gqi_bh,
off + len);
rounded_end);
if (err < 0)
goto out;
new = 1;
@ -234,7 +234,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
}
if (err) {
mlog_errno(err);
return err;
goto out;
}
lock_buffer(bh);
if (new)
@ -342,7 +342,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms);
oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms);
oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
@ -352,7 +351,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
oinfo->dqi_syncjiff);
msecs_to_jiffies(oinfo->dqi_syncms));
out_err:
mlog_exit(status);
@ -402,13 +401,36 @@ int ocfs2_global_write_info(struct super_block *sb, int type)
return err;
}
static int ocfs2_global_qinit_alloc(struct super_block *sb, int type)
{
struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
/*
* We may need to allocate tree blocks and a leaf block but not the
* root block
*/
return oinfo->dqi_gi.dqi_qtree_depth;
}
static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type)
{
/* We modify all the allocated blocks, tree root, and info block */
return (ocfs2_global_qinit_alloc(sb, type) + 2) *
OCFS2_QUOTA_BLOCK_UPDATE_CREDITS;
}
/* Read in information from global quota file and acquire a reference to it.
* dquot_acquire() has already started the transaction and locked quota file */
int ocfs2_global_read_dquot(struct dquot *dquot)
{
int err, err2, ex = 0;
struct ocfs2_mem_dqinfo *info =
sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
struct super_block *sb = dquot->dq_sb;
int type = dquot->dq_type;
struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
struct ocfs2_super *osb = OCFS2_SB(sb);
struct inode *gqinode = info->dqi_gqinode;
int need_alloc = ocfs2_global_qinit_alloc(sb, type);
handle_t *handle = NULL;
err = ocfs2_qinfo_lock(info, 0);
if (err < 0)
@ -419,14 +441,33 @@ int ocfs2_global_read_dquot(struct dquot *dquot)
OCFS2_DQUOT(dquot)->dq_use_count++;
OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
ocfs2_qinfo_unlock(info, 0);
if (!dquot->dq_off) { /* No real quota entry? */
/* Upgrade to exclusive lock for allocation */
ocfs2_qinfo_unlock(info, 0);
err = ocfs2_qinfo_lock(info, 1);
if (err < 0)
goto out_qlock;
ex = 1;
/*
* Add blocks to quota file before we start a transaction since
* locking allocators ranks above a transaction start
*/
WARN_ON(journal_current_handle());
down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
err = ocfs2_extend_no_holes(gqinode,
gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
gqinode->i_size);
up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
if (err < 0)
goto out;
}
handle = ocfs2_start_trans(osb,
ocfs2_calc_global_qinit_credits(sb, type));
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
goto out;
}
err = ocfs2_qinfo_lock(info, ex);
if (err < 0)
goto out_trans;
err = qtree_write_dquot(&info->dqi_gi, dquot);
if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
@ -438,6 +479,9 @@ int ocfs2_global_read_dquot(struct dquot *dquot)
ocfs2_qinfo_unlock(info, 1);
else
ocfs2_qinfo_unlock(info, 0);
out_trans:
if (handle)
ocfs2_commit_trans(osb, handle);
out:
if (err < 0)
mlog_errno(err);
@ -607,7 +651,7 @@ static void qsync_work_fn(struct work_struct *work)
dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
oinfo->dqi_syncjiff);
msecs_to_jiffies(oinfo->dqi_syncms));
}
/*
@ -635,20 +679,18 @@ static int ocfs2_write_dquot(struct dquot *dquot)
return status;
}
int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
static int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
{
struct ocfs2_mem_dqinfo *oinfo;
int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
return 0;
oinfo = sb_dqinfo(sb, type)->dqi_priv;
/* We modify tree, leaf block, global info, local chunk header,
* global and local inode */
return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 +
2 * OCFS2_INODE_UPDATE_CREDITS;
struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
/*
* We modify tree, leaf block, global info, local chunk header,
* global and local inode; OCFS2_QINFO_WRITE_CREDITS already
* accounts for inode update
*/
return (oinfo->dqi_gi.dqi_qtree_depth + 2) *
OCFS2_QUOTA_BLOCK_UPDATE_CREDITS +
OCFS2_QINFO_WRITE_CREDITS +
OCFS2_INODE_UPDATE_CREDITS;
}
static int ocfs2_release_dquot(struct dquot *dquot)
@ -680,33 +722,10 @@ static int ocfs2_release_dquot(struct dquot *dquot)
return status;
}
int ocfs2_calc_qinit_credits(struct super_block *sb, int type)
{
struct ocfs2_mem_dqinfo *oinfo;
int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
struct ocfs2_dinode *lfe, *gfe;
if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
return 0;
oinfo = sb_dqinfo(sb, type)->dqi_priv;
gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data;
lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data;
/* We can extend local file + global file. In local file we
* can modify info, chunk header block and dquot block. In
* global file we can modify info, tree and leaf block */
return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) +
ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) +
3 + oinfo->dqi_gi.dqi_qtree_depth + 2;
}
static int ocfs2_acquire_dquot(struct dquot *dquot)
{
handle_t *handle;
struct ocfs2_mem_dqinfo *oinfo =
sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
int status = 0;
mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
@ -715,16 +734,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
status = ocfs2_lock_global_qf(oinfo, 1);
if (status < 0)
goto out;
handle = ocfs2_start_trans(osb,
ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type));
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
goto out_ilock;
}
status = dquot_acquire(dquot);
ocfs2_commit_trans(osb, handle);
out_ilock:
ocfs2_unlock_global_qf(oinfo, 1);
out:
mlog_exit(status);

View file

@ -20,6 +20,7 @@
#include "sysfile.h"
#include "dlmglue.h"
#include "quota.h"
#include "uptodate.h"
/* Number of local quota structures per block */
static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
@ -100,7 +101,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
handle_t *handle;
int status;
handle = ocfs2_start_trans(OCFS2_SB(sb), 1);
handle = ocfs2_start_trans(OCFS2_SB(sb),
OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
@ -610,7 +612,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
goto out_bh;
/* Mark quota file as clean if we are recovering quota file of
* some other node. */
handle = ocfs2_start_trans(osb, 1);
handle = ocfs2_start_trans(osb,
OCFS2_LOCAL_QINFO_WRITE_CREDITS);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
@ -940,7 +943,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
struct ocfs2_local_disk_chunk *dchunk;
int status;
handle_t *handle;
struct buffer_head *bh = NULL;
struct buffer_head *bh = NULL, *dbh = NULL;
u64 p_blkno;
/* We are protected by dqio_sem so no locking needed */
@ -964,32 +967,35 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
mlog_errno(status);
goto out;
}
down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
&p_blkno, NULL, NULL);
up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
if (status < 0) {
mlog_errno(status);
goto out;
}
bh = sb_getblk(sb, p_blkno);
if (!bh) {
status = -ENOMEM;
mlog_errno(status);
goto out;
}
dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
/* Local quota info and two new blocks we initialize */
handle = ocfs2_start_trans(OCFS2_SB(sb),
OCFS2_LOCAL_QINFO_WRITE_CREDITS +
2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
goto out;
}
/* Initialize chunk header */
down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
&p_blkno, NULL, NULL);
up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
if (status < 0) {
mlog_errno(status);
goto out_trans;
}
bh = sb_getblk(sb, p_blkno);
if (!bh) {
status = -ENOMEM;
mlog_errno(status);
goto out_trans;
}
dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
ocfs2_set_new_buffer_uptodate(lqinode, bh);
status = ocfs2_journal_access_dq(handle, lqinode, bh,
OCFS2_JOURNAL_ACCESS_WRITE);
OCFS2_JOURNAL_ACCESS_CREATE);
if (status < 0) {
mlog_errno(status);
goto out_trans;
@ -999,7 +1005,6 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
memset(dchunk->dqc_bitmap, 0,
sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
OCFS2_QBLK_RESERVED_SPACE);
set_buffer_uptodate(bh);
unlock_buffer(bh);
status = ocfs2_journal_dirty(handle, bh);
if (status < 0) {
@ -1007,6 +1012,38 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
goto out_trans;
}
/* Initialize new block with structures */
down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1,
&p_blkno, NULL, NULL);
up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
if (status < 0) {
mlog_errno(status);
goto out_trans;
}
dbh = sb_getblk(sb, p_blkno);
if (!dbh) {
status = -ENOMEM;
mlog_errno(status);
goto out_trans;
}
ocfs2_set_new_buffer_uptodate(lqinode, dbh);
status = ocfs2_journal_access_dq(handle, lqinode, dbh,
OCFS2_JOURNAL_ACCESS_CREATE);
if (status < 0) {
mlog_errno(status);
goto out_trans;
}
lock_buffer(dbh);
memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE);
unlock_buffer(dbh);
status = ocfs2_journal_dirty(handle, dbh);
if (status < 0) {
mlog_errno(status);
goto out_trans;
}
/* Update local quotafile info */
oinfo->dqi_blocks += 2;
oinfo->dqi_chunks++;
status = ocfs2_local_write_info(sb, type);
@ -1031,6 +1068,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
ocfs2_commit_trans(OCFS2_SB(sb), handle);
out:
brelse(bh);
brelse(dbh);
kmem_cache_free(ocfs2_qf_chunk_cachep, chunk);
return ERR_PTR(status);
}
@ -1048,6 +1086,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
struct ocfs2_local_disk_chunk *dchunk;
int epb = ol_quota_entries_per_block(sb);
unsigned int chunk_blocks;
struct buffer_head *bh;
u64 p_blkno;
int status;
handle_t *handle;
@ -1075,12 +1115,49 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
mlog_errno(status);
goto out;
}
handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
/* Get buffer from the just added block */
down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
&p_blkno, NULL, NULL);
up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
if (status < 0) {
mlog_errno(status);
goto out;
}
bh = sb_getblk(sb, p_blkno);
if (!bh) {
status = -ENOMEM;
mlog_errno(status);
goto out;
}
ocfs2_set_new_buffer_uptodate(lqinode, bh);
/* Local quota info, chunk header and the new block we initialize */
handle = ocfs2_start_trans(OCFS2_SB(sb),
OCFS2_LOCAL_QINFO_WRITE_CREDITS +
2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
goto out;
}
/* Zero created block */
status = ocfs2_journal_access_dq(handle, lqinode, bh,
OCFS2_JOURNAL_ACCESS_CREATE);
if (status < 0) {
mlog_errno(status);
goto out_trans;
}
lock_buffer(bh);
memset(bh->b_data, 0, sb->s_blocksize);
unlock_buffer(bh);
status = ocfs2_journal_dirty(handle, bh);
if (status < 0) {
mlog_errno(status);
goto out_trans;
}
/* Update chunk header */
status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
@ -1097,6 +1174,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
mlog_errno(status);
goto out_trans;
}
/* Update file header */
oinfo->dqi_blocks++;
status = ocfs2_local_write_info(sb, type);
if (status < 0) {

View file

@ -17,6 +17,7 @@
* General Public License for more details.
*/
#include <linux/kernel.h>
#include <linux/crc32.h>
#include <linux/module.h>
@ -153,7 +154,7 @@ static int status_map[] = {
static int dlm_status_to_errno(enum dlm_status status)
{
BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0])));
BUG_ON(status < 0 || status >= ARRAY_SIZE(status_map));
return status_map[status];
}

View file

@ -777,6 +777,7 @@ static int ocfs2_sb_probe(struct super_block *sb,
}
di = (struct ocfs2_dinode *) (*bh)->b_data;
memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats));
spin_lock_init(&stats->b_lock);
status = ocfs2_verify_volume(di, *bh, blksize, stats);
if (status >= 0)
goto bail;
@ -1182,7 +1183,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
wake_up(&osb->osb_mount_event);
/* Start this when the mount is almost sure of being successful */
ocfs2_orphan_scan_init(osb);
ocfs2_orphan_scan_start(osb);
mlog_exit(status);
return status;
@ -1213,14 +1214,27 @@ static int ocfs2_get_sb(struct file_system_type *fs_type,
mnt);
}
static void ocfs2_kill_sb(struct super_block *sb)
{
struct ocfs2_super *osb = OCFS2_SB(sb);
/* Prevent further queueing of inode drop events */
spin_lock(&dentry_list_lock);
ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED);
spin_unlock(&dentry_list_lock);
/* Wait for work to finish and/or remove it */
cancel_work_sync(&osb->dentry_lock_work);
kill_block_super(sb);
}
static struct file_system_type ocfs2_fs_type = {
.owner = THIS_MODULE,
.name = "ocfs2",
.get_sb = ocfs2_get_sb, /* is this called when we mount
* the fs? */
.kill_sb = kill_block_super, /* set to the generic one
* right now, but do we
* need to change that? */
.kill_sb = ocfs2_kill_sb,
.fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
.next = NULL
};
@ -1819,6 +1833,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
debugfs_remove(osb->osb_ctxt);
/*
* Flush inode dropping work queue so that deletes are
* performed while the filesystem is still working
*/
ocfs2_drop_all_dl_inodes(osb);
/* Orphan scan should be stopped as early as possible */
ocfs2_orphan_scan_stop(osb);
@ -1981,6 +2001,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
ocfs2_orphan_scan_init(osb);
status = ocfs2_recovery_init(osb);
if (status) {
mlog(ML_ERROR, "Unable to initialize recovery state\n");

View file

@ -1052,7 +1052,8 @@ static int ocfs2_xattr_block_get(struct inode *inode,
struct ocfs2_xattr_block *xb;
struct ocfs2_xattr_value_root *xv;
size_t size;
int ret = -ENODATA, name_offset, name_len, block_off, i;
int ret = -ENODATA, name_offset, name_len, i;
int uninitialized_var(block_off);
xs->bucket = ocfs2_xattr_bucket_new(inode);
if (!xs->bucket) {

View file

@ -115,7 +115,7 @@ enum perf_counter_sample_format {
PERF_SAMPLE_TID = 1U << 1,
PERF_SAMPLE_TIME = 1U << 2,
PERF_SAMPLE_ADDR = 1U << 3,
PERF_SAMPLE_GROUP = 1U << 4,
PERF_SAMPLE_READ = 1U << 4,
PERF_SAMPLE_CALLCHAIN = 1U << 5,
PERF_SAMPLE_ID = 1U << 6,
PERF_SAMPLE_CPU = 1U << 7,
@ -127,16 +127,32 @@ enum perf_counter_sample_format {
};
/*
* Bits that can be set in attr.read_format to request that
* reads on the counter should return the indicated quantities,
* in increasing order of bit value, after the counter value.
* The format of the data returned by read() on a perf counter fd,
* as specified by attr.read_format:
*
* struct read_format {
* { u64 value;
* { u64 time_enabled; } && PERF_FORMAT_ENABLED
* { u64 time_running; } && PERF_FORMAT_RUNNING
* { u64 id; } && PERF_FORMAT_ID
* } && !PERF_FORMAT_GROUP
*
* { u64 nr;
* { u64 time_enabled; } && PERF_FORMAT_ENABLED
* { u64 time_running; } && PERF_FORMAT_RUNNING
* { u64 value;
* { u64 id; } && PERF_FORMAT_ID
* } cntr[nr];
* } && PERF_FORMAT_GROUP
* };
*/
enum perf_counter_read_format {
PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
PERF_FORMAT_ID = 1U << 2,
PERF_FORMAT_GROUP = 1U << 3,
PERF_FORMAT_MAX = 1U << 3, /* non-ABI */
PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
};
#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
@ -343,10 +359,8 @@ enum perf_event_type {
* struct {
* struct perf_event_header header;
* u32 pid, tid;
* u64 value;
* { u64 time_enabled; } && PERF_FORMAT_ENABLED
* { u64 time_running; } && PERF_FORMAT_RUNNING
* { u64 parent_id; } && PERF_FORMAT_ID
*
* struct read_format values;
* };
*/
PERF_EVENT_READ = 8,
@ -364,11 +378,22 @@ enum perf_event_type {
* { u32 cpu, res; } && PERF_SAMPLE_CPU
* { u64 period; } && PERF_SAMPLE_PERIOD
*
* { u64 nr;
* { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP
* { struct read_format values; } && PERF_SAMPLE_READ
*
* { u64 nr,
* u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
*
* #
* # The RAW record below is opaque data wrt the ABI
* #
* # That is, the ABI doesn't make any promises wrt to
* # the stability of its content, it may vary depending
* # on event, hardware, kernel version and phase of
* # the moon.
* #
* # In other words, PERF_SAMPLE_RAW contents are not an ABI.
* #
*
* { u32 size;
* char data[size];}&& PERF_SAMPLE_RAW
* };
@ -694,6 +719,8 @@ struct perf_sample_data {
extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
struct perf_sample_data *data);
extern void perf_counter_output(struct perf_counter *counter, int nmi,
struct perf_sample_data *data);
/*
* Return 1 for a software counter, 0 for a hardware counter

View file

@ -77,7 +77,14 @@ struct task_struct;
#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \
{ .flags = word, .bit_nr = bit, }
extern void init_waitqueue_head(wait_queue_head_t *q);
extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *);
#define init_waitqueue_head(q) \
do { \
static struct lock_class_key __key; \
\
__init_waitqueue_head((q), &__key); \
} while (0)
#ifdef CONFIG_LOCKDEP
# define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \

View file

@ -1010,15 +1010,19 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
* requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
* q: the futex_q
* key: the key of the requeue target futex
* hb: the hash_bucket of the requeue target futex
*
* During futex_requeue, with requeue_pi=1, it is possible to acquire the
* target futex if it is uncontended or via a lock steal. Set the futex_q key
* to the requeue target futex so the waiter can detect the wakeup on the right
* futex, but remove it from the hb and NULL the rt_waiter so it can detect
* atomic lock acquisition. Must be called with the q->lock_ptr held.
* atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
* to protect access to the pi_state to fixup the owner later. Must be called
* with both q->lock_ptr and hb->lock held.
*/
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
struct futex_hash_bucket *hb)
{
drop_futex_key_refs(&q->key);
get_futex_key_refs(key);
@ -1030,6 +1034,11 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
WARN_ON(!q->rt_waiter);
q->rt_waiter = NULL;
q->lock_ptr = &hb->lock;
#ifdef CONFIG_DEBUG_PI_LIST
q->list.plist.lock = &hb->lock;
#endif
wake_up_state(q->task, TASK_NORMAL);
}
@ -1088,7 +1097,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
set_waiters);
if (ret == 1)
requeue_pi_wake_futex(top_waiter, key2);
requeue_pi_wake_futex(top_waiter, key2, hb2);
return ret;
}
@ -1247,8 +1256,15 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
if (!match_futex(&this->key, &key1))
continue;
WARN_ON(!requeue_pi && this->rt_waiter);
WARN_ON(requeue_pi && !this->rt_waiter);
/*
* FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
* be paired with each other and no other futex ops.
*/
if ((requeue_pi && !this->rt_waiter) ||
(!requeue_pi && this->rt_waiter)) {
ret = -EINVAL;
break;
}
/*
* Wake nr_wake waiters. For requeue_pi, if we acquired the
@ -1273,7 +1289,7 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
this->task, 1);
if (ret == 1) {
/* We got the lock. */
requeue_pi_wake_futex(this, &key2);
requeue_pi_wake_futex(this, &key2, hb2);
continue;
} else if (ret) {
/* -EDEADLK */

View file

@ -180,7 +180,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
int cmd = op & FUTEX_CMD_MASK;
if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
cmd == FUTEX_WAIT_BITSET)) {
cmd == FUTEX_WAIT_BITSET ||
cmd == FUTEX_WAIT_REQUEUE_PI)) {
if (get_compat_timespec(&ts, utime))
return -EFAULT;
if (!timespec_valid(&ts))
@ -191,7 +192,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
t = ktime_add_safe(ktime_get(), t);
tp = &t;
}
if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE)
if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
val2 = (int) (unsigned long) utime;
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);

View file

@ -761,7 +761,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action, **action_ptr;
struct task_struct *irqthread;
unsigned long flags;
WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@ -809,9 +808,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
desc->chip->disable(irq);
}
irqthread = action->thread;
action->thread = NULL;
spin_unlock_irqrestore(&desc->lock, flags);
unregister_handler_proc(irq, action);
@ -819,12 +815,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
/* Make sure it's not being used on another CPU: */
synchronize_irq(irq);
if (irqthread) {
if (!test_bit(IRQTF_DIED, &action->thread_flags))
kthread_stop(irqthread);
put_task_struct(irqthread);
}
#ifdef CONFIG_DEBUG_SHIRQ
/*
* It's a shared IRQ -- the driver ought to be prepared for an IRQ
@ -840,6 +830,13 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
local_irq_restore(flags);
}
#endif
if (action->thread) {
if (!test_bit(IRQTF_DIED, &action->thread_flags))
kthread_stop(action->thread);
put_task_struct(action->thread);
}
return action;
}

View file

@ -88,6 +88,7 @@ void __weak hw_perf_disable(void) { barrier(); }
void __weak hw_perf_enable(void) { barrier(); }
void __weak hw_perf_counter_setup(int cpu) { barrier(); }
void __weak hw_perf_counter_setup_online(int cpu) { barrier(); }
int __weak
hw_perf_group_sched_in(struct perf_counter *group_leader,
@ -306,6 +307,10 @@ counter_sched_out(struct perf_counter *counter,
return;
counter->state = PERF_COUNTER_STATE_INACTIVE;
if (counter->pending_disable) {
counter->pending_disable = 0;
counter->state = PERF_COUNTER_STATE_OFF;
}
counter->tstamp_stopped = ctx->time;
counter->pmu->disable(counter);
counter->oncpu = -1;
@ -1691,7 +1696,32 @@ static int perf_release(struct inode *inode, struct file *file)
return 0;
}
static u64 perf_counter_read_tree(struct perf_counter *counter)
static int perf_counter_read_size(struct perf_counter *counter)
{
int entry = sizeof(u64); /* value */
int size = 0;
int nr = 1;
if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
size += sizeof(u64);
if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
size += sizeof(u64);
if (counter->attr.read_format & PERF_FORMAT_ID)
entry += sizeof(u64);
if (counter->attr.read_format & PERF_FORMAT_GROUP) {
nr += counter->group_leader->nr_siblings;
size += sizeof(u64);
}
size += entry * nr;
return size;
}
static u64 perf_counter_read_value(struct perf_counter *counter)
{
struct perf_counter *child;
u64 total = 0;
@ -1703,14 +1733,96 @@ static u64 perf_counter_read_tree(struct perf_counter *counter)
return total;
}
static int perf_counter_read_entry(struct perf_counter *counter,
u64 read_format, char __user *buf)
{
int n = 0, count = 0;
u64 values[2];
values[n++] = perf_counter_read_value(counter);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_counter_id(counter);
count = n * sizeof(u64);
if (copy_to_user(buf, values, count))
return -EFAULT;
return count;
}
static int perf_counter_read_group(struct perf_counter *counter,
u64 read_format, char __user *buf)
{
struct perf_counter *leader = counter->group_leader, *sub;
int n = 0, size = 0, err = -EFAULT;
u64 values[3];
values[n++] = 1 + leader->nr_siblings;
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
values[n++] = leader->total_time_enabled +
atomic64_read(&leader->child_total_time_enabled);
}
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
values[n++] = leader->total_time_running +
atomic64_read(&leader->child_total_time_running);
}
size = n * sizeof(u64);
if (copy_to_user(buf, values, size))
return -EFAULT;
err = perf_counter_read_entry(leader, read_format, buf + size);
if (err < 0)
return err;
size += err;
list_for_each_entry(sub, &leader->sibling_list, list_entry) {
err = perf_counter_read_entry(counter, read_format,
buf + size);
if (err < 0)
return err;
size += err;
}
return size;
}
static int perf_counter_read_one(struct perf_counter *counter,
u64 read_format, char __user *buf)
{
u64 values[4];
int n = 0;
values[n++] = perf_counter_read_value(counter);
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
values[n++] = counter->total_time_enabled +
atomic64_read(&counter->child_total_time_enabled);
}
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
values[n++] = counter->total_time_running +
atomic64_read(&counter->child_total_time_running);
}
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_counter_id(counter);
if (copy_to_user(buf, values, n * sizeof(u64)))
return -EFAULT;
return n * sizeof(u64);
}
/*
* Read the performance counter - simple non blocking version for now
*/
static ssize_t
perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
{
u64 values[4];
int n;
u64 read_format = counter->attr.read_format;
int ret;
/*
* Return end-of-file for a read on a counter that is in
@ -1720,28 +1832,18 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
if (counter->state == PERF_COUNTER_STATE_ERROR)
return 0;
if (count < perf_counter_read_size(counter))
return -ENOSPC;
WARN_ON_ONCE(counter->ctx->parent_ctx);
mutex_lock(&counter->child_mutex);
values[0] = perf_counter_read_tree(counter);
n = 1;
if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
values[n++] = counter->total_time_enabled +
atomic64_read(&counter->child_total_time_enabled);
if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
values[n++] = counter->total_time_running +
atomic64_read(&counter->child_total_time_running);
if (counter->attr.read_format & PERF_FORMAT_ID)
values[n++] = primary_counter_id(counter);
if (read_format & PERF_FORMAT_GROUP)
ret = perf_counter_read_group(counter, read_format, buf);
else
ret = perf_counter_read_one(counter, read_format, buf);
mutex_unlock(&counter->child_mutex);
if (count < n * sizeof(u64))
return -EINVAL;
count = n * sizeof(u64);
if (copy_to_user(buf, values, count))
return -EFAULT;
return count;
return ret;
}
static ssize_t
@ -2245,7 +2347,7 @@ static void perf_pending_counter(struct perf_pending_entry *entry)
if (counter->pending_disable) {
counter->pending_disable = 0;
perf_counter_disable(counter);
__perf_counter_disable(counter);
}
if (counter->pending_wakeup) {
@ -2630,7 +2732,80 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p)
return task_pid_nr_ns(p, counter->ns);
}
static void perf_counter_output(struct perf_counter *counter, int nmi,
static void perf_output_read_one(struct perf_output_handle *handle,
struct perf_counter *counter)
{
u64 read_format = counter->attr.read_format;
u64 values[4];
int n = 0;
values[n++] = atomic64_read(&counter->count);
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
values[n++] = counter->total_time_enabled +
atomic64_read(&counter->child_total_time_enabled);
}
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
values[n++] = counter->total_time_running +
atomic64_read(&counter->child_total_time_running);
}
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_counter_id(counter);
perf_output_copy(handle, values, n * sizeof(u64));
}
/*
* XXX PERF_FORMAT_GROUP vs inherited counters seems difficult.
*/
static void perf_output_read_group(struct perf_output_handle *handle,
struct perf_counter *counter)
{
struct perf_counter *leader = counter->group_leader, *sub;
u64 read_format = counter->attr.read_format;
u64 values[5];
int n = 0;
values[n++] = 1 + leader->nr_siblings;
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
values[n++] = leader->total_time_enabled;
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
values[n++] = leader->total_time_running;
if (leader != counter)
leader->pmu->read(leader);
values[n++] = atomic64_read(&leader->count);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_counter_id(leader);
perf_output_copy(handle, values, n * sizeof(u64));
list_for_each_entry(sub, &leader->sibling_list, list_entry) {
n = 0;
if (sub != counter)
sub->pmu->read(sub);
values[n++] = atomic64_read(&sub->count);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_counter_id(sub);
perf_output_copy(handle, values, n * sizeof(u64));
}
}
static void perf_output_read(struct perf_output_handle *handle,
struct perf_counter *counter)
{
if (counter->attr.read_format & PERF_FORMAT_GROUP)
perf_output_read_group(handle, counter);
else
perf_output_read_one(handle, counter);
}
void perf_counter_output(struct perf_counter *counter, int nmi,
struct perf_sample_data *data)
{
int ret;
@ -2641,10 +2816,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
struct {
u32 pid, tid;
} tid_entry;
struct {
u64 id;
u64 counter;
} group_entry;
struct perf_callchain_entry *callchain = NULL;
int callchain_size = 0;
u64 time;
@ -2699,10 +2870,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
if (sample_type & PERF_SAMPLE_PERIOD)
header.size += sizeof(u64);
if (sample_type & PERF_SAMPLE_GROUP) {
header.size += sizeof(u64) +
counter->nr_siblings * sizeof(group_entry);
}
if (sample_type & PERF_SAMPLE_READ)
header.size += perf_counter_read_size(counter);
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
callchain = perf_callchain(data->regs);
@ -2759,26 +2928,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
if (sample_type & PERF_SAMPLE_PERIOD)
perf_output_put(&handle, data->period);
/*
* XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult.
*/
if (sample_type & PERF_SAMPLE_GROUP) {
struct perf_counter *leader, *sub;
u64 nr = counter->nr_siblings;
perf_output_put(&handle, nr);
leader = counter->group_leader;
list_for_each_entry(sub, &leader->sibling_list, list_entry) {
if (sub != counter)
sub->pmu->read(sub);
group_entry.id = primary_counter_id(sub);
group_entry.counter = atomic64_read(&sub->count);
perf_output_put(&handle, group_entry);
}
}
if (sample_type & PERF_SAMPLE_READ)
perf_output_read(&handle, counter);
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
if (callchain)
@ -2817,8 +2968,6 @@ struct perf_read_event {
u32 pid;
u32 tid;
u64 value;
u64 format[3];
};
static void
@ -2830,34 +2979,20 @@ perf_counter_read_event(struct perf_counter *counter,
.header = {
.type = PERF_EVENT_READ,
.misc = 0,
.size = sizeof(event) - sizeof(event.format),
.size = sizeof(event) + perf_counter_read_size(counter),
},
.pid = perf_counter_pid(counter, task),
.tid = perf_counter_tid(counter, task),
.value = atomic64_read(&counter->count),
};
int ret, i = 0;
if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
event.header.size += sizeof(u64);
event.format[i++] = counter->total_time_enabled;
}
if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
event.header.size += sizeof(u64);
event.format[i++] = counter->total_time_running;
}
if (counter->attr.read_format & PERF_FORMAT_ID) {
event.header.size += sizeof(u64);
event.format[i++] = primary_counter_id(counter);
}
int ret;
ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
if (ret)
return;
perf_output_copy(&handle, &event, event.header.size);
perf_output_put(&handle, event);
perf_output_read(&handle, counter);
perf_output_end(&handle);
}
@ -2893,10 +3028,10 @@ static void perf_counter_task_output(struct perf_counter *counter,
return;
task_event->event.pid = perf_counter_pid(counter, task);
task_event->event.ppid = perf_counter_pid(counter, task->real_parent);
task_event->event.ppid = perf_counter_pid(counter, current);
task_event->event.tid = perf_counter_tid(counter, task);
task_event->event.ptid = perf_counter_tid(counter, task->real_parent);
task_event->event.ptid = perf_counter_tid(counter, current);
perf_output_put(&handle, task_event->event);
perf_output_end(&handle);
@ -3443,40 +3578,32 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
static int perf_swcounter_is_counting(struct perf_counter *counter)
{
struct perf_counter_context *ctx;
unsigned long flags;
int count;
/*
* The counter is active, we're good!
*/
if (counter->state == PERF_COUNTER_STATE_ACTIVE)
return 1;
/*
* The counter is off/error, not counting.
*/
if (counter->state != PERF_COUNTER_STATE_INACTIVE)
return 0;
/*
* If the counter is inactive, it could be just because
* its task is scheduled out, or because it's in a group
* which could not go on the PMU. We want to count in
* the first case but not the second. If the context is
* currently active then an inactive software counter must
* be the second case. If it's not currently active then
* we need to know whether the counter was active when the
* context was last active, which we can determine by
* comparing counter->tstamp_stopped with ctx->time.
*
* We are within an RCU read-side critical section,
* which protects the existence of *ctx.
* The counter is inactive, if the context is active
* we're part of a group that didn't make it on the 'pmu',
* not counting.
*/
ctx = counter->ctx;
spin_lock_irqsave(&ctx->lock, flags);
count = 1;
/* Re-check state now we have the lock */
if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
counter->ctx->is_active ||
counter->tstamp_stopped < ctx->time)
count = 0;
spin_unlock_irqrestore(&ctx->lock, flags);
return count;
if (counter->ctx->is_active)
return 0;
/*
* We're inactive and the context is too, this means the
* task is scheduled out, we're counting events that happen
* to us, like migration events.
*/
return 1;
}
static int perf_swcounter_match(struct perf_counter *counter,
@ -3928,9 +4055,9 @@ perf_counter_alloc(struct perf_counter_attr *attr,
atomic64_set(&hwc->period_left, hwc->sample_period);
/*
* we currently do not support PERF_SAMPLE_GROUP on inherited counters
* we currently do not support PERF_FORMAT_GROUP on inherited counters
*/
if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
goto done;
switch (attr->type) {
@ -4592,6 +4719,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
perf_counter_init_cpu(cpu);
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
hw_perf_counter_setup_online(cpu);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
perf_counter_exit_cpu(cpu);
@ -4616,6 +4748,8 @@ void __init perf_counter_init(void)
{
perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
(void *)(long)smp_processor_id());
register_cpu_notifier(&perf_cpu_nb);
}

View file

@ -10,13 +10,14 @@
#include <linux/wait.h>
#include <linux/hash.h>
void init_waitqueue_head(wait_queue_head_t *q)
void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key)
{
spin_lock_init(&q->lock);
lockdep_set_class(&q->lock, key);
INIT_LIST_HEAD(&q->task_list);
}
EXPORT_SYMBOL(init_waitqueue_head);
EXPORT_SYMBOL(__init_waitqueue_head);
void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{

View file

@ -736,7 +736,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
if (more)
flags |= MSG_MORE;
return sock->ops->sendpage(sock, page, offset, size, flags);
return kernel_sendpage(sock, page, offset, size, flags);
}
static ssize_t sock_splice_read(struct file *file, loff_t *ppos,

View file

@ -382,22 +382,29 @@ endif
ifdef NO_DEMANGLE
BASIC_CFLAGS += -DNO_DEMANGLE
else
has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y")
has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
ifeq ($(has_bfd),y)
EXTLIBS += -lbfd
else ifeq ($(has_bfd_iberty),y)
EXTLIBS += -lbfd -liberty
else ifeq ($(has_bfd_iberty_z),y)
EXTLIBS += -lbfd -liberty -lz
else
msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
BASIC_CFLAGS += -DNO_DEMANGLE
has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
ifeq ($(has_bfd_iberty),y)
EXTLIBS += -lbfd -liberty
else
has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
ifeq ($(has_bfd_iberty_z),y)
EXTLIBS += -lbfd -liberty -lz
else
has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -liberty > /dev/null 2>&1 && echo y")
ifeq ($(has_cplus_demangle),y)
EXTLIBS += -liberty
BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
else
msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
BASIC_CFLAGS += -DNO_DEMANGLE
endif
endif
endif
endif
endif

View file

@ -10,11 +10,12 @@
#include "perf.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/cache.h"
int cmd_list(int argc __used, const char **argv __used, const char *prefix __used)
{
setup_pager();
print_events();
return 0;
}

View file

@ -34,7 +34,9 @@ static int output;
static const char *output_name = "perf.data";
static int group = 0;
static unsigned int realtime_prio = 0;
static int raw_samples = 0;
static int system_wide = 0;
static int profile_cpu = -1;
static pid_t target_pid = -1;
static int inherit = 1;
static int force = 0;
@ -203,46 +205,48 @@ static void sig_atexit(void)
kill(getpid(), signr);
}
static void pid_synthesize_comm_event(pid_t pid, int full)
static pid_t pid_synthesize_comm_event(pid_t pid, int full)
{
struct comm_event comm_ev;
char filename[PATH_MAX];
char bf[BUFSIZ];
int fd;
size_t size;
char *field, *sep;
FILE *fp;
size_t size = 0;
DIR *tasks;
struct dirent dirent, *next;
pid_t tgid = 0;
snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
fd = open(filename, O_RDONLY);
if (fd < 0) {
fp = fopen(filename, "r");
if (fd == NULL) {
/*
* We raced with a task exiting - just return:
*/
if (verbose)
fprintf(stderr, "couldn't open %s\n", filename);
return;
return 0;
}
if (read(fd, bf, sizeof(bf)) < 0) {
fprintf(stderr, "couldn't read %s\n", filename);
exit(EXIT_FAILURE);
}
close(fd);
/* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
memset(&comm_ev, 0, sizeof(comm_ev));
field = strchr(bf, '(');
if (field == NULL)
goto out_failure;
sep = strchr(++field, ')');
if (sep == NULL)
goto out_failure;
size = sep - field;
memcpy(comm_ev.comm, field, size++);
while (!comm_ev.comm[0] || !comm_ev.pid) {
if (fgets(bf, sizeof(bf), fp) == NULL)
goto out_failure;
if (memcmp(bf, "Name:", 5) == 0) {
char *name = bf + 5;
while (*name && isspace(*name))
++name;
size = strlen(name) - 1;
memcpy(comm_ev.comm, name, size++);
} else if (memcmp(bf, "Tgid:", 5) == 0) {
char *tgids = bf + 5;
while (*tgids && isspace(*tgids))
++tgids;
tgid = comm_ev.pid = atoi(tgids);
}
}
comm_ev.pid = pid;
comm_ev.header.type = PERF_EVENT_COMM;
size = ALIGN(size, sizeof(u64));
comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
@ -251,7 +255,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
comm_ev.tid = pid;
write_output(&comm_ev, comm_ev.header.size);
return;
goto out_fclose;
}
snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
@ -268,7 +272,10 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
write_output(&comm_ev, comm_ev.header.size);
}
closedir(tasks);
return;
out_fclose:
fclose(fp);
return tgid;
out_failure:
fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
@ -276,7 +283,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
exit(EXIT_FAILURE);
}
static void pid_synthesize_mmap_samples(pid_t pid)
static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid)
{
char filename[PATH_MAX];
FILE *fp;
@ -328,7 +335,7 @@ static void pid_synthesize_mmap_samples(pid_t pid)
mmap_ev.len -= mmap_ev.start;
mmap_ev.header.size = (sizeof(mmap_ev) -
(sizeof(mmap_ev.filename) - size));
mmap_ev.pid = pid;
mmap_ev.pid = tgid;
mmap_ev.tid = pid;
write_output(&mmap_ev, mmap_ev.header.size);
@ -347,14 +354,14 @@ static void synthesize_all(void)
while (!readdir_r(proc, &dirent, &next) && next) {
char *end;
pid_t pid;
pid_t pid, tgid;
pid = strtol(dirent.d_name, &end, 10);
if (*end) /* only interested in proper numerical dirents */
continue;
pid_synthesize_comm_event(pid, 1);
pid_synthesize_mmap_samples(pid);
tgid = pid_synthesize_comm_event(pid, 1);
pid_synthesize_mmap_samples(pid, tgid);
}
closedir(proc);
@ -392,7 +399,7 @@ static void create_counter(int counter, int cpu, pid_t pid)
PERF_FORMAT_TOTAL_TIME_RUNNING |
PERF_FORMAT_ID;
attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
if (freq) {
attr->sample_type |= PERF_SAMPLE_PERIOD;
@ -412,6 +419,8 @@ static void create_counter(int counter, int cpu, pid_t pid)
if (call_graph)
attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
if (raw_samples)
attr->sample_type |= PERF_SAMPLE_RAW;
attr->mmap = track;
attr->comm = track;
@ -426,6 +435,8 @@ static void create_counter(int counter, int cpu, pid_t pid)
if (err == EPERM)
die("Permission error - are you root?\n");
else if (err == ENODEV && profile_cpu != -1)
die("No such device - did you specify an out-of-range profile CPU?\n");
/*
* If it's cycles then fall back to hrtimer
@ -559,16 +570,22 @@ static int __cmd_record(int argc, const char **argv)
if (pid == -1)
pid = getpid();
open_counters(-1, pid);
} else for (i = 0; i < nr_cpus; i++)
open_counters(i, target_pid);
open_counters(profile_cpu, pid);
} else {
if (profile_cpu != -1) {
open_counters(profile_cpu, target_pid);
} else {
for (i = 0; i < nr_cpus; i++)
open_counters(i, target_pid);
}
}
if (file_new)
perf_header__write(header, output);
if (!system_wide) {
pid_synthesize_comm_event(pid, 0);
pid_synthesize_mmap_samples(pid);
pid_t tgid = pid_synthesize_comm_event(pid, 0);
pid_synthesize_mmap_samples(pid, tgid);
} else
synthesize_all();
@ -636,10 +653,14 @@ static const struct option options[] = {
"record events on existing pid"),
OPT_INTEGER('r', "realtime", &realtime_prio,
"collect data with this RT SCHED_FIFO priority"),
OPT_BOOLEAN('R', "raw-samples", &raw_samples,
"collect raw sample records from all opened counters"),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_BOOLEAN('A', "append", &append_file,
"append to the output file to do incremental profiling"),
OPT_INTEGER('C', "profile_cpu", &profile_cpu,
"CPU to profile on"),
OPT_BOOLEAN('f', "force", &force,
"overwrite existing data file"),
OPT_LONG('c', "count", &default_interval,

View file

@ -1526,11 +1526,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
more_data += sizeof(u64);
}
dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n",
dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
(void *)(offset + head),
(void *)(long)(event->header.size),
event->header.misc,
event->ip.pid,
event->ip.pid, event->ip.tid,
(void *)(long)ip,
(long long)period);
@ -1590,10 +1590,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
if (show & show_mask) {
struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip);
if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name))
if (dso_list && (!dso || !dso->name ||
!strlist__has_entry(dso_list, dso->name)))
return 0;
if (sym_list && sym && !strlist__has_entry(sym_list, sym->name))
if (sym_list && (!sym || !strlist__has_entry(sym_list, sym->name)))
return 0;
if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) {
@ -1612,10 +1613,11 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
struct thread *thread = threads__findnew(event->mmap.pid);
struct map *map = map__new(&event->mmap);
dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n",
dprintf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n",
(void *)(offset + head),
(void *)(long)(event->header.size),
event->mmap.pid,
event->mmap.tid,
(void *)(long)event->mmap.start,
(void *)(long)event->mmap.len,
(void *)(long)event->mmap.pgoff,

View file

@ -379,6 +379,7 @@ static int parse_tracepoint_event(const char **strp,
struct perf_counter_attr *attr)
{
const char *evt_name;
char *flags;
char sys_name[MAX_EVENT_LENGTH];
char id_buf[4];
int fd;
@ -400,6 +401,15 @@ static int parse_tracepoint_event(const char **strp,
strncpy(sys_name, *strp, sys_length);
sys_name[sys_length] = '\0';
evt_name = evt_name + 1;
flags = strchr(evt_name, ':');
if (flags) {
*flags = '\0';
flags++;
if (!strncmp(flags, "record", strlen(flags)))
attr->sample_type |= PERF_SAMPLE_RAW;
}
evt_length = strlen(evt_name);
if (evt_length >= MAX_EVENT_LENGTH)
return 0;

View file

@ -7,23 +7,8 @@
#include <gelf.h>
#include <elf.h>
#ifndef NO_DEMANGLE
#include <bfd.h>
#else
static inline
char *bfd_demangle(void __used *v, const char __used *c, int __used i)
{
return NULL;
}
#endif
const char *sym_hist_filter;
#ifndef DMGL_PARAMS
#define DMGL_PARAMS (1 << 0) /* Include function args */
#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
#endif
enum dso_origin {
DSO__ORIG_KERNEL = 0,
DSO__ORIG_JAVA_JIT,
@ -816,6 +801,8 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
}
out:
free(name);
if (ret < 0 && strstr(self->name, " (deleted)") != NULL)
return 0;
return ret;
}

View file

@ -7,6 +7,30 @@
#include <linux/rbtree.h>
#include "module.h"
#ifdef HAVE_CPLUS_DEMANGLE
extern char *cplus_demangle(const char *, int);
static inline char *bfd_demangle(void __used *v, const char *c, int i)
{
return cplus_demangle(c, i);
}
#else
#ifdef NO_DEMANGLE
static inline char *bfd_demangle(void __used *v, const char __used *c,
int __used i)
{
return NULL;
}
#else
#include <bfd.h>
#endif
#endif
#ifndef DMGL_PARAMS
#define DMGL_PARAMS (1 << 0) /* Include function args */
#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
#endif
struct symbol {
struct rb_node rb_node;
u64 start;