Index: linux-cfs-2.6.23.1.q/Documentation/stable_api_nonsense.txt =================================================================== --- linux-cfs-2.6.23.1.q.orig/Documentation/stable_api_nonsense.txt +++ linux-cfs-2.6.23.1.q/Documentation/stable_api_nonsense.txt @@ -62,6 +62,9 @@ consider the following facts about the L - different structures can contain different fields - Some functions may not be implemented at all, (i.e. some locks compile away to nothing for non-SMP builds.) + - Parameter passing of variables from function to function can be + done in different ways (the CONFIG_REGPARM option controls + this.) - Memory within the kernel can be aligned in different ways, depending on the build options. - Linux runs on a wide range of different processor architectures. Index: linux-cfs-2.6.23.1.q/Makefile =================================================================== --- linux-cfs-2.6.23.1.q.orig/Makefile +++ linux-cfs-2.6.23.1.q/Makefile @@ -491,10 +491,15 @@ endif include $(srctree)/arch/$(ARCH)/Makefile -ifdef CONFIG_FRAME_POINTER -CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls + +ifdef CONFIG_MCOUNT +CFLAGS += -pg -fno-omit-frame-pointer -fno-optimize-sibling-calls else -CFLAGS += -fomit-frame-pointer + ifdef CONFIG_FRAME_POINTER + CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls + else + CFLAGS += -fomit-frame-pointer + endif endif ifdef CONFIG_DEBUG_INFO Index: linux-cfs-2.6.23.1.q/arch/arm/boot/compressed/head.S =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/arm/boot/compressed/head.S +++ linux-cfs-2.6.23.1.q/arch/arm/boot/compressed/head.S @@ -928,6 +928,19 @@ memdump: mov r12, r0 #endif .ltorg +#ifdef CONFIG_MCOUNT +/* CONFIG_MCOUNT causes boot header to be built with -pg requiring this + * trampoline + */ + .text + .align 0 + .type mcount %function + .global mcount +mcount: + mov pc, lr @ just return +#endif + + reloc_end: .align Index: 
linux-cfs-2.6.23.1.q/arch/arm/kernel/entry-common.S =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/arm/kernel/entry-common.S +++ linux-cfs-2.6.23.1.q/arch/arm/kernel/entry-common.S @@ -3,6 +3,8 @@ * * Copyright (C) 2000 Russell King * + * FUNCTION_TRACE/mcount support (C) 2005 Timesys john.cooper@timesys.com + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -395,5 +397,112 @@ ENTRY(sys_oabi_call_table) #undef ABI #undef OBSOLETE +#ifdef CONFIG_FRAME_POINTER + +#ifdef CONFIG_MCOUNT +/* + * At the point where we are in mcount() we maintain the + * frame of the prologue code and keep the call to mcount() + * out of the stack frame list: + + saved pc <---\ caller of instrumented routine + saved lr | + ip/prev_sp | + fp -----^ | + : | + | + -> saved pc | instrumented routine + | saved lr | + | ip/prev_sp | + | fp ---------/ + | : + | + | mcount + | saved pc + | saved lr + | ip/prev sp + -- fp + r3 + r2 + r1 + sp-> r0 + : + */ + + .text + .align 0 + .type mcount %function + .global mcount + +/* gcc -pg generated FUNCTION_PROLOGUE references mcount() + * and has already created the stack frame invocation for + * the routine we have been called to instrument. We create + * a complete frame nevertheless, as we want to use the same + * call to mcount() from c code. 
+ */ +mcount: + + ldr ip, =mcount_enabled @ leave early, if disabled + ldr ip, [ip] + cmp ip, #0 + moveq pc,lr + + mov ip, sp + stmdb sp!, {r0 - r3, fp, ip, lr, pc} @ create stack frame + + ldr r1, [fp, #-4] @ get lr (the return address + @ of the caller of the + @ instrumented function) + mov r0, lr @ get lr - (the return address + @ of the instrumented function) + + sub fp, ip, #4 @ point fp at this frame + + bl __trace +1: + ldmdb fp, {r0 - r3, fp, sp, pc} @ pop entry frame and return + +#endif + +/* ARM replacement for unsupported gcc __builtin_return_address(n) + * where 0 < n. n == 0 is supported here as well. + * + * Walk up the stack frame until the desired frame is found or a NULL + * fp is encountered, return NULL in the latter case. + * + * Note: it is possible under code optimization for the stack invocation + * of an ancestor function (level N) to be removed before calling a + * descendant function (level N+1). No easy means is available to deduce + * this scenario with the result being [for example] caller_addr(0) when + * called from level N+1 returning level N-1 rather than the expected + * level N. This optimization issue appears isolated to the case of + * a call to a level N+1 routine made at the tail end of a level N + * routine -- the level N frame is deleted and a simple branch is made + * to the level N+1 routine. 
+ */ + + .text + .align 0 + .type arm_return_addr %function + .global arm_return_addr + +arm_return_addr: + mov ip, r0 + mov r0, fp +3: + cmp r0, #0 + beq 1f @ frame list hit end, bail + cmp ip, #0 + beq 2f @ reached desired frame + ldr r0, [r0, #-12] @ else continue, get next fp + sub ip, ip, #1 + b 3b +2: + ldr r0, [r0, #-4] @ get target return address +1: + mov pc, lr + +#endif + #endif Index: linux-cfs-2.6.23.1.q/arch/arm/kernel/fiq.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/arm/kernel/fiq.c +++ linux-cfs-2.6.23.1.q/arch/arm/kernel/fiq.c @@ -89,7 +89,7 @@ void set_fiq_handler(void *start, unsign * disable irqs for the duration. Note - these functions are almost * entirely coded in assembly. */ -void __attribute__((naked)) set_fiq_regs(struct pt_regs *regs) +void notrace __attribute__((naked)) set_fiq_regs(struct pt_regs *regs) { register unsigned long tmp; asm volatile ( @@ -107,7 +107,7 @@ void __attribute__((naked)) set_fiq_regs : "r" (&regs->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | FIQ_MODE)); } -void __attribute__((naked)) get_fiq_regs(struct pt_regs *regs) +void notrace __attribute__((naked)) get_fiq_regs(struct pt_regs *regs) { register unsigned long tmp; asm volatile ( Index: linux-cfs-2.6.23.1.q/arch/arm/kernel/irq.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/arm/kernel/irq.c +++ linux-cfs-2.6.23.1.q/arch/arm/kernel/irq.c @@ -108,11 +108,13 @@ static struct irq_desc bad_irq_desc = { * come via this function. Instead, they should provide their * own 'handler' */ -asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs) +asmlinkage void __exception notrace asm_do_IRQ(unsigned int irq, struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); struct irq_desc *desc = irq_desc + irq; + trace_special(instruction_pointer(regs), irq, 0); + /* * Some hardware gives randomly wrong interrupts. 
Rather * than crashing, do something sensible. Index: linux-cfs-2.6.23.1.q/arch/arm/kernel/traps.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/arm/kernel/traps.c +++ linux-cfs-2.6.23.1.q/arch/arm/kernel/traps.c @@ -354,6 +354,7 @@ asmlinkage void do_unexp_fiq (struct pt_ { printk("Hmm. Unexpected FIQ received, but trying to continue\n"); printk("You may have a hardware problem...\n"); + print_traces(current); } /* Index: linux-cfs-2.6.23.1.q/arch/arm/mm/copypage-v4mc.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/arm/mm/copypage-v4mc.c +++ linux-cfs-2.6.23.1.q/arch/arm/mm/copypage-v4mc.c @@ -44,7 +44,7 @@ static DEFINE_SPINLOCK(minicache_lock); * instruction. If your processor does not supply this, you have to write your * own copy_user_page that does the right thing. */ -static void __attribute__((naked)) +static void notrace __attribute__((naked)) mc_copy_user_page(void *from, void *to) { asm volatile( @@ -88,7 +88,7 @@ void v4_mc_copy_user_page(void *kto, con /* * ARMv4 optimised clear_user_page */ -void __attribute__((naked)) +void notrace __attribute__((naked)) v4_mc_clear_user_page(void *kaddr, unsigned long vaddr) { asm volatile( Index: linux-cfs-2.6.23.1.q/arch/arm/mm/copypage-xscale.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/arm/mm/copypage-xscale.c +++ linux-cfs-2.6.23.1.q/arch/arm/mm/copypage-xscale.c @@ -42,7 +42,7 @@ static DEFINE_SPINLOCK(minicache_lock); * Dcache aliasing issue. The writes will be forwarded to the write buffer, * and merged as appropriate. 
*/ -static void __attribute__((naked)) +static void notrace __attribute__((naked)) mc_copy_user_page(void *from, void *to) { /* @@ -110,7 +110,7 @@ void xscale_mc_copy_user_page(void *kto, /* * XScale optimised clear_user_page */ -void __attribute__((naked)) +void notrace __attribute__((naked)) xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr) { asm volatile( Index: linux-cfs-2.6.23.1.q/arch/arm/mm/fault.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/arm/mm/fault.c +++ linux-cfs-2.6.23.1.q/arch/arm/mm/fault.c @@ -215,7 +215,7 @@ out: return fault; } -static int +static notrace int do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { struct task_struct *tsk; @@ -311,7 +311,7 @@ no_context: * interrupt or a critical region, and should only copy the information * from the master page table, nothing more. */ -static int +static notrace int do_translation_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { @@ -354,7 +354,7 @@ bad_area: * Some section permission faults need to be handled gracefully. * They can happen due to a __{get,put}_user during an oops. */ -static int +static notrace int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { do_bad_area(addr, fsr, regs); @@ -364,7 +364,7 @@ do_sect_fault(unsigned long addr, unsign /* * This abort handler always returns "fault". */ -static int +static notrace int do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { return 1; @@ -419,7 +419,7 @@ static struct fsr_info { { do_bad, SIGBUS, 0, "unknown 31" } }; -void __init +void __init notrace hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), int sig, const char *name) { @@ -433,7 +433,7 @@ hook_fault_code(int nr, int (*fn)(unsign /* * Dispatch a data abort to the relevant handler. 
*/ -asmlinkage void __exception +asmlinkage void __exception notrace do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6); @@ -452,7 +452,7 @@ do_DataAbort(unsigned long addr, unsigne arm_notify_die("", regs, &info, fsr, 0); } -asmlinkage void __exception +asmlinkage void __exception notrace do_PrefetchAbort(unsigned long addr, struct pt_regs *regs) { do_translation_fault(addr, 0, regs); Index: linux-cfs-2.6.23.1.q/arch/i386/Kconfig =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/i386/Kconfig +++ linux-cfs-2.6.23.1.q/arch/i386/Kconfig @@ -779,6 +779,14 @@ config BOOT_IOREMAP depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) default y +# +# function tracing might turn this off: +# +config REGPARM + bool + depends on !MCOUNT + default y + config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS Index: linux-cfs-2.6.23.1.q/arch/i386/Makefile =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/i386/Makefile +++ linux-cfs-2.6.23.1.q/arch/i386/Makefile @@ -31,7 +31,7 @@ LDFLAGS_vmlinux := --emit-relocs endif CHECKFLAGS += -D__i386__ -CFLAGS += -pipe -msoft-float -mregparm=3 -freg-struct-return +CFLAGS += -pipe -msoft-float -freg-struct-return # prevent gcc from keeping the stack 16 byte aligned CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) @@ -39,6 +39,8 @@ CFLAGS += $(call cc-option,-mpreferred-s # CPU-specific tuning. Anything which can be shared with UML should go here. 
include $(srctree)/arch/i386/Makefile.cpu +cflags-$(CONFIG_REGPARM) += -mregparm=3 + # temporary until string.h is fixed cflags-y += -ffreestanding Index: linux-cfs-2.6.23.1.q/arch/i386/boot/compressed/Makefile =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/i386/boot/compressed/Makefile +++ linux-cfs-2.6.23.1.q/arch/i386/boot/compressed/Makefile @@ -9,6 +9,7 @@ targets := vmlinux vmlinux.bin vmlinux. EXTRA_AFLAGS := -traditional LDFLAGS_vmlinux := -T +CFLAGS := -m32 -D__KERNEL__ -Iinclude -O2 -fno-strict-aliasing hostprogs-y := relocs CFLAGS := -m32 -D__KERNEL__ $(LINUX_INCLUDE) -O2 \ Index: linux-cfs-2.6.23.1.q/arch/i386/kernel/Makefile =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/i386/kernel/Makefile +++ linux-cfs-2.6.23.1.q/arch/i386/kernel/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o smpboot.o tsc_sync.o obj-$(CONFIG_SMP) += smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o +obj-$(CONFIG_MCOUNT) += mcount-wrapper.o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o Index: linux-cfs-2.6.23.1.q/arch/i386/kernel/apic.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/i386/kernel/apic.c +++ linux-cfs-2.6.23.1.q/arch/i386/kernel/apic.c @@ -581,6 +581,8 @@ void fastcall smp_apic_timer_interrupt(s { struct pt_regs *old_regs = set_irq_regs(regs); + trace_special(regs->eip, 1, 0); + /* * NOTE! We'd better ACK the irq immediately, * because timer handling can be slow. 
Index: linux-cfs-2.6.23.1.q/arch/i386/kernel/entry.S =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/i386/kernel/entry.S +++ linux-cfs-2.6.23.1.q/arch/i386/kernel/entry.S @@ -329,6 +329,11 @@ sysenter_past_esp: pushl %eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL +#ifdef CONFIG_EVENT_TRACE + pushl %edx; pushl %ecx; pushl %ebx; pushl %eax + call sys_call + popl %eax; popl %ebx; popl %ecx; popl %edx +#endif GET_THREAD_INFO(%ebp) /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ @@ -343,6 +348,11 @@ sysenter_past_esp: movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work +#ifdef CONFIG_EVENT_TRACE + pushl %eax + call sys_ret + popl %eax +#endif /* if something modifies registers it must also disable sysexit */ movl PT_EIP(%esp), %edx movl PT_OLDESP(%esp), %ecx @@ -366,6 +376,11 @@ ENTRY(system_call) pushl %eax # save orig_eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL +#ifdef CONFIG_EVENT_TRACE + pushl %edx; pushl %ecx; pushl %ebx; pushl %eax + call sys_call + popl %eax; popl %ebx; popl %ecx; popl %edx +#endif GET_THREAD_INFO(%ebp) # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ Index: linux-cfs-2.6.23.1.q/arch/i386/kernel/hpet.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/i386/kernel/hpet.c +++ linux-cfs-2.6.23.1.q/arch/i386/kernel/hpet.c @@ -210,7 +210,7 @@ static int hpet_next_event(unsigned long /* * Clock source related code */ -static cycle_t read_hpet(void) +static cycle_t notrace read_hpet(void) { return (cycle_t)hpet_readl(HPET_COUNTER); } Index: linux-cfs-2.6.23.1.q/arch/i386/kernel/irq.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/i386/kernel/irq.c +++ linux-cfs-2.6.23.1.q/arch/i386/kernel/irq.c @@ -68,7 +68,7 @@ static union irq_ctx *softirq_ctx[NR_CPU * SMP cross-CPU interrupts 
have their own specific * handlers). */ -fastcall unsigned int do_IRQ(struct pt_regs *regs) +fastcall notrace unsigned int do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs; /* high bit used in ret_from_ code */ @@ -87,6 +87,11 @@ fastcall unsigned int do_IRQ(struct pt_r old_regs = set_irq_regs(regs); irq_enter(); +#ifdef CONFIG_EVENT_TRACE + if (irq == trace_user_trigger_irq) + user_trace_start(); +#endif + trace_special(regs->eip, irq, 0); #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 1KB free? */ { Index: linux-cfs-2.6.23.1.q/arch/i386/kernel/mcount-wrapper.S =================================================================== --- /dev/null +++ linux-cfs-2.6.23.1.q/arch/i386/kernel/mcount-wrapper.S @@ -0,0 +1,27 @@ +/* + * linux/arch/i386/mcount-wrapper.S + * + * Copyright (C) 2004 Ingo Molnar + */ + +.globl mcount +mcount: + + cmpl $0, mcount_enabled + jz out + + push %ebp + mov %esp, %ebp + pushl %eax + pushl %ecx + pushl %edx + + call __mcount + + popl %edx + popl %ecx + popl %eax + popl %ebp +out: + ret + Index: linux-cfs-2.6.23.1.q/arch/i386/kernel/paravirt.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/i386/kernel/paravirt.c +++ linux-cfs-2.6.23.1.q/arch/i386/kernel/paravirt.c @@ -208,7 +208,7 @@ void init_IRQ(void) paravirt_ops.init_IRQ(); } -static void native_flush_tlb(void) +static fastcall void native_flush_tlb(void) { __native_flush_tlb(); } @@ -217,12 +217,12 @@ static void native_flush_tlb(void) * Global pages have to be flushed a bit differently. Not a real * performance problem because this does not happen often. 
*/ -static void native_flush_tlb_global(void) +static fastcall void native_flush_tlb_global(void) { __native_flush_tlb_global(); } -static void native_flush_tlb_single(unsigned long addr) +static fastcall void native_flush_tlb_single(unsigned long addr) { __native_flush_tlb_single(addr); } Index: linux-cfs-2.6.23.1.q/arch/i386/kernel/smp.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/i386/kernel/smp.c +++ linux-cfs-2.6.23.1.q/arch/i386/kernel/smp.c @@ -344,8 +344,9 @@ out: put_cpu_no_resched(); } -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, - unsigned long va) +void fastcall +native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, + unsigned long va) { cpumask_t cpumask = *cpumaskp; @@ -470,6 +471,7 @@ void flush_tlb_all(void) */ static void native_smp_send_reschedule(int cpu) { + trace_special(cpu, 0, 0); WARN_ON(cpu_is_offline(cpu)); send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); } @@ -639,6 +641,7 @@ static void native_smp_send_stop(void) */ fastcall void smp_reschedule_interrupt(struct pt_regs *regs) { + trace_special(regs->eip, 0, 0); ack_APIC_irq(); } Index: linux-cfs-2.6.23.1.q/arch/i386/kernel/traps.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/i386/kernel/traps.c +++ linux-cfs-2.6.23.1.q/arch/i386/kernel/traps.c @@ -236,6 +236,7 @@ show_trace_log_lvl(struct task_struct *t { dump_trace(task, regs, stack, &print_trace_ops, log_lvl); printk("%s =======================\n", log_lvl); + print_traces(task); } void show_trace(struct task_struct *task, struct pt_regs *regs, Index: linux-cfs-2.6.23.1.q/arch/i386/kernel/tsc.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/i386/kernel/tsc.c +++ linux-cfs-2.6.23.1.q/arch/i386/kernel/tsc.c @@ -270,7 +270,7 @@ core_initcall(cpufreq_tsc); static unsigned long current_tsc_khz = 0; -static 
cycle_t read_tsc(void) +static notrace cycle_t read_tsc(void) { cycle_t ret; Index: linux-cfs-2.6.23.1.q/arch/i386/lib/delay.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/i386/lib/delay.c +++ linux-cfs-2.6.23.1.q/arch/i386/lib/delay.c @@ -23,7 +23,7 @@ #endif /* simple loop based delay: */ -static void delay_loop(unsigned long loops) +static notrace void delay_loop(unsigned long loops) { int d0; @@ -38,7 +38,7 @@ static void delay_loop(unsigned long loo } /* TSC based delay: */ -static void delay_tsc(unsigned long loops) +static notrace void delay_tsc(unsigned long loops) { unsigned long bclock, now; @@ -69,7 +69,7 @@ int read_current_timer(unsigned long *ti return -1; } -void __delay(unsigned long loops) +void notrace __delay(unsigned long loops) { delay_fn(loops); } Index: linux-cfs-2.6.23.1.q/arch/i386/mm/fault.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/i386/mm/fault.c +++ linux-cfs-2.6.23.1.q/arch/i386/mm/fault.c @@ -498,6 +498,7 @@ bad_area_nosemaphore: nr = (address - idt_descr.address) >> 3; if (nr == 6) { + stop_trace(); do_invalid_op(regs, 0); return; } Index: linux-cfs-2.6.23.1.q/arch/i386/mm/init.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/i386/mm/init.c +++ linux-cfs-2.6.23.1.q/arch/i386/mm/init.c @@ -193,7 +193,7 @@ static inline int page_kills_ppro(unsign return 0; } -int page_is_ram(unsigned long pagenr) +int notrace page_is_ram(unsigned long pagenr) { int i; unsigned long addr, end; Index: linux-cfs-2.6.23.1.q/arch/powerpc/kernel/time.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/powerpc/kernel/time.c +++ linux-cfs-2.6.23.1.q/arch/powerpc/kernel/time.c @@ -924,6 +924,7 @@ void __init time_init(void) tb_ticks_per_jiffy = ppc_tb_freq / HZ; tb_ticks_per_sec = ppc_tb_freq; tb_ticks_per_usec = ppc_tb_freq / 1000000; + 
cpu_khz = ppc_tb_freq / 1000; tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); calc_cputime_factors(); Index: linux-cfs-2.6.23.1.q/arch/ppc/boot/Makefile =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/ppc/boot/Makefile +++ linux-cfs-2.6.23.1.q/arch/ppc/boot/Makefile @@ -14,6 +14,15 @@ # CFLAGS += -fno-builtin -D__BOOTER__ -Iarch/$(ARCH)/boot/include + +ifdef CONFIG_MCOUNT +# do not trace the boot loader +nullstring := +space := $(nullstring) # end of the line +pg_flag = $(nullstring) -pg # end of the line +CFLAGS := $(subst ${pg_flag},${space},${CFLAGS}) +endif + HOSTCFLAGS += -Iarch/$(ARCH)/boot/include BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd Index: linux-cfs-2.6.23.1.q/arch/x86_64/ia32/ia32entry.S =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/x86_64/ia32/ia32entry.S +++ linux-cfs-2.6.23.1.q/arch/x86_64/ia32/ia32entry.S @@ -132,7 +132,9 @@ sysenter_do_call: cmpl $(IA32_NR_syscalls-1),%eax ja ia32_badsys IA32_ARG_FIXUP 1 + TRACE_SYS_IA32_CALL call *ia32_sys_call_table(,%rax,8) + TRACE_SYS_RET movq %rax,RAX-ARGOFFSET(%rsp) GET_THREAD_INFO(%r10) cli @@ -241,7 +243,9 @@ cstar_do_call: cmpl $IA32_NR_syscalls-1,%eax ja ia32_badsys IA32_ARG_FIXUP 1 + TRACE_SYS_IA32_CALL call *ia32_sys_call_table(,%rax,8) + TRACE_SYS_RET movq %rax,RAX-ARGOFFSET(%rsp) GET_THREAD_INFO(%r10) cli @@ -335,8 +339,10 @@ ia32_do_syscall: cmpl $(IA32_NR_syscalls-1),%eax ja ia32_badsys IA32_ARG_FIXUP + TRACE_SYS_IA32_CALL call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: + TRACE_SYS_RET movq %rax,RAX-ARGOFFSET(%rsp) jmp int_ret_from_sys_call @@ -407,7 +413,7 @@ END(ia32_ptregs_common) .section .rodata,"a" .align 8 -ia32_sys_call_table: +ENTRY(ia32_sys_call_table) .quad sys_restart_syscall .quad sys_exit .quad stub32_fork @@ -733,4 +739,7 @@ ia32_sys_call_table: .quad compat_sys_timerfd .quad sys_eventfd .quad sys32_fallocate +#ifdef 
CONFIG_EVENT_TRACE + .globl ia32_syscall_end +#endif ia32_syscall_end: Index: linux-cfs-2.6.23.1.q/arch/x86_64/kernel/entry.S =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/x86_64/kernel/entry.S +++ linux-cfs-2.6.23.1.q/arch/x86_64/kernel/entry.S @@ -53,6 +53,47 @@ .code64 +#ifdef CONFIG_EVENT_TRACE + +ENTRY(mcount) + cmpl $0, mcount_enabled + jz out + + push %rbp + mov %rsp,%rbp + + push %r11 + push %r10 + push %r9 + push %r8 + push %rdi + push %rsi + push %rdx + push %rcx + push %rax + + mov 0x0(%rbp),%rax + mov 0x8(%rbp),%rdi + mov 0x8(%rax),%rsi + + call __trace + + pop %rax + pop %rcx + pop %rdx + pop %rsi + pop %rdi + pop %r8 + pop %r9 + pop %r10 + pop %r11 + + pop %rbp +out: + ret + +#endif + #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args #endif @@ -234,7 +275,9 @@ ENTRY(system_call) cmpq $__NR_syscall_max,%rax ja badsys movq %r10,%rcx + TRACE_SYS_CALL call *sys_call_table(,%rax,8) # XXX: rip relative + TRACE_SYS_RET movq %rax,RAX-ARGOFFSET(%rsp) /* * Syscall return path ending with SYSRET (fast path) @@ -316,7 +359,9 @@ tracesys: cmova %rcx,%rax ja 1f movq %r10,%rcx /* fixup for C */ + TRACE_SYS_CALL call *sys_call_table(,%rax,8) + TRACE_SYS_RET 1: movq %rax,RAX-ARGOFFSET(%rsp) /* Use IRET because user could have changed frame */ Index: linux-cfs-2.6.23.1.q/arch/x86_64/kernel/head64.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/x86_64/kernel/head64.c +++ linux-cfs-2.6.23.1.q/arch/x86_64/kernel/head64.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -58,7 +59,7 @@ static void __init copy_bootdata(char *r memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); } -void __init x86_64_start_kernel(char * real_mode_data) +void __init notrace x86_64_start_kernel(char * real_mode_data) { int i; Index: linux-cfs-2.6.23.1.q/arch/x86_64/kernel/hpet.c 
=================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/x86_64/kernel/hpet.c +++ linux-cfs-2.6.23.1.q/arch/x86_64/kernel/hpet.c @@ -112,7 +112,7 @@ int hpet_timer_stop_set_go(unsigned long return 0; } -static cycle_t read_hpet(void) +static cycle_t notrace read_hpet(void) { return (cycle_t)hpet_readl(HPET_COUNTER); } Index: linux-cfs-2.6.23.1.q/arch/x86_64/kernel/irq.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/x86_64/kernel/irq.c +++ linux-cfs-2.6.23.1.q/arch/x86_64/kernel/irq.c @@ -115,6 +115,12 @@ asmlinkage unsigned int do_IRQ(struct pt irq_enter(); irq = __get_cpu_var(vector_irq)[vector]; +#ifdef CONFIG_EVENT_TRACE + if (irq == trace_user_trigger_irq) + user_trace_start(); +#endif + trace_special(regs->rip, irq, 0); + #ifdef CONFIG_DEBUG_STACKOVERFLOW stack_overflow_check(regs); #endif Index: linux-cfs-2.6.23.1.q/arch/x86_64/kernel/setup64.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/x86_64/kernel/setup64.c +++ linux-cfs-2.6.23.1.q/arch/x86_64/kernel/setup64.c @@ -114,7 +114,7 @@ void __init setup_per_cpu_areas(void) } } -void pda_init(int cpu) +void notrace pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); @@ -191,7 +191,7 @@ unsigned long kernel_eflags; * 'CPU state barrier', nothing should get across. * A lot of state is already set up in PDA init. 
*/ -void __cpuinit cpu_init (void) +void __cpuinit notrace cpu_init (void) { int cpu = stack_smp_processor_id(); struct tss_struct *t = &per_cpu(init_tss, cpu); Index: linux-cfs-2.6.23.1.q/arch/x86_64/kernel/smpboot.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/x86_64/kernel/smpboot.c +++ linux-cfs-2.6.23.1.q/arch/x86_64/kernel/smpboot.c @@ -318,7 +318,7 @@ static inline void set_cpu_sibling_map(i /* * Setup code on secondary processor (after comming out of the trampoline) */ -void __cpuinit start_secondary(void) +void __cpuinit notrace start_secondary(void) { /* * Dont put anything before smp_callin(), SMP Index: linux-cfs-2.6.23.1.q/arch/x86_64/kernel/traps.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/x86_64/kernel/traps.c +++ linux-cfs-2.6.23.1.q/arch/x86_64/kernel/traps.c @@ -351,6 +351,7 @@ show_trace(struct task_struct *tsk, stru printk("\nCall Trace:\n"); dump_trace(tsk, regs, stack, &print_trace_ops, NULL); printk("\n"); + print_traces(tsk); } static void Index: linux-cfs-2.6.23.1.q/arch/x86_64/kernel/tsc.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/x86_64/kernel/tsc.c +++ linux-cfs-2.6.23.1.q/arch/x86_64/kernel/tsc.c @@ -157,13 +157,13 @@ __setup("notsc", notsc_setup); /* clock source code: */ -static cycle_t read_tsc(void) +static notrace cycle_t read_tsc(void) { cycle_t ret = (cycle_t)get_cycles_sync(); return ret; } -static cycle_t __vsyscall_fn vread_tsc(void) +static notrace cycle_t __vsyscall_fn vread_tsc(void) { cycle_t ret = (cycle_t)get_cycles_sync(); return ret; Index: linux-cfs-2.6.23.1.q/arch/x86_64/kernel/vsyscall.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/arch/x86_64/kernel/vsyscall.c +++ linux-cfs-2.6.23.1.q/arch/x86_64/kernel/vsyscall.c @@ -44,7 +44,7 @@ #include #include -#define __vsyscall(nr) __attribute__ 
((unused,__section__(".vsyscall_" #nr))) +#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) notrace #define __syscall_clobber "r11","rcx","memory" #define __pa_vsymbol(x) \ ({unsigned long v; \ Index: linux-cfs-2.6.23.1.q/drivers/clocksource/acpi_pm.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/drivers/clocksource/acpi_pm.c +++ linux-cfs-2.6.23.1.q/drivers/clocksource/acpi_pm.c @@ -30,13 +30,13 @@ */ u32 pmtmr_ioport __read_mostly; -static inline u32 read_pmtmr(void) +static notrace inline u32 read_pmtmr(void) { /* mask the output to 24 bits */ return inl(pmtmr_ioport) & ACPI_PM_MASK; } -u32 acpi_pm_read_verified(void) +u32 notrace acpi_pm_read_verified(void) { u32 v1 = 0, v2 = 0, v3 = 0; @@ -56,12 +56,12 @@ u32 acpi_pm_read_verified(void) return v2; } -static cycle_t acpi_pm_read_slow(void) +static notrace cycle_t acpi_pm_read_slow(void) { return (cycle_t)acpi_pm_read_verified(); } -static cycle_t acpi_pm_read(void) +static notrace cycle_t acpi_pm_read(void) { return (cycle_t)read_pmtmr(); } Index: linux-cfs-2.6.23.1.q/fs/proc/proc_misc.c =================================================================== --- linux-cfs-2.6.23.1.q.orig/fs/proc/proc_misc.c +++ linux-cfs-2.6.23.1.q/fs/proc/proc_misc.c @@ -634,6 +634,20 @@ static int execdomains_read_proc(char *p return proc_calc_metrics(page, start, off, count, eof, len); } +#ifdef CONFIG_EVENT_TRACE +extern struct seq_operations latency_trace_op; +static int latency_trace_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &latency_trace_op); +} +static struct file_operations proc_latency_trace_operations = { + .open = latency_trace_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif + #ifdef CONFIG_MAGIC_SYSRQ /* * writing 'C' to /proc/sysrq-trigger is like sysrq-C @@ -727,6 +741,9 @@ void __init proc_misc_init(void) #ifdef CONFIG_SCHEDSTATS create_seq_entry("schedstat", 0, 
&proc_schedstat_operations); #endif +#ifdef CONFIG_EVENT_TRACE + create_seq_entry("latency_trace", 0, &proc_latency_trace_operations); +#endif #ifdef CONFIG_PROC_KCORE proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); if (proc_root_kcore) { Index: linux-cfs-2.6.23.1.q/include/asm-arm/pgalloc.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-arm/pgalloc.h +++ linux-cfs-2.6.23.1.q/include/asm-arm/pgalloc.h @@ -109,7 +109,7 @@ static inline void __pmd_populate(pmd_t * * Ensure that we always set both PMD entries. */ -static inline void +static inline void notrace pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) { unsigned long pte_ptr = (unsigned long)ptep; @@ -122,7 +122,7 @@ pmd_populate_kernel(struct mm_struct *mm __pmd_populate(pmdp, __pa(pte_ptr) | _PAGE_KERNEL_TABLE); } -static inline void +static inline void notrace pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep) { __pmd_populate(pmdp, page_to_pfn(ptep) << PAGE_SHIFT | _PAGE_USER_TABLE); Index: linux-cfs-2.6.23.1.q/include/asm-arm/timex.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-arm/timex.h +++ linux-cfs-2.6.23.1.q/include/asm-arm/timex.h @@ -16,9 +16,17 @@ typedef unsigned long cycles_t; +#ifndef mach_read_cycles + #define mach_read_cycles() (0) +#ifdef CONFIG_LATENCY_TIMING + #define mach_cycles_to_usecs(d) (d) + #define mach_usecs_to_cycles(d) (d) +#endif +#endif + static inline cycles_t get_cycles (void) { - return 0; + return mach_read_cycles(); } #endif Index: linux-cfs-2.6.23.1.q/include/asm-arm/unistd.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-arm/unistd.h +++ linux-cfs-2.6.23.1.q/include/asm-arm/unistd.h @@ -379,6 +379,10 @@ #define __NR_timerfd (__NR_SYSCALL_BASE+350) #define __NR_eventfd (__NR_SYSCALL_BASE+351) +#ifndef __ASSEMBLY__ +#define NR_syscalls 
(__NR_set_mempolicy + 1 - __NR_SYSCALL_BASE) +#endif + /* * The following SWIs are ARM private. */ Index: linux-cfs-2.6.23.1.q/include/asm-i386/desc.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-i386/desc.h +++ linux-cfs-2.6.23.1.q/include/asm-i386/desc.h @@ -78,14 +78,14 @@ static inline void pack_gate(__u32 *a, _ #define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) #endif -static inline void write_dt_entry(struct desc_struct *dt, +static inline fastcall void write_dt_entry(struct desc_struct *dt, int entry, u32 entry_low, u32 entry_high) { dt[entry].a = entry_low; dt[entry].b = entry_high; } -static inline void native_set_ldt(const void *addr, unsigned int entries) +static fastcall inline void native_set_ldt(const void *addr, unsigned int entries) { if (likely(entries == 0)) __asm__ __volatile__("lldt %w0"::"q" (0)); @@ -102,39 +102,39 @@ static inline void native_set_ldt(const } -static inline void native_load_tr_desc(void) +static fastcall inline void native_load_tr_desc(void) { asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); } -static inline void native_load_gdt(const struct Xgt_desc_struct *dtr) +static fastcall inline void native_load_gdt(const struct Xgt_desc_struct *dtr) { asm volatile("lgdt %0"::"m" (*dtr)); } -static inline void native_load_idt(const struct Xgt_desc_struct *dtr) +static fastcall inline void native_load_idt(const struct Xgt_desc_struct *dtr) { asm volatile("lidt %0"::"m" (*dtr)); } -static inline void native_store_gdt(struct Xgt_desc_struct *dtr) +static fastcall inline void native_store_gdt(struct Xgt_desc_struct *dtr) { asm ("sgdt %0":"=m" (*dtr)); } -static inline void native_store_idt(struct Xgt_desc_struct *dtr) +static fastcall inline void native_store_idt(struct Xgt_desc_struct *dtr) { asm ("sidt %0":"=m" (*dtr)); } -static inline unsigned long native_store_tr(void) +static fastcall inline unsigned long native_store_tr(void) { unsigned long tr; 
asm ("str %0":"=r" (tr)); return tr; } -static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) +static fastcall inline void native_load_tls(struct thread_struct *t, unsigned int cpu) { unsigned int i; struct desc_struct *gdt = get_cpu_gdt_table(cpu); Index: linux-cfs-2.6.23.1.q/include/asm-i386/io.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-i386/io.h +++ linux-cfs-2.6.23.1.q/include/asm-i386/io.h @@ -254,7 +254,7 @@ static inline void flush_write_buffers(v #endif /* __KERNEL__ */ -static inline void native_io_delay(void) +static fastcall inline void native_io_delay(void) { asm volatile("outb %%al,$0x80" : : : "memory"); } Index: linux-cfs-2.6.23.1.q/include/asm-i386/irq.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-i386/irq.h +++ linux-cfs-2.6.23.1.q/include/asm-i386/irq.h @@ -41,7 +41,7 @@ extern int irqbalance_disable(char *str) extern void fixup_irqs(cpumask_t map); #endif -unsigned int do_IRQ(struct pt_regs *regs); +extern fastcall notrace unsigned int do_IRQ(struct pt_regs *regs); void init_IRQ(void); void __init native_init_IRQ(void); Index: linux-cfs-2.6.23.1.q/include/asm-i386/irqflags.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-i386/irqflags.h +++ linux-cfs-2.6.23.1.q/include/asm-i386/irqflags.h @@ -12,36 +12,36 @@ #include #ifndef __ASSEMBLY__ -static inline unsigned long native_save_fl(void) +static fastcall inline unsigned long native_save_fl(void) { unsigned long f; asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); return f; } -static inline void native_restore_fl(unsigned long f) +static fastcall inline void native_restore_fl(unsigned long f) { asm volatile("pushl %0 ; popfl": /* no output */ :"g" (f) :"memory", "cc"); } -static inline void native_irq_disable(void) +static fastcall inline void native_irq_disable(void) { 
asm volatile("cli": : :"memory"); } -static inline void native_irq_enable(void) +static fastcall inline void native_irq_enable(void) { asm volatile("sti": : :"memory"); } -static inline void native_safe_halt(void) +static fastcall inline void native_safe_halt(void) { asm volatile("sti; hlt": : :"memory"); } -static inline void native_halt(void) +static fastcall inline void native_halt(void) { asm volatile("hlt": : :"memory"); } Index: linux-cfs-2.6.23.1.q/include/asm-i386/module.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-i386/module.h +++ linux-cfs-2.6.23.1.q/include/asm-i386/module.h @@ -64,12 +64,18 @@ struct mod_arch_specific #error unknown processor family #endif +#ifdef CONFIG_REGPARM +#define MODULE_REGPARM "REGPARM " +#else +#define MODULE_REGPARM "" +#endif + #ifdef CONFIG_4KSTACKS #define MODULE_STACKSIZE "4KSTACKS " #else #define MODULE_STACKSIZE "" #endif -#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE +#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_REGPARM MODULE_STACKSIZE #endif /* _ASM_I386_MODULE_H */ Index: linux-cfs-2.6.23.1.q/include/asm-i386/msr.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-i386/msr.h +++ linux-cfs-2.6.23.1.q/include/asm-i386/msr.h @@ -1,6 +1,7 @@ #ifndef __ASM_MSR_H #define __ASM_MSR_H +#include #include #ifdef __KERNEL__ @@ -8,7 +9,7 @@ #include -static inline unsigned long long native_read_msr(unsigned int msr) +static fastcall inline unsigned long long native_read_msr(unsigned int msr) { unsigned long long val; @@ -16,7 +17,7 @@ static inline unsigned long long native_ return val; } -static inline unsigned long long native_read_msr_safe(unsigned int msr, +static fastcall inline unsigned long long native_read_msr_safe(unsigned int msr, int *err) { unsigned long long val; @@ -36,12 +37,12 @@ static inline unsigned long long native_ return val; } -static inline void 
native_write_msr(unsigned int msr, unsigned long long val) +static fastcall inline void native_write_msr(unsigned int msr, unsigned long long val) { asm volatile("wrmsr" : : "c" (msr), "A"(val)); } -static inline int native_write_msr_safe(unsigned int msr, +static fastcall inline int native_write_msr_safe(unsigned int msr, unsigned long long val) { int err; @@ -60,14 +61,14 @@ static inline int native_write_msr_safe( return err; } -static inline unsigned long long native_read_tsc(void) +static fastcall inline unsigned long long native_read_tsc(void) { unsigned long long val; asm volatile("rdtsc" : "=A" (val)); return val; } -static inline unsigned long long native_read_pmc(void) +static fastcall inline unsigned long long native_read_pmc(void) { unsigned long long val; asm volatile("rdpmc" : "=A" (val)); Index: linux-cfs-2.6.23.1.q/include/asm-i386/page.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-i386/page.h +++ linux-cfs-2.6.23.1.q/include/asm-i386/page.h @@ -11,6 +11,7 @@ #ifdef __KERNEL__ #ifndef __ASSEMBLY__ +#include #ifdef CONFIG_X86_USE_3DNOW @@ -49,32 +50,32 @@ typedef struct { unsigned long long pmd; typedef struct { unsigned long long pgd; } pgd_t; typedef struct { unsigned long long pgprot; } pgprot_t; -static inline unsigned long long native_pgd_val(pgd_t pgd) +static fastcall inline unsigned long long native_pgd_val(pgd_t pgd) { return pgd.pgd; } -static inline unsigned long long native_pmd_val(pmd_t pmd) +static fastcall inline unsigned long long native_pmd_val(pmd_t pmd) { return pmd.pmd; } -static inline unsigned long long native_pte_val(pte_t pte) +static fastcall inline unsigned long long native_pte_val(pte_t pte) { return pte.pte_low | ((unsigned long long)pte.pte_high << 32); } -static inline pgd_t native_make_pgd(unsigned long long val) +static fastcall inline pgd_t native_make_pgd(unsigned long long val) { return (pgd_t) { val }; } -static inline pmd_t native_make_pmd(unsigned 
long long val) +static fastcall inline pmd_t native_make_pmd(unsigned long long val) { return (pmd_t) { val }; } -static inline pte_t native_make_pte(unsigned long long val) +static fastcall inline pte_t native_make_pte(unsigned long long val) { return (pte_t) { .pte_low = val, .pte_high = (val >> 32) } ; } @@ -92,22 +93,22 @@ typedef struct { unsigned long pgd; } pg typedef struct { unsigned long pgprot; } pgprot_t; #define boot_pte_t pte_t /* or would you rather have a typedef */ -static inline unsigned long native_pgd_val(pgd_t pgd) +static fastcall inline unsigned long native_pgd_val(pgd_t pgd) { return pgd.pgd; } -static inline unsigned long native_pte_val(pte_t pte) +static fastcall inline unsigned long native_pte_val(pte_t pte) { return pte.pte_low; } -static inline pgd_t native_make_pgd(unsigned long val) +static fastcall inline pgd_t native_make_pgd(unsigned long val) { return (pgd_t) { val }; } -static inline pte_t native_make_pte(unsigned long val) +static fastcall inline pte_t native_make_pte(unsigned long val) { return (pte_t) { .pte_low = val }; } Index: linux-cfs-2.6.23.1.q/include/asm-i386/paravirt.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-i386/paravirt.h +++ linux-cfs-2.6.23.1.q/include/asm-i386/paravirt.h @@ -71,31 +71,31 @@ struct paravirt_ops void (*banner)(void); /* Set and set time of day */ - unsigned long (*get_wallclock)(void); - int (*set_wallclock)(unsigned long); + unsigned long (fastcall *get_wallclock)(void); + int (fastcall *set_wallclock)(unsigned long); /* cpuid emulation, mostly so that caps bits can be disabled */ - void (*cpuid)(unsigned int *eax, unsigned int *ebx, + void (fastcall *cpuid)(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); /* hooks for various privileged instructions */ - unsigned long (*get_debugreg)(int regno); - void (*set_debugreg)(int regno, unsigned long value); + unsigned long (fastcall *get_debugreg)(int 
regno); + void (fastcall *set_debugreg)(int regno, unsigned long value); - void (*clts)(void); + void (fastcall *clts)(void); - unsigned long (*read_cr0)(void); - void (*write_cr0)(unsigned long); + unsigned long (fastcall *read_cr0)(void); + void (fastcall *write_cr0)(unsigned long); - unsigned long (*read_cr2)(void); - void (*write_cr2)(unsigned long); + unsigned long (fastcall *read_cr2)(void); + void (fastcall *write_cr2)(unsigned long); - unsigned long (*read_cr3)(void); - void (*write_cr3)(unsigned long); + unsigned long (fastcall *read_cr3)(void); + void (fastcall *write_cr3)(unsigned long); - unsigned long (*read_cr4_safe)(void); - unsigned long (*read_cr4)(void); - void (*write_cr4)(unsigned long); + unsigned long (fastcall *read_cr4_safe)(void); + unsigned long (fastcall *read_cr4)(void); + void (fastcall *write_cr4)(unsigned long); /* * Get/set interrupt state. save_fl and restore_fl are only @@ -103,44 +103,44 @@ struct paravirt_ops * returned from save_fl are undefined, and may be ignored by * restore_fl. */ - unsigned long (*save_fl)(void); - void (*restore_fl)(unsigned long); - void (*irq_disable)(void); - void (*irq_enable)(void); - void (*safe_halt)(void); - void (*halt)(void); + unsigned long (fastcall *save_fl)(void); + void (fastcall *restore_fl)(unsigned long); + void (fastcall *irq_disable)(void); + void (fastcall *irq_enable)(void); + void (fastcall *safe_halt)(void); + void (fastcall *halt)(void); - void (*wbinvd)(void); + void (fastcall *wbinvd)(void); /* MSR, PMC and TSR operations. err = 0/-EFAULT. wrmsr returns 0/-EFAULT. 
*/ - u64 (*read_msr)(unsigned int msr, int *err); - int (*write_msr)(unsigned int msr, u64 val); + u64 (fastcall *read_msr)(unsigned int msr, int *err); + int (fastcall *write_msr)(unsigned int msr, u64 val); - u64 (*read_tsc)(void); - u64 (*read_pmc)(void); + u64 (fastcall *read_tsc)(void); + u64 (fastcall *read_pmc)(void); unsigned long long (*sched_clock)(void); unsigned long (*get_cpu_khz)(void); /* Segment descriptor handling */ - void (*load_tr_desc)(void); - void (*load_gdt)(const struct Xgt_desc_struct *); - void (*load_idt)(const struct Xgt_desc_struct *); - void (*store_gdt)(struct Xgt_desc_struct *); - void (*store_idt)(struct Xgt_desc_struct *); - void (*set_ldt)(const void *desc, unsigned entries); - unsigned long (*store_tr)(void); - void (*load_tls)(struct thread_struct *t, unsigned int cpu); - void (*write_ldt_entry)(struct desc_struct *, + void (fastcall *load_tr_desc)(void); + void (fastcall *load_gdt)(const struct Xgt_desc_struct *); + void (fastcall *load_idt)(const struct Xgt_desc_struct *); + void (fastcall *store_gdt)(struct Xgt_desc_struct *); + void (fastcall *store_idt)(struct Xgt_desc_struct *); + void (fastcall *set_ldt)(const void *desc, unsigned entries); + unsigned long (fastcall *store_tr)(void); + void (fastcall *load_tls)(struct thread_struct *t, unsigned int cpu); + void (fastcall *write_ldt_entry)(struct desc_struct *, int entrynum, u32 low, u32 high); - void (*write_gdt_entry)(struct desc_struct *, + void (fastcall *write_gdt_entry)(struct desc_struct *, int entrynum, u32 low, u32 high); - void (*write_idt_entry)(struct desc_struct *, + void (fastcall *write_idt_entry)(struct desc_struct *, int entrynum, u32 low, u32 high); - void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t); + void (fastcall *load_esp0)(struct tss_struct *tss, struct thread_struct *t); - void (*set_iopl_mask)(unsigned mask); - void (*io_delay)(void); + void (fastcall *set_iopl_mask)(unsigned mask); + void (fastcall *io_delay)(void); /* * Hooks 
for intercepting the creation/use/destruction of an @@ -157,9 +157,9 @@ struct paravirt_ops * Direct APIC operations, principally for VMI. Ideally * these shouldn't be in this interface. */ - void (*apic_write)(unsigned long reg, unsigned long v); - void (*apic_write_atomic)(unsigned long reg, unsigned long v); - unsigned long (*apic_read)(unsigned long reg); + void (fastcall *apic_write)(unsigned long reg, unsigned long v); + void (fastcall *apic_write_atomic)(unsigned long reg, unsigned long v); + unsigned long (fastcall *apic_read)(unsigned long reg); void (*setup_boot_clock)(void); void (*setup_secondary_clock)(void); @@ -169,56 +169,56 @@ struct paravirt_ops #endif /* TLB operations */ - void (*flush_tlb_user)(void); - void (*flush_tlb_kernel)(void); - void (*flush_tlb_single)(unsigned long addr); - void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, + void (fastcall *flush_tlb_user)(void); + void (fastcall *flush_tlb_kernel)(void); + void (fastcall *flush_tlb_single)(unsigned long addr); + void (fastcall *flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, unsigned long va); /* Hooks for allocating/releasing pagetable pages */ - void (*alloc_pt)(struct mm_struct *mm, u32 pfn); - void (*alloc_pd)(u32 pfn); - void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); - void (*release_pt)(u32 pfn); - void (*release_pd)(u32 pfn); + void (fastcall *alloc_pt)(struct mm_struct *mm, u32 pfn); + void (fastcall *alloc_pd)(u32 pfn); + void (fastcall *alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); + void (fastcall *release_pt)(u32 pfn); + void (fastcall *release_pd)(u32 pfn); /* Pagetable manipulation functions */ - void (*set_pte)(pte_t *ptep, pte_t pteval); - void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, + void (fastcall *set_pte)(pte_t *ptep, pte_t pteval); + void (fastcall *set_pte_at)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); - void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); 
- void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); - void (*pte_update_defer)(struct mm_struct *mm, + void (fastcall *set_pmd)(pmd_t *pmdp, pmd_t pmdval); + void (fastcall *pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + void (fastcall *pte_update_defer)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); #ifdef CONFIG_HIGHPTE - void *(*kmap_atomic_pte)(struct page *page, enum km_type type); + void *(fastcall *kmap_atomic_pte)(struct page *page, enum km_type type); #endif #ifdef CONFIG_X86_PAE - void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); - void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); - void (*set_pud)(pud_t *pudp, pud_t pudval); - void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); - void (*pmd_clear)(pmd_t *pmdp); - - unsigned long long (*pte_val)(pte_t); - unsigned long long (*pmd_val)(pmd_t); - unsigned long long (*pgd_val)(pgd_t); - - pte_t (*make_pte)(unsigned long long pte); - pmd_t (*make_pmd)(unsigned long long pmd); - pgd_t (*make_pgd)(unsigned long long pgd); + void (fastcall *set_pte_atomic)(pte_t *ptep, pte_t pteval); + void (fastcall *set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); + void (fastcall *set_pud)(pud_t *pudp, pud_t pudval); + void (fastcall *pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + void (fastcall *pmd_clear)(pmd_t *pmdp); + + unsigned long long (fastcall *pte_val)(pte_t); + unsigned long long (fastcall *pmd_val)(pmd_t); + unsigned long long (fastcall *pgd_val)(pgd_t); + + pte_t (fastcall *make_pte)(unsigned long long pte); + pmd_t (fastcall *make_pmd)(unsigned long long pmd); + pgd_t (fastcall *make_pgd)(unsigned long long pgd); #else - unsigned long (*pte_val)(pte_t); - unsigned long (*pgd_val)(pgd_t); + unsigned long (fastcall *pte_val)(pte_t); + unsigned long (fastcall *pgd_val)(pgd_t); - pte_t (*make_pte)(unsigned long pte); - pgd_t 
(*make_pgd)(unsigned long pgd); + pte_t (fastcall *make_pte)(unsigned long pte); + pgd_t (fastcall *make_pgd)(unsigned long pgd); #endif /* Set deferred update mode, used for batching operations. */ - void (*set_lazy_mode)(enum paravirt_lazy_mode mode); + void (fastcall *set_lazy_mode)(enum paravirt_lazy_mode mode); /* These two are jmp to, not actually called. */ void (*irq_enable_sysexit)(void); @@ -419,12 +419,12 @@ static inline void load_esp0(struct tss_ } #define ARCH_SETUP paravirt_ops.arch_setup(); -static inline unsigned long get_wallclock(void) +static fastcall inline unsigned long get_wallclock(void) { return PVOP_CALL0(unsigned long, get_wallclock); } -static inline int set_wallclock(unsigned long nowtime) +static fastcall inline int set_wallclock(unsigned long nowtime) { return PVOP_CALL1(int, set_wallclock, nowtime); } Index: linux-cfs-2.6.23.1.q/include/asm-i386/pgtable-2level.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-i386/pgtable-2level.h +++ linux-cfs-2.6.23.1.q/include/asm-i386/pgtable-2level.h @@ -11,16 +11,16 @@ * within a page table are directly modified. Thus, the following * hook is made available. 
*/ -static inline void native_set_pte(pte_t *ptep , pte_t pte) +static fastcall inline void native_set_pte(pte_t *ptep , pte_t pte) { *ptep = pte; } -static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, +static fastcall inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep , pte_t pte) { native_set_pte(ptep, pte); } -static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) +static fastcall inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { *pmdp = pmd; } @@ -36,13 +36,13 @@ static inline void native_set_pmd(pmd_t #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp) +static fastcall inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp) { *xp = __pte(0); } #ifdef CONFIG_SMP -static inline pte_t native_ptep_get_and_clear(pte_t *xp) +static fastcall inline pte_t native_ptep_get_and_clear(pte_t *xp) { return __pte(xchg(&xp->pte_low, 0)); } Index: linux-cfs-2.6.23.1.q/include/asm-i386/pgtable-3level.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-i386/pgtable-3level.h +++ linux-cfs-2.6.23.1.q/include/asm-i386/pgtable-3level.h @@ -33,13 +33,13 @@ static inline int pte_exec_kernel(pte_t * not possible, use pte_get_and_clear to obtain the old pte * value and then use set_pte to update it. 
-ben */ -static inline void native_set_pte(pte_t *ptep, pte_t pte) +static fastcall inline void native_set_pte(pte_t *ptep, pte_t pte) { ptep->pte_high = pte.pte_high; smp_wmb(); ptep->pte_low = pte.pte_low; } -static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, +static fastcall inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep , pte_t pte) { native_set_pte(ptep, pte); @@ -51,7 +51,7 @@ static inline void native_set_pte_at(str * we are justified in merely clearing the PTE present bit, followed * by a set. The ordering here is important. */ -static inline void native_set_pte_present(struct mm_struct *mm, unsigned long addr, +static fastcall inline void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { ptep->pte_low = 0; @@ -61,15 +61,15 @@ static inline void native_set_pte_presen ptep->pte_low = pte.pte_low; } -static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) +static fastcall inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) { set_64bit((unsigned long long *)(ptep),native_pte_val(pte)); } -static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) +static fastcall inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { set_64bit((unsigned long long *)(pmdp),native_pmd_val(pmd)); } -static inline void native_set_pud(pud_t *pudp, pud_t pud) +static fastcall inline void native_set_pud(pud_t *pudp, pud_t pud) { *pudp = pud; } @@ -79,14 +79,14 @@ static inline void native_set_pud(pud_t * entry, so clear the bottom half first and enforce ordering with a compiler * barrier. 
*/ -static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +static fastcall inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { ptep->pte_low = 0; smp_wmb(); ptep->pte_high = 0; } -static inline void native_pmd_clear(pmd_t *pmd) +static fastcall inline void native_pmd_clear(pmd_t *pmd) { u32 *tmp = (u32 *)pmd; *tmp = 0; @@ -125,7 +125,7 @@ static inline void pud_clear (pud_t * pu pmd_index(address)) #ifdef CONFIG_SMP -static inline pte_t native_ptep_get_and_clear(pte_t *ptep) +static fastcall inline pte_t native_ptep_get_and_clear(pte_t *ptep) { pte_t res; Index: linux-cfs-2.6.23.1.q/include/asm-i386/pgtable.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-i386/pgtable.h +++ linux-cfs-2.6.23.1.q/include/asm-i386/pgtable.h @@ -261,7 +261,7 @@ static inline pte_t pte_mkhuge(pte_t pte #endif /* local pte updates need not use xchg for locking */ -static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) +static fastcall inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) { pte_t res = *ptep; Index: linux-cfs-2.6.23.1.q/include/asm-i386/processor.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-i386/processor.h +++ linux-cfs-2.6.23.1.q/include/asm-i386/processor.h @@ -128,7 +128,7 @@ extern void detect_ht(struct cpuinfo_x86 static inline void detect_ht(struct cpuinfo_x86 *c) {} #endif -static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, +static inline void fastcall native_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { /* ecx is often an input as well as an output. 
*/ @@ -492,7 +492,7 @@ static inline void rep_nop(void) #define cpu_relax() rep_nop() -static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread) +static inline void fastcall native_load_esp0(struct tss_struct *tss, struct thread_struct *thread) { tss->x86_tss.esp0 = thread->esp0; /* This can only happen when SEP is enabled, no need to test "SEP"arately */ @@ -503,7 +503,7 @@ static inline void native_load_esp0(stru } -static inline unsigned long native_get_debugreg(int regno) +static inline unsigned long fastcall native_get_debugreg(int regno) { unsigned long val = 0; /* Damn you, gcc! */ @@ -526,7 +526,7 @@ static inline unsigned long native_get_d return val; } -static inline void native_set_debugreg(int regno, unsigned long value) +static inline void fastcall native_set_debugreg(int regno, unsigned long value) { switch (regno) { case 0: @@ -555,7 +555,7 @@ static inline void native_set_debugreg(i /* * Set IOPL bits in EFLAGS from given mask */ -static inline void native_set_iopl_mask(unsigned mask) +static inline void fastcall native_set_iopl_mask(unsigned mask) { unsigned int reg; __asm__ __volatile__ ("pushfl;" @@ -595,7 +595,9 @@ static inline void load_esp0(struct tss_ * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx * resulting in stale register contents being returned. 
*/ -static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) +static inline void +cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) { *eax = op; *ecx = 0; @@ -603,8 +605,9 @@ static inline void cpuid(unsigned int op } /* Some CPUID calls want 'count' to be placed in ecx */ -static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, - int *edx) +static inline void +cpuid_count(int op, int count, unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) { *eax = op; *ecx = count; Index: linux-cfs-2.6.23.1.q/include/asm-i386/system.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-i386/system.h +++ linux-cfs-2.6.23.1.q/include/asm-i386/system.h @@ -89,55 +89,55 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" asm volatile("mov %%" #seg ",%0":"=rm" (value)) -static inline void native_clts(void) +static fastcall inline void native_clts(void) { asm volatile ("clts"); } -static inline unsigned long native_read_cr0(void) +static fastcall inline unsigned long native_read_cr0(void) { unsigned long val; asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); return val; } -static inline void native_write_cr0(unsigned long val) +static fastcall inline void native_write_cr0(unsigned long val) { asm volatile("movl %0,%%cr0": :"r" (val)); } -static inline unsigned long native_read_cr2(void) +static fastcall inline unsigned long native_read_cr2(void) { unsigned long val; asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); return val; } -static inline void native_write_cr2(unsigned long val) +static fastcall inline void native_write_cr2(unsigned long val) { asm volatile("movl %0,%%cr2": :"r" (val)); } -static inline unsigned long native_read_cr3(void) +static fastcall inline unsigned long native_read_cr3(void) { unsigned long val; asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); return val; 
} -static inline void native_write_cr3(unsigned long val) +static fastcall inline void native_write_cr3(unsigned long val) { asm volatile("movl %0,%%cr3": :"r" (val)); } -static inline unsigned long native_read_cr4(void) +static fastcall inline unsigned long native_read_cr4(void) { unsigned long val; asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); return val; } -static inline unsigned long native_read_cr4_safe(void) +static fastcall inline unsigned long native_read_cr4_safe(void) { unsigned long val; /* This could fault if %cr4 does not exist */ @@ -150,12 +150,12 @@ static inline unsigned long native_read_ return val; } -static inline void native_write_cr4(unsigned long val) +static fastcall inline void native_write_cr4(unsigned long val) { asm volatile("movl %0,%%cr4": :"r" (val)); } -static inline void native_wbinvd(void) +static fastcall inline void native_wbinvd(void) { asm volatile("wbinvd": : :"memory"); } Index: linux-cfs-2.6.23.1.q/include/asm-i386/time.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-i386/time.h +++ linux-cfs-2.6.23.1.q/include/asm-i386/time.h @@ -4,7 +4,7 @@ #include #include "mach_time.h" -static inline unsigned long native_get_wallclock(void) +static fastcall inline unsigned long native_get_wallclock(void) { unsigned long retval; @@ -16,7 +16,7 @@ static inline unsigned long native_get_w return retval; } -static inline int native_set_wallclock(unsigned long nowtime) +static fastcall inline int native_set_wallclock(unsigned long nowtime) { int retval; Index: linux-cfs-2.6.23.1.q/include/asm-i386/tlbflush.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-i386/tlbflush.h +++ linux-cfs-2.6.23.1.q/include/asm-i386/tlbflush.h @@ -116,7 +116,7 @@ static inline void flush_tlb_range(struc __flush_tlb(); } -static inline void native_flush_tlb_others(const cpumask_t *cpumask, +static fastcall inline void 
native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, unsigned long va) { } @@ -140,7 +140,7 @@ static inline void flush_tlb_range(struc flush_tlb_mm(vma->vm_mm); } -void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, +void fastcall native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, unsigned long va); #define TLBSTATE_OK 1 Index: linux-cfs-2.6.23.1.q/include/asm-x86_64/calling.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-x86_64/calling.h +++ linux-cfs-2.6.23.1.q/include/asm-x86_64/calling.h @@ -160,3 +160,53 @@ .macro icebp .byte 0xf1 .endm + +/* + * latency-tracing helpers: + */ + + .macro TRACE_SYS_CALL + +#ifdef CONFIG_EVENT_TRACE + SAVE_ARGS + + mov %rdx, %rcx + mov %rsi, %rdx + mov %rdi, %rsi + mov %rax, %rdi + + call sys_call + + RESTORE_ARGS +#endif + .endm + + + .macro TRACE_SYS_IA32_CALL + +#ifdef CONFIG_EVENT_TRACE + SAVE_ARGS + + mov %rdx, %rcx + mov %rsi, %rdx + mov %rdi, %rsi + mov %rax, %rdi + + call sys_ia32_call + + RESTORE_ARGS +#endif + .endm + + .macro TRACE_SYS_RET + +#ifdef CONFIG_EVENT_TRACE + SAVE_ARGS + + mov %rax, %rdi + + call sys_ret + + RESTORE_ARGS +#endif + .endm Index: linux-cfs-2.6.23.1.q/include/asm-x86_64/unistd.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/asm-x86_64/unistd.h +++ linux-cfs-2.6.23.1.q/include/asm-x86_64/unistd.h @@ -11,6 +11,8 @@ * Note: holes are not allowed. 
*/ +#define NR_syscalls (__NR_syscall_max+1) + /* at least 8 syscall per cacheline */ #define __NR_read 0 __SYSCALL(__NR_read, sys_read) Index: linux-cfs-2.6.23.1.q/include/linux/clocksource.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/linux/clocksource.h +++ linux-cfs-2.6.23.1.q/include/linux/clocksource.h @@ -21,6 +21,9 @@ typedef u64 cycle_t; struct clocksource; +extern unsigned long preempt_max_latency; +extern unsigned long preempt_thresh; + /** * struct clocksource - hardware abstraction for a free running counter * Provides mostly state-free accessors to the underlying hardware. @@ -178,8 +181,20 @@ static inline cycle_t clocksource_read(s */ static inline s64 cyc2ns(struct clocksource *cs, cycle_t cycles) { - u64 ret = (u64)cycles; - ret = (ret * cs->mult) >> cs->shift; + return ((u64)cycles * cs->mult) >> cs->shift; +} + +/** + * ns2cyc - converts nanoseconds to clocksource cycles + * @cs: Pointer to clocksource + * @nsecs: Nanoseconds + */ +static inline cycles_t ns2cyc(struct clocksource *cs, u64 nsecs) +{ + u64 ret = nsecs << cs->shift; /* u64: do_div() needs a 64-bit dividend and cycles_t may be 32-bit */ + + do_div(ret, cs->mult + 1); return ret; } @@ -227,4 +242,8 @@ static inline void update_vsyscall(struc } #endif +extern cycle_t get_monotonic_cycles(void); +extern unsigned long cycles_to_usecs(cycle_t); +extern cycle_t usecs_to_cycles(unsigned long); + #endif /* _LINUX_CLOCKSOURCE_H */ Index: linux-cfs-2.6.23.1.q/include/linux/kernel.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/linux/kernel.h +++ linux-cfs-2.6.23.1.q/include/linux/kernel.h @@ -157,6 +157,8 @@ asmlinkage int vprintk(const char *fmt, __attribute__ ((format (printf, 1, 0))); asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; +extern void early_printk(const char *fmt, ...) 
+ __attribute__ ((format (printf, 1, 2))); #else static inline int vprintk(const char *s, va_list args) __attribute__ ((format (printf, 1, 0))); Index: linux-cfs-2.6.23.1.q/include/linux/latency_hist.h =================================================================== --- /dev/null +++ linux-cfs-2.6.23.1.q/include/linux/latency_hist.h @@ -0,0 +1,32 @@ +/* + * kernel/latency_hist.h + * + * Add support for histograms of preemption-off latency and + * interrupt-off latency and wakeup latency, it depends on + * Real-Time Preemption Support. + * + * Copyright (C) 2005 MontaVista Software, Inc. + * Yi Yang + * + */ +#ifndef _LINUX_LATENCY_HIST_H_ +#define _LINUX_LATENCY_HIST_H_ + +enum { + INTERRUPT_LATENCY = 0, + PREEMPT_LATENCY, + WAKEUP_LATENCY +}; + +#define MAX_ENTRY_NUM 10240 +#define LATENCY_TYPE_NUM 3 + +#ifdef CONFIG_LATENCY_HIST +extern void latency_hist(int latency_type, int cpu, unsigned long latency); +# define latency_hist_flag 1 +#else +# define latency_hist(a,b,c) do { (void)(cpu); } while (0) +# define latency_hist_flag 0 +#endif /* CONFIG_LATENCY_HIST */ + +#endif /* ifndef _LINUX_LATENCY_HIST_H_ */ Index: linux-cfs-2.6.23.1.q/include/linux/linkage.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/linux/linkage.h +++ linux-cfs-2.6.23.1.q/include/linux/linkage.h @@ -3,6 +3,8 @@ #include +#define notrace __attribute ((no_instrument_function)) + #ifdef __cplusplus #define CPP_ASMLINKAGE extern "C" #else Index: linux-cfs-2.6.23.1.q/include/linux/prctl.h =================================================================== --- linux-cfs-2.6.23.1.q.orig/include/linux/prctl.h +++ linux-cfs-2.6.23.1.q/include/linux/prctl.h @@ -3,6 +3,7 @@ /* Values to pass as first argument to prctl() */ +#define PR_SET_TRACING 0 /* Second arg is tracing on/off */ #define PR_SET_PDEATHSIG 1 /* Second arg is a signal */ #define PR_GET_PDEATHSIG 2 /* Second arg is a ptr to return the signal */ Index: 
linux-cfs-2.6.23.1.q/include/linux/preempt.h
===================================================================
--- linux-cfs-2.6.23.1.q.orig/include/linux/preempt.h
+++ linux-cfs-2.6.23.1.q/include/linux/preempt.h
@@ -10,12 +10,26 @@
 #include
 #include

-#ifdef CONFIG_DEBUG_PREEMPT
-  extern void fastcall add_preempt_count(int val);
-  extern void fastcall sub_preempt_count(int val);
+/*
+ * With preempt debugging or critical-section timing configured, the
+ * preempt-count helpers are out-of-line notrace functions; otherwise
+ * they are macros that modify preempt_count() directly.
+ */
+#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_CRITICAL_TIMING)
+  extern void notrace add_preempt_count(unsigned int val);
+  extern void notrace sub_preempt_count(unsigned int val);
+  extern void notrace mask_preempt_count(unsigned int mask);
+  extern void notrace unmask_preempt_count(unsigned int mask);
 #else
 # define add_preempt_count(val) do { preempt_count() += (val); } while (0)
 # define sub_preempt_count(val) do { preempt_count() -= (val); } while (0)
+# define mask_preempt_count(mask) \
+	do { preempt_count() |= (mask); } while (0)
+# define unmask_preempt_count(mask) \
+	do { preempt_count() &= ~(mask); } while (0)
+#endif
+
+/* Critical-section timing hooks; empty statements when not configured. */
+#ifdef CONFIG_CRITICAL_TIMING
+  extern void touch_critical_timing(void);
+  extern void stop_critical_timing(void);
+#else
+# define touch_critical_timing() do { } while (0)
+# define stop_critical_timing() do { } while (0)
 #endif

 #define inc_preempt_count() add_preempt_count(1)
Index: linux-cfs-2.6.23.1.q/include/linux/sched.h
===================================================================
--- linux-cfs-2.6.23.1.q.orig/include/linux/sched.h
+++ linux-cfs-2.6.23.1.q/include/linux/sched.h
@@ -239,6 +239,7 @@ static inline void show_state(void)
 }

 extern void show_regs(struct pt_regs *);
+extern void irq_show_regs_callback(int cpu, struct pt_regs *regs);

 /*
  * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
@@ -275,6 +276,105 @@ static inline void touch_all_softlockup_
 }
 #endif

+/* print_traces() exists only with preempt or event tracing configured. */
+#if defined(CONFIG_PREEMPT_TRACE) || defined(CONFIG_EVENT_TRACE)
+  extern void print_traces(struct task_struct *task);
+#else
+# define print_traces(task) do { } while (0)
+#endif
+
+/*
+ * CALLER_ADDRn: return address n frames up the call chain.
+ * With frame pointers this is __builtin_return_address(n), or
+ * arm_return_addr(n) on ARM.  Without frame pointers only level 0 is
+ * available and the deeper levels evaluate to 0UL.
+ */
+#ifdef CONFIG_FRAME_POINTER
+# ifndef CONFIG_ARM
+# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
+# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1))
+# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2))
+# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3))
+# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4))
+# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5))
+# else
+  extern unsigned long arm_return_addr(int level);
+# define CALLER_ADDR0 arm_return_addr(0)
+# define CALLER_ADDR1 arm_return_addr(1)
+# define CALLER_ADDR2 arm_return_addr(2)
+# define CALLER_ADDR3 arm_return_addr(3)
+# define CALLER_ADDR4 arm_return_addr(4)
+# define CALLER_ADDR5 arm_return_addr(5)
+#endif
+#else
+# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
+# define CALLER_ADDR1 0UL
+# define CALLER_ADDR2 0UL
+# define CALLER_ADDR3 0UL
+# define CALLER_ADDR4 0UL
+# define CALLER_ADDR5 0UL
+#endif
+
+/* mcount() is a real symbol only with CONFIG_MCOUNT; else a no-op. */
+#ifdef CONFIG_MCOUNT
+  extern void notrace mcount(void);
+#else
+# define mcount() do { } while (0)
+#endif
+
+/*
+ * Event tracer interface.  When CONFIG_EVENT_TRACE is off, the tunables
+ * listed in the #else branch become the constant 0 and the functions
+ * become empty statements.
+ * NOTE(review): the disabled user_trace_start()/user_trace_stop() stubs
+ * are statements, not expressions, although the real functions return
+ * long - confirm no caller uses their value when tracing is disabled.
+ */
+#ifdef CONFIG_EVENT_TRACE
+  extern int mcount_enabled, trace_enabled, trace_user_triggered,
+		trace_user_trigger_irq, trace_freerunning, trace_verbose,
+		trace_print_on_crash, trace_all_cpus, print_functions,
+		syscall_tracing, stackframe_tracing, trace_use_raw_cycles,
+		trace_all_runnable;
+  extern void notrace trace_special(unsigned long v1, unsigned long v2, unsigned long v3);
+  extern void notrace trace_special_pid(int pid, unsigned long v1, unsigned long v2);
+  extern void notrace trace_special_u64(unsigned long long v1, unsigned long v2);
+  extern void notrace trace_special_sym(void);
+  extern void stop_trace(void);
+# define start_trace() do { trace_enabled = 1; } while (0)
+  extern void print_last_trace(void);
+  extern void nmi_trace(unsigned long eip, unsigned long parent_eip,
+			unsigned long flags);
+  extern long user_trace_start(void);
+  extern long user_trace_stop(void);
+  extern void trace_cmdline(void);
+  extern void init_tracer(void);
+#else
+# define mcount_enabled 0
+# define trace_enabled 0
+# define syscall_tracing 0
+# define stackframe_tracing 0
+# define trace_user_triggered 0
+# define trace_freerunning 0
+# define trace_all_cpus 0
+# define trace_verbose 0
+# define trace_special(v1,v2,v3) do { } while (0)
+# define trace_special_pid(pid,v1,v2) do { } while (0)
+# define trace_special_u64(v1,v2) do { } while (0)
+# define trace_special_sym() do { } while (0)
+# define stop_trace() do { } while (0)
+# define start_trace() do { } while (0)
+# define print_last_trace() do { } while (0)
+# define nmi_trace(eip, parent_eip, flags) do { } while (0)
+# define user_trace_start() do { } while (0)
+# define user_trace_stop() do { } while (0)
+# define trace_cmdline() do { } while (0)
+# define init_tracer() do { } while (0)
+#endif
+
+/* Wakeup-latency hooks; constant 0 / empty statements when disabled. */
+#ifdef CONFIG_WAKEUP_TIMING
+  extern int wakeup_timing;
+  extern void __trace_start_sched_wakeup(struct task_struct *p);
+  extern void trace_stop_sched_switched(struct task_struct *p);
+  extern void trace_change_sched_cpu(struct task_struct *p, int new_cpu);
+#else
+# define wakeup_timing 0
+# define __trace_start_sched_wakeup(p) do { } while (0)
+# define trace_stop_sched_switched(p) do { } while (0)
+# define trace_change_sched_cpu(p, cpu) do { } while (0)
+#endif
+
+/* Hard-irq on/off timing hooks; empty statements when disabled. */
+#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING
+  extern void notrace time_hardirqs_on(unsigned long a0, unsigned long a1);
+  extern void notrace time_hardirqs_off(unsigned long a0, unsigned long a1);
+#else
+# define time_hardirqs_on(a0, a1) do { } while (0)
+# define time_hardirqs_off(a0, a1) do { } while (0)
+#endif
 /* Attach to any functions which should be ignored in wchan output.
*/
 #define __sched __attribute__((__section__(".sched.text")))
@@ -1133,6 +1233,13 @@ struct task_struct {
 	unsigned int lockdep_recursion;
 #endif

+/* Number of slots in each per-task preempt_trace_* array below. */
+#define MAX_PREEMPT_TRACE 16
+
+#ifdef CONFIG_PREEMPT_TRACE
+	unsigned long preempt_trace_eip[MAX_PREEMPT_TRACE];
+	unsigned long preempt_trace_parent_eip[MAX_PREEMPT_TRACE];
+#endif
+
 /* journalling filesystem info */
 	void *journal_info;

Index: linux-cfs-2.6.23.1.q/init/main.c
===================================================================
--- linux-cfs-2.6.23.1.q.orig/init/main.c
+++ linux-cfs-2.6.23.1.q/init/main.c
@@ -592,6 +592,8 @@ asmlinkage void __init start_kernel(void
 	if (panic_later)
 		panic(panic_later, panic_param);

+	init_tracer();
+
 	lockdep_info();

 	/*
Index: linux-cfs-2.6.23.1.q/kernel/Makefile
===================================================================
--- linux-cfs-2.6.23.1.q.orig/kernel/Makefile
+++ linux-cfs-2.6.23.1.q/kernel/Makefile
@@ -39,6 +39,11 @@ obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
+# latency_trace.o is requested by each of the four options below
+obj-$(CONFIG_DEBUG_PREEMPT) += latency_trace.o
+obj-$(CONFIG_WAKEUP_TIMING) += latency_trace.o
+obj-$(CONFIG_EVENT_TRACE) += latency_trace.o
+obj-$(CONFIG_CRITICAL_TIMING) += latency_trace.o
+obj-$(CONFIG_LATENCY_HIST) += latency_hist.o
 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
 obj-$(CONFIG_KPROBES) += kprobes.o
Index: linux-cfs-2.6.23.1.q/kernel/fork.c
===================================================================
--- linux-cfs-2.6.23.1.q.orig/kernel/fork.c
+++ linux-cfs-2.6.23.1.q/kernel/fork.c
@@ -996,7 +996,7 @@ static struct task_struct *copy_process(

 	rt_mutex_init_task(p);

-#ifdef CONFIG_TRACE_IRQFLAGS
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_LOCKDEP)
 	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
 	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
 #endif
Index: linux-cfs-2.6.23.1.q/kernel/latency_hist.c
===================================================================
--- /dev/null
+++ linux-cfs-2.6.23.1.q/kernel/latency_hist.c
@@ -0,0 +1,300 @@
+/*
+ * kernel/latency_hist.c
+ *
+ * Add support for histograms of preemption-off latency and
+ * interrupt-off latency and wakeup latency, it depends on
+ * Real-Time Preemption Support.
+ *
+ * Copyright (C) 2005 MontaVista Software, Inc.
+ * Yi Yang
+ *
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* One histogram; a per-CPU instance exists per enabled latency type. */
+typedef struct hist_data_struct {
+	atomic_t hist_mode; /* 0 don't log, 1 log */
+	unsigned long min_lat;
+	unsigned long avg_lat;
+	unsigned long max_lat;
+	unsigned long long beyond_hist_bound_samples;
+	unsigned long long accumulate_lat;
+	unsigned long long total_samples;
+	unsigned long long hist_array[MAX_ENTRY_NUM];
+} hist_data_t;
+
+static struct proc_dir_entry * latency_hist_root = NULL;
+static char * latency_hist_proc_dir_root = "latency_hist";
+
+static char * percpu_proc_name = "CPU";
+
+#ifdef CONFIG_INTERRUPT_OFF_HIST
+static DEFINE_PER_CPU(hist_data_t, interrupt_off_hist);
+static char * interrupt_off_hist_proc_dir = "interrupt_off_latency";
+#endif
+
+#ifdef CONFIG_PREEMPT_OFF_HIST
+static DEFINE_PER_CPU(hist_data_t, preempt_off_hist);
+static char * preempt_off_hist_proc_dir = "preempt_off_latency";
+#endif
+
+#ifdef CONFIG_WAKEUP_LATENCY_HIST
+static DEFINE_PER_CPU(hist_data_t, wakeup_latency_hist);
+static char * wakeup_latency_hist_proc_dir = "wakeup_latency";
+#endif
+
+/* proc entries made by latency_hist_init(); NULL where creation failed */
+static struct proc_dir_entry *entry[LATENCY_TYPE_NUM][NR_CPUS];
+
+/* Returns x / y via do_div(), which divides its first argument in place. */
+static inline u64 u64_div(u64 x, u64 y)
+{
+	do_div(x, y);
+	return x;
+}
+
+/*
+ * latency_hist - account one latency sample in a per-CPU histogram
+ * @latency_type: INTERRUPT_LATENCY, PREEMPT_LATENCY or WAKEUP_LATENCY
+ * @cpu: CPU whose histogram is updated
+ * @latency: sample value; also used as the hist_array bucket index
+ *
+ * Does nothing for an out-of-range cpu, for a type whose histogram is
+ * not configured, or while hist_mode is 0.
+ */
+void latency_hist(int latency_type, int cpu, unsigned long latency)
+{
+	hist_data_t *my_hist;
+
+	if (cpu < 0 || cpu >= NR_CPUS)
+		return;
+
+	switch (latency_type) {
+#ifdef CONFIG_INTERRUPT_OFF_HIST
+	case INTERRUPT_LATENCY:
+		my_hist = (hist_data_t *)&per_cpu(interrupt_off_hist, cpu);
+		break;
+#endif
+
+#ifdef CONFIG_PREEMPT_OFF_HIST
+	case PREEMPT_LATENCY:
+		my_hist = (hist_data_t *)&per_cpu(preempt_off_hist, cpu);
+		break;
+#endif
+
+#ifdef CONFIG_WAKEUP_LATENCY_HIST
+	case WAKEUP_LATENCY:
+		my_hist = (hist_data_t *)&per_cpu(wakeup_latency_hist, cpu);
+		break;
+#endif
+	default:
+		/* unknown or compiled-out latency type */
+		return;
+	}
+
+	if (atomic_read(&my_hist->hist_mode) == 0)
+		return;
+
+	if (latency >= MAX_ENTRY_NUM)
+		my_hist->beyond_hist_bound_samples++;
+	else
+		my_hist->hist_array[latency]++;
+
+	/*
+	 * Test both bounds independently: with "else if" a run of
+	 * ever-decreasing samples would never update max_lat.
+	 */
+	if (latency < my_hist->min_lat)
+		my_hist->min_lat = latency;
+	if (latency > my_hist->max_lat)
+		my_hist->max_lat = latency;
+
+	my_hist->total_samples++;
+	my_hist->accumulate_lat += latency;
+	my_hist->avg_lat = (unsigned long) u64_div(my_hist->accumulate_lat,
+						  my_hist->total_samples);
+}
+
+/*
+ * seq_file ->start: allocates the cursor handed to ->show.  On the first
+ * call (*pos == 0) it decrements hist_mode and prints the summary header.
+ */
+static void *l_start(struct seq_file *m, loff_t * pos)
+{
+	loff_t *index_ptr;
+	loff_t index = *pos;
+	hist_data_t *my_hist = (hist_data_t *) m->private;
+
+	/* check the bound before allocating so nothing leaks at EOF */
+	if (index >= MAX_ENTRY_NUM)
+		return NULL;
+
+	index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL);
+	if (!index_ptr)
+		return NULL;
+
+	if (index == 0) {
+		atomic_dec(&my_hist->hist_mode);
+		seq_printf(m, "#Minimum latency: %lu microseconds.\n"
+			   "#Average latency: %lu microseconds.\n"
+			   "#Maximum latency: %lu microseconds.\n"
+			   "#Total samples: %llu\n"
+			   "#There are %llu samples greater or equal than %d microseconds\n"
+			   "#usecs\t%16s\n"
+			   , my_hist->min_lat
+			   , my_hist->avg_lat
+			   , my_hist->max_lat
+			   , my_hist->total_samples
+			   , my_hist->beyond_hist_bound_samples
+			   , MAX_ENTRY_NUM, "samples");
+	}
+
+	*index_ptr = index;
+	return index_ptr;
+}
+
+/*
+ * seq_file ->next: advances to the next bucket.  Past the last bucket it
+ * increments hist_mode again, frees the cursor and ends the sequence.
+ */
+static void *l_next(struct seq_file *m, void *p, loff_t * pos)
+{
+	loff_t *index_ptr = p;
+	hist_data_t *my_hist = (hist_data_t *) m->private;
+
+	if (++*pos >= MAX_ENTRY_NUM) {
+		atomic_inc(&my_hist->hist_mode);
+		/* ->stop only sees the NULL returned here, so free now */
+		kfree(index_ptr);
+		return NULL;
+	}
+	*index_ptr = *pos;
+	return index_ptr;
+}
+
+/* seq_file ->stop: releases the cursor (kfree(NULL) is a no-op). */
+static void l_stop(struct seq_file *m, void *p)
+{
+	kfree(p);
+}
+
+/* seq_file ->show: prints one "<index> <sample count>" histogram row. */
+static int l_show(struct seq_file *m, void *p)
+{
+	int index = *(loff_t *) p;
+	hist_data_t *my_hist = (hist_data_t *) m->private;
+
+	seq_printf(m, "%5d\t%16llu\n", index, my_hist->hist_array[index]);
+	return 0;
+}
+
+static struct seq_operations latency_hist_seq_op = {
+	.start = l_start,
+	.next = l_next,
+	.stop = l_stop,
+	.show = l_show
+};
+
+/*
+ * ->open for the per-CPU proc files: binds the seq_file to the histogram
+ * stored in the proc entry's ->data by latency_hist_init().
+ *
+ * Returns 0 on success, -ENOENT if the entry carries no histogram, or the
+ * error from seq_open().
+ */
+static int latency_hist_seq_open(struct inode *inode, struct file *file)
+{
+	struct proc_dir_entry *entry_ptr;
+	struct seq_file *seq;
+	int ret;
+
+	entry_ptr = PDE(file->f_dentry->d_inode);
+	if (!entry_ptr || !entry_ptr->data)
+		return -ENOENT;
+
+	/* file->private_data is only valid once seq_open() has succeeded */
+	ret = seq_open(file, &latency_hist_seq_op);
+	if (ret)
+		return ret;
+
+	seq = (struct seq_file *)file->private_data;
+	seq->private = entry_ptr->data;
+	return 0;
+}
+
+static struct file_operations latency_hist_seq_fops = {
+	.open = latency_hist_seq_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+/*
+ * Create <parent>/CPU<cpu> for one histogram and enable logging into it.
+ * Skips quietly when the parent directory or the entry cannot be created.
+ */
+static __init void latency_hist_setup_cpu(int type, int cpu,
+		struct proc_dir_entry *parent, hist_data_t *hist)
+{
+	struct proc_dir_entry *pde;
+	char procname[64];
+
+	if (!parent)
+		return;
+
+	snprintf(procname, sizeof(procname), "%s%d", percpu_proc_name, cpu);
+	pde = create_proc_entry(procname, 0, parent);
+	entry[type][cpu] = pde;
+	if (!pde)
+		return;
+
+	pde->data = (void *)hist;
+	pde->proc_fops = &latency_hist_seq_fops;
+	atomic_set(&hist->hist_mode, 1);
+	hist->min_lat = 0xFFFFFFFFUL;
+}
+
+/*
+ * Creates the latency_hist proc directory, one sub-directory per
+ * configured histogram type and one CPU<n> file per index below NR_CPUS.
+ */
+static __init int latency_hist_init(void)
+{
+	struct proc_dir_entry *tmp_parent_proc_dir;
+	int i = 0;
+
+	latency_hist_root = proc_mkdir(latency_hist_proc_dir_root, NULL);
+	if (!latency_hist_root)
+		return -ENOMEM;
+
+#ifdef CONFIG_INTERRUPT_OFF_HIST
+	tmp_parent_proc_dir = proc_mkdir(interrupt_off_hist_proc_dir, latency_hist_root);
+	for (i = 0; i < NR_CPUS; i++)
+		latency_hist_setup_cpu(INTERRUPT_LATENCY, i, tmp_parent_proc_dir,
+				       &per_cpu(interrupt_off_hist, i));
+#endif
+
+#ifdef CONFIG_PREEMPT_OFF_HIST
+	tmp_parent_proc_dir = proc_mkdir(preempt_off_hist_proc_dir, latency_hist_root);
+	for (i = 0; i < NR_CPUS; i++)
+		latency_hist_setup_cpu(PREEMPT_LATENCY, i, tmp_parent_proc_dir,
+				       &per_cpu(preempt_off_hist, i));
+#endif
+
+#ifdef CONFIG_WAKEUP_LATENCY_HIST
+	tmp_parent_proc_dir = proc_mkdir(wakeup_latency_hist_proc_dir, latency_hist_root);
+	for (i = 0; i < NR_CPUS; i++)
+		latency_hist_setup_cpu(WAKEUP_LATENCY, i, tmp_parent_proc_dir,
+				       &per_cpu(wakeup_latency_hist, i));
+#endif
+	return 0;
+}
+
+__initcall(latency_hist_init);
+
Index: linux-cfs-2.6.23.1.q/kernel/latency_trace.c
===================================================================
--- /dev/null
+++ linux-cfs-2.6.23.1.q/kernel/latency_trace.c
@@ -0,0 +1,2760 @@
+/*
+ * kernel/latency_trace.c
+ *
+ * Copyright (C) 2004-2006 Ingo Molnar
+ * Copyright (C) 2004 William Lee Irwin III
+ */
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifndef DEFINE_RAW_SPINLOCK
+# define DEFINE_RAW_SPINLOCK DEFINE_SPINLOCK
+#endif
+
+#ifndef RAW_SPIN_LOCK_UNLOCKED
+# define RAW_SPIN_LOCK_UNLOCKED SPIN_LOCK_UNLOCKED
+#endif
+
+int
trace_use_raw_cycles = 0;
+
+/* Fallbacks for names this file uses: raw spinlock type, delayed resched. */
+#define __raw_spinlock_t raw_spinlock_t
+#define need_resched_delayed() 0
+
+#ifdef CONFIG_EVENT_TRACE
+/*
+ * Convert raw cycles to usecs.
+ * Note: this is not the 'clocksource cycles' value, it's the raw
+ * cycle counter cycles. We use GTOD to timestamp latency start/end
+ * points, but the trace entries inbetween are timestamped with
+ * get_cycles().
+ *
+ * When trace_use_raw_cycles is clear, delta is a clocksource value and
+ * is converted with cycles_to_usecs() instead of the per-arch code.
+ */
+static unsigned long notrace cycles_to_us(cycle_t delta)
+{
+	if (!trace_use_raw_cycles)
+		return cycles_to_usecs(delta);
+#ifdef CONFIG_X86
+	/* do_div() divides delta in place */
+	do_div(delta, cpu_khz/1000+1);
+#elif defined(CONFIG_PPC)
+	delta = mulhwu(tb_to_us, delta);
+#elif defined(CONFIG_ARM)
+	delta = mach_cycles_to_usecs(delta);
+#else
+	#error Implement cycles_to_usecs.
+#endif
+
+	return (unsigned long) delta;
+}
+#endif
+
+/*
+ * Current timestamp: get_cycles() when trace_use_raw_cycles is set,
+ * get_monotonic_cycles() otherwise.
+ */
+static notrace inline cycle_t now(void)
+{
+	if (trace_use_raw_cycles)
+		return get_cycles();
+	return get_monotonic_cycles();
+}
+
+/* irqs_off() defaults to constant 0 unless defined before this point. */
+#ifndef irqs_off
+# define irqs_off() 0
+#endif
+
+#ifndef DEBUG_WARN_ON
+/* Fallback: runs WARN_ON(cond) and always returns 0. */
+static inline int DEBUG_WARN_ON(int cond)
+{
+	WARN_ON(cond);
+	return 0;
+}
+#endif
+
+/*
+ * irqs_off_preempt_count(): preempt_count() when preempt-off timing is
+ * configured together with irqs-off timing, otherwise constant 0.
+ */
+#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING
+# ifdef CONFIG_CRITICAL_PREEMPT_TIMING
+# define irqs_off_preempt_count() preempt_count()
+# else
+# define irqs_off_preempt_count() 0
+# endif
+#endif
+
+#ifdef CONFIG_WAKEUP_TIMING
+/* State of the wakeup being timed: lock, task, its CPU and trace buffer. */
+struct sch_struct {
+	__raw_spinlock_t trace_lock;
+	struct task_struct *task;
+	int cpu;
+	struct cpu_trace *tr;
+} ____cacheline_aligned_in_smp;
+
+static __cacheline_aligned_in_smp struct sch_struct sch =
+		{ trace_lock: __RAW_SPIN_LOCK_UNLOCKED };
+
+int wakeup_timing = 1;
+#endif
+
+/*
+ * Track maximum latencies and save the trace:
+ */
+
+/*
+ * trace_stop_sched_switched must not be called with runqueue locks held!
+ */
+static __cacheline_aligned_in_smp DECLARE_MUTEX(max_mutex);
+
+/*
+ * Sequence count - we record it when starting a measurement and
+ * skip the latency if the sequence has changed - some other section
+ * did a maximum and could disturb our measurement with serial console
+ * printouts, etc. Truly coinciding maximum latencies should be rare
+ * and what happens together happens separately as well, so this doesn't
+ * decrease the validity of the maximum found:
+ */
+static __cacheline_aligned_in_smp unsigned long max_sequence;
+
+/* Kind of record stored in a trace entry's 'type' field. */
+enum trace_type
+{
+	__TRACE_FIRST_TYPE = 0,
+
+	TRACE_FN,
+	TRACE_SPECIAL,
+	TRACE_SPECIAL_PID,
+	TRACE_SPECIAL_U64,
+	TRACE_SPECIAL_SYM,
+	TRACE_CMDLINE,
+	TRACE_SYSCALL,
+	TRACE_SYSRET,
+
+	__TRACE_LAST_TYPE
+};
+
+/* Bit flags stored in a trace entry's 'flags' field. */
+enum trace_flag_type
+{
+	TRACE_FLAG_IRQS_OFF = 0x01,
+	TRACE_FLAG_NEED_RESCHED = 0x02,
+	TRACE_FLAG_NEED_RESCHED_DELAYED = 0x04,
+	TRACE_FLAG_HARDIRQ = 0x08,
+	TRACE_FLAG_SOFTIRQ = 0x10,
+	TRACE_FLAG_IRQS_HARD_OFF = 0x20,
+};
+
+/*
+ * Maximum preemption latency measured. Initialize to maximum,
+ * we clear it after bootup.
+ * (With CONFIG_LATENCY_HIST the initial value is 0 instead.)
+ */
+#ifdef CONFIG_LATENCY_HIST
+unsigned long preempt_max_latency = (cycle_t)0UL;
+#else
+unsigned long preempt_max_latency = (cycle_t)ULONG_MAX;
+#endif
+
+unsigned long preempt_thresh;
+
+/*
+ * Should this new latency be reported/recorded?
+ */
+/*
+ * Returns 1 to report, 0 to skip.  Always 1 when histograms are compiled
+ * in and the trace is not user-triggered.  Otherwise a non-zero
+ * preempt_thresh is the lower bound for delta; with no threshold, delta
+ * must exceed preempt_max_latency.
+ */
+static int report_latency(cycle_t delta)
+{
+	if (latency_hist_flag && !trace_user_triggered)
+		return 1;
+
+	if (preempt_thresh) {
+		if (delta < preempt_thresh)
+			return 0;
+	} else {
+		if (delta <= preempt_max_latency)
+			return 0;
+	}
+	return 1;
+}
+
+#ifdef CONFIG_EVENT_TRACE
+
+/*
+ * Number of per-CPU trace entries:
+ */
+#define MAX_TRACE (65536UL*16UL)
+
+/* Size of the comm/cmdline strings kept in trace records. */
+#define CMDLINE_BYTES 16
+
+/*
+ * 32 bytes on 32-bit platforms:
+ */
+struct trace_entry {
+	char type;
+	char cpu;
+	char flags;
+	char preempt_count; // assumes PREEMPT_MASK is 8 bits or less
+	int pid;
+	cycle_t timestamp;
+	/* payload; which member is valid depends on 'type' */
+	union {
+		struct {
+			unsigned long eip;
+			unsigned long parent_eip;
+		} fn;
+		struct {
+			unsigned long eip;
+			unsigned long v1, v2, v3;
+		} special;
+		struct {
+			unsigned char str[CMDLINE_BYTES];
+		} cmdline;
+		struct {
+			unsigned long nr; // highest bit: compat call
+			unsigned long p1, p2, p3;
+		} syscall;
+		struct {
+			unsigned long ret;
+		} sysret;
+		struct {
+			unsigned long __pad3[4];
+		} pad;
+	} u;
+} __attribute__((packed));
+
+#endif
+
+/* Per-CPU tracer state; one element of cpu_traces[] per CPU index. */
+struct cpu_trace {
+	atomic_t disabled;
+	unsigned long trace_idx;
+	cycle_t preempt_timestamp;
+	unsigned long critical_start, critical_end;
+	unsigned long critical_sequence;
+	atomic_t underrun;
+	atomic_t overrun;
+	int early_warning;
+	int latency_type;
+	int cpu;
+
+#ifdef CONFIG_EVENT_TRACE
+	struct trace_entry *trace;
+	char comm[CMDLINE_BYTES];
+	pid_t pid;
+	unsigned long uid;
+	unsigned long nice;
+	unsigned long policy;
+	unsigned long rt_priority;
+	unsigned long saved_latency;
+#endif
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+	unsigned long stack_check;
+#endif
+} ____cacheline_aligned_in_smp;
+
+/* Every slot starts with stack_check = 1 when stack debugging is on. */
+static struct cpu_trace cpu_traces[NR_CPUS] ____cacheline_aligned_in_smp =
+{ [0 ... NR_CPUS-1] = {
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+	.stack_check = 1
+#endif
+	} };
+
+#ifdef CONFIG_EVENT_TRACE
+
+/* Tracer tunables (declared extern in linux/sched.h). */
+int trace_enabled = 0;
+int syscall_tracing = 1;
+int stackframe_tracing = 0;
+int mcount_enabled = 0;
+int trace_freerunning = 0;
+int trace_print_on_crash = 0;
+int trace_verbose = 0;
+int trace_all_cpus = 0;
+int print_functions = 0;
+int trace_all_runnable = 0;
+
+/*
+ * user-triggered via gettimeofday(0,1)/gettimeofday(0,0)
+ */
+int trace_user_triggered = 0;
+int trace_user_trigger_irq = -1;
+
+/* A saved copy of every CPU's trace state plus its timestamp range. */
+struct saved_trace_struct {
+	int cpu;
+	cycle_t first_timestamp, last_timestamp;
+	struct cpu_trace traces[NR_CPUS];
+} ____cacheline_aligned_in_smp;
+
+/*
+ * The current worst-case trace:
+ */
+static struct saved_trace_struct max_tr;
+
+/*
+ * /proc/latency_trace atomicity:
+ */
+static DECLARE_MUTEX(out_mutex);
+
+static struct saved_trace_struct out_tr;
+
+/*
+ * printk() the symbol for @eip as "name+offset/size", or the raw address
+ * in angle brackets when kallsyms_lookup() finds no symbol.
+ */
+static void notrace printk_name(unsigned long eip)
+{
+	char namebuf[KSYM_NAME_LEN+1];
+	unsigned long size, offset;
+	const char *sym_name;
+	char *modname;
+
+	sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf);
+	if (sym_name)
+		printk("%s+%#lx/%#lx", sym_name, offset, size);
+	else
+		printk("<%08lx>", eip);
+}
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+
+#ifndef STACK_WARN
+# define STACK_WARN (THREAD_SIZE/8)
+#endif
+
+/* Below MIN_STACK_NEEDED free bytes the stack is reported as overflowed. */
+#define MIN_STACK_NEEDED (sizeof(struct thread_info) + STACK_WARN)
+#define MAX_STACK (THREAD_SIZE - sizeof(struct thread_info))
+
+/* Exact frame walking needs x86 and frame pointers. */
+#if (defined(__i386__) || defined(__x86_64__)) && defined(CONFIG_FRAME_POINTER)
+# define PRINT_EXACT_STACKFRAME
+#endif
+
+/* Snapshot of the deepest stack seen so far, guarded by worst_stack_lock. */
+#ifdef PRINT_EXACT_STACKFRAME
+static unsigned long *worst_stack_bp;
+#endif
+static DEFINE_RAW_SPINLOCK(worst_stack_lock);
+unsigned long worst_stack_left = THREAD_SIZE;
+static unsigned long worst_stack_printed = THREAD_SIZE;
+static char worst_stack_comm[TASK_COMM_LEN+1];
+static int worst_stack_pid;
+static unsigned long worst_stack_sp;
+static char worst_stack[THREAD_SIZE];
+
+/*
+ * Record the current task's stack as the new worst case if @stack_left is
+ * lower than the recorded minimum: copies THREAD_SIZE bytes starting at
+ * current_thread_info(), plus comm, pid, an on-stack address and (with
+ * PRINT_EXACT_STACKFRAME) the frame pointer register.
+ */
+static notrace void fill_worst_stack(unsigned long stack_left)
+{
+	unsigned long flags;
+
+	/*
+	 * On x64, we must not read the PDA during early bootup:
+	 */
+#ifdef CONFIG_X86_64
+	if (system_state == SYSTEM_BOOTING)
+		return;
+#endif
+	spin_lock_irqsave(&worst_stack_lock, flags);
+	/* re-checked under the lock */
+	if (likely(stack_left < worst_stack_left)) {
+		worst_stack_left = stack_left;
+		memcpy(worst_stack, current_thread_info(), THREAD_SIZE);
+		/* address of a local stands in for the stack pointer */
+		worst_stack_sp = (unsigned long)&stack_left;
+		memcpy(worst_stack_comm, current->comm, TASK_COMM_LEN);
+		worst_stack_pid = current->pid;
+#ifdef PRINT_EXACT_STACKFRAME
+# ifdef __i386__
+		asm ("mov %%ebp, %0\n" :"=g"(worst_stack_bp));
+# elif defined(__x86_64__)
+		asm ("mov %%rbp, %0\n" :"=g"(worst_stack_bp));
+# else
+# error Poke the author of above asm code lines !
+# endif
+#endif
+	}
+	spin_unlock_irqrestore(&worst_stack_lock, flags);
+}
+
+#ifdef PRINT_EXACT_STACKFRAME
+
+/*
+ * This takes a BP offset to point the BP back into the saved stack,
+ * the original stack might be long gone (but the stackframe within
+ * the saved copy still contains references to it).
+ */
+#define CONVERT_TO_SAVED_STACK(bp) \
+	((void *)worst_stack + ((unsigned long)bp & (THREAD_SIZE-1)))
+
+/*
+ * Walk the frame-pointer chain inside the saved worst_stack copy and
+ * print, for each frame, its size, the return address and its symbol.
+ * Stops at a zero saved frame pointer or a non-kernel-text return address.
+ */
+static void show_stackframe(void)
+{
+	unsigned long addr, frame_size, *bp, *prev_bp, sum = 0;
+
+	bp = CONVERT_TO_SAVED_STACK(worst_stack_bp);
+
+	while (bp[0]) {
+		/* bp[0] is the saved frame pointer, bp[1] the return address */
+		addr = bp[1];
+		if (!kernel_text_address(addr))
+			break;
+
+		prev_bp = bp;
+		bp = CONVERT_TO_SAVED_STACK((unsigned long *)bp[0]);
+
+		frame_size = (bp - prev_bp) * sizeof(long);
+
+		/* an implausible frame size prints the running total instead */
+		if (frame_size < THREAD_SIZE) {
+			printk("{ %4ld} ", frame_size);
+			sum += frame_size;
+		} else
+			printk("{=%4ld} ", sum);
+
+		printk("[<%08lx>] ", addr);
+		printk_name(addr);
+		printk("\n");
+	}
+}
+
+#else
+
+/* True when @p points inside the saved worst_stack buffer. */
+static inline int valid_stack_ptr(void *p)
+{
+	return p > (void *)worst_stack &&
+			p < (void *)worst_stack + THREAD_SIZE - 3;
+}
+
+/*
+ * Without exact frames: scan the saved stack word by word from the saved
+ * stack pointer and print every word that is a kernel text address,
+ * together with the byte distance from the previous hit.
+ */
+static void show_stackframe(void)
+{
+	unsigned long prev_frame, addr;
+	unsigned long *stack;
+
+	prev_frame = (unsigned long)(worst_stack +
+			(worst_stack_sp & (THREAD_SIZE-1)));
+	stack = (unsigned long *)prev_frame;
+
+	while (valid_stack_ptr(stack)) {
+		addr = *stack++;
+		if (__kernel_text_address(addr)) {
+			printk("(%4ld) ", (unsigned long)stack - prev_frame);
+			printk("[<%08lx>] ", addr);
+			print_symbol("%s\n", addr);
+			prev_frame = (unsigned long)stack;
+		}
+		if ((char *)stack >= worst_stack + THREAD_SIZE)
+			break;
+	}
+}
+
+#endif
+
+/*
+ * Print the recorded worst-case stack: task, bytes used, fill ratio
+ * (flagged as a BUG at 90% or more) and the saved stack frames.
+ */
+static notrace void __print_worst_stack(void)
+{
+	unsigned long fill_ratio;
+	printk("----------------------------->\n");
+	printk("| new stack fill maximum: %s/%d, %ld bytes (out of %ld bytes).\n",
+		worst_stack_comm, worst_stack_pid,
+		MAX_STACK-worst_stack_left, (long)MAX_STACK);
+	fill_ratio = (MAX_STACK-worst_stack_left)*100/(long)MAX_STACK;
+	printk("| Stack fill ratio: %02ld%%", fill_ratio);
+	if (fill_ratio >= 90)
+		printk(" - BUG: that's quite high, please report this!\n");
+	else
+		printk(" - that's still OK, no need to report this.\n");
+	printk("------------|\n");
+
+	show_stackframe();
+	printk("<---------------------------\n\n");
+}
+
+/*
+ * Print the worst-case stack once per new maximum.  Does nothing with
+ * interrupts disabled or a non-zero preempt count, or when the current
+ * maximum has already been printed.
+ */
+static notrace void print_worst_stack(void)
+{
+	unsigned long flags;
+
+	if (irqs_disabled() || preempt_count())
+		return;
+
+	spin_lock_irqsave(&worst_stack_lock, flags);
+	if (worst_stack_printed == worst_stack_left) {
+		spin_unlock_irqrestore(&worst_stack_lock, flags);
+		return;
+	}
+	worst_stack_printed = worst_stack_left;
+	spin_unlock_irqrestore(&worst_stack_lock, flags);
+
+	__print_worst_stack();
+}
+
+/*
+ * Stack-depth check run from the trace path.  tr->disabled is raised for
+ * the duration, and tr->stack_check is dropped to 0 around the reporting
+ * calls so that a nested invocation returns immediately.  After an actual
+ * overflow report stack_check stays 0.
+ */
+static notrace void debug_stackoverflow(struct cpu_trace *tr)
+{
+	long stack_left;
+
+	if (unlikely(tr->stack_check <= 0))
+		return;
+	atomic_inc(&tr->disabled);
+
+	/* Debugging check for stack overflow: is there less than 1KB free? */
+	/* stack_left = stack pointer & (THREAD_SIZE - 1) */
+#ifdef __i386__
+	__asm__ __volatile__("and %%esp,%0" :
+				"=r" (stack_left) : "0" (THREAD_SIZE - 1));
+#elif defined(__x86_64__)
+	__asm__ __volatile__("and %%rsp,%0" :
+				"=r" (stack_left) : "0" (THREAD_SIZE - 1));
+#else
+# error Poke the author of above asm code lines !
+#endif
+	if (unlikely(stack_left < MIN_STACK_NEEDED)) {
+		tr->stack_check = 0;
+		printk(KERN_ALERT "BUG: stack overflow: only %ld bytes left! [%08lx...(%08lx-%08lx)]\n",
+			stack_left - sizeof(struct thread_info),
+			(long)&stack_left,
+			(long)current_thread_info(),
+			(long)current_thread_info() + THREAD_SIZE);
+		fill_worst_stack(stack_left);
+		__print_worst_stack();
+		goto out;
+	}
+	if (unlikely(stack_left < worst_stack_left)) {
+		tr->stack_check--;
+		fill_worst_stack(stack_left);
+		print_worst_stack();
+		tr->stack_check++;
+	} else
+		if (worst_stack_printed != worst_stack_left) {
+			/* a maximum recorded earlier is still unprinted */
+			tr->stack_check--;
+			print_worst_stack();
+			tr->stack_check++;
+		}
+out:
+	atomic_dec(&tr->disabled);
+}
+
+#endif
+
+#ifdef CONFIG_EARLY_PRINTK
+/* early_printk() variant of printk_name(): "name <addr>" or "<addr>". */
+static void notrace early_printk_name(unsigned long eip)
+{
+	char namebuf[KSYM_NAME_LEN+1];
+	unsigned long size, offset;
+	const char *sym_name;
+	char *modname;
+
+	sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf);
+	if (sym_name)
+		early_printk("%s <%08lx>", sym_name, eip);
+	else
+		early_printk("<%08lx>", eip);
+}
+
+static __raw_spinlock_t early_print_lock = __RAW_SPIN_LOCK_UNLOCKED;
+
+/*
+ * Print one trace entry through early_printk(), serialized by
+ * early_print_lock: pid, cpu, irq/resched flag characters, hardirq/softirq
+ * marker, preempt count, then the TRACE_FN or special payload.
+ */
+static void notrace early_print_entry(struct trace_entry *entry)
+{
+	int hardirq, softirq;
+
+	__raw_spin_lock(&early_print_lock);
+	early_printk("%-5d ", entry->pid);
+
+	early_printk("%d%c%c",
+		entry->cpu,
+		(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
+		(entry->flags & TRACE_FLAG_IRQS_HARD_OFF) ? 'D' : '.',
+		(entry->flags & TRACE_FLAG_NEED_RESCHED_DELAYED) ? 'n' :
+		((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
+
+	/* 'H' = hardirq and softirq, 'h' = hardirq, 's' = softirq, '.' = none */
+	hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
+	softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
+	if (hardirq && softirq)
+		early_printk("H");
+	else {
+		if (hardirq)
+			early_printk("h");
+		else {
+			if (softirq)
+				early_printk("s");
+			else
+				early_printk(".");
+		}
+	}
+
+	early_printk(":%d: ", entry->preempt_count);
+
+	if (entry->type == TRACE_FN) {
+		early_printk_name(entry->u.fn.eip);
+		early_printk(" <= (");
+		early_printk_name(entry->u.fn.parent_eip);
+		early_printk(")\n");
+	} else {
+		/* special entries: */
+		early_printk_name(entry->u.special.eip);
+		early_printk(": <%08lx> <%08lx> <%08lx>\n",
+			entry->u.special.v1,
+			entry->u.special.v2,
+			entry->u.special.v3);
+	}
+	__raw_spin_unlock(&early_print_lock);
+}
+#else
+# define early_print_entry(x) do { } while(0)
+#endif
+
+/*
+ * Store one record in @tr's trace buffer.
+ *
+ * Nothing is recorded unless a critical section is open on @tr or one of
+ * trace_user_triggered, trace_all_cpus, trace_print_on_crash or
+ * print_functions is set.  The slot index is claimed with cmpxchg() and
+ * the claim is retried if it fails.  In free-running or print_functions
+ * mode the index wraps to 0 and tr->underrun is bumped; otherwise a full
+ * buffer bumps tr->overrun and the record is dropped.
+ *
+ * @eip/@parent_eip/@v1..@v3 are stored according to @type; @flags are the
+ * caller's saved irq flags, used for TRACE_FLAG_IRQS_HARD_OFF.
+ */
+static void notrace
+____trace(int cpu, enum trace_type type, struct cpu_trace *tr,
+	  unsigned long eip, unsigned long parent_eip,
+	  unsigned long v1, unsigned long v2, unsigned long v3,
+	  unsigned long flags)
+{
+	struct trace_entry *entry;
+	unsigned long idx, idx_next;
+	cycle_t timestamp;
+	u32 pc;
+
+#ifdef CONFIG_DEBUG_PREEMPT
+//	WARN_ON(!atomic_read(&tr->disabled));
+#endif
+	if (!tr->critical_start && !trace_user_triggered && !trace_all_cpus &&
+	    !trace_print_on_crash && !print_functions)
+		goto out;
+	/*
+	 * Allocate the next index. Make sure an NMI (or interrupt)
+	 * has not taken it away. Potentially redo the timestamp as
+	 * well to make sure the trace timestamps are in chronologic
+	 * order.
+	 */
+again:
+	idx = tr->trace_idx;
+	idx_next = idx + 1;
+	timestamp = now();
+
+	if (unlikely((trace_freerunning || print_functions || atomic_read(&tr->underrun)) &&
+		     (idx_next >= MAX_TRACE) && !atomic_read(&tr->overrun))) {
+		atomic_inc(&tr->underrun);
+		idx_next = 0;
+	}
+	if (unlikely(idx >= MAX_TRACE)) {
+		atomic_inc(&tr->overrun);
+		goto out;
+	}
+#ifdef __HAVE_ARCH_CMPXCHG
+	if (unlikely(cmpxchg(&tr->trace_idx, idx, idx_next) != idx)) {
+		/* lost the race: undo the wrap accounting and retry */
+		if (idx_next == 0)
+			atomic_dec(&tr->underrun);
+		goto again;
+	}
+#else
+# ifdef CONFIG_SMP
+# error CMPXCHG missing
+# else
+	/* No worry, we are protected by the atomic_incr(&tr->disabled)
+	 * in __trace further down
+	 */
+	tr->trace_idx = idx_next;
+# endif
+#endif
+	if (unlikely(idx_next != 0 && atomic_read(&tr->underrun)))
+		atomic_inc(&tr->underrun);
+
+	pc = preempt_count();
+
+	if (unlikely(!tr->trace))
+		goto out;
+	entry = tr->trace + idx;
+	entry->type = type;
+#ifdef CONFIG_SMP
+	entry->cpu = cpu;
+#endif
+	entry->flags = (irqs_off() ? TRACE_FLAG_IRQS_OFF : 0) |
+		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_HARD_OFF : 0)|
+		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
+		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
+		(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
+		(need_resched_delayed() ? TRACE_FLAG_NEED_RESCHED_DELAYED : 0);
+	entry->preempt_count = pc & 0xff;
+	entry->pid = current->pid;
+	entry->timestamp = timestamp;
+
+	switch (type) {
+	case TRACE_FN:
+		entry->u.fn.eip = eip;
+		entry->u.fn.parent_eip = parent_eip;
+		if (unlikely(print_functions && !in_interrupt()))
+			early_print_entry(entry);
+		break;
+	case TRACE_SPECIAL:
+	case TRACE_SPECIAL_PID:
+	case TRACE_SPECIAL_U64:
+	case TRACE_SPECIAL_SYM:
+		entry->u.special.eip = eip;
+		entry->u.special.v1 = v1;
+		entry->u.special.v2 = v2;
+		entry->u.special.v3 = v3;
+		if (unlikely(print_functions && !in_interrupt()))
+			early_print_entry(entry);
+		break;
+	case TRACE_SYSCALL:
+		/* the syscall number travels in @eip */
+		entry->u.syscall.nr = eip;
+		entry->u.syscall.p1 = v1;
+		entry->u.syscall.p2 = v2;
+		entry->u.syscall.p3 = v3;
+		break;
+	case TRACE_SYSRET:
+		/* the return value travels in @eip */
+		entry->u.sysret.ret = eip;
+		break;
+	case TRACE_CMDLINE:
+		memcpy(entry->u.cmdline.str, current->comm, CMDLINE_BYTES);
+		break;
+	default:
+		break;
+	}
+out:
+	;
+}
+
+/*
+ * Common entry point for all trace records.  Returns at once unless
+ * trace_enabled is positive.  With interrupts disabled it selects the
+ * trace buffer (with wakeup timing active, only the CPU recorded in 'sch'
+ * is traced) and calls ____trace() unless this buffer is already being
+ * written, i.e. tr->disabled was non-zero on entry.
+ */
+static inline void notrace
+___trace(enum trace_type type, unsigned long eip, unsigned long parent_eip,
+		unsigned long v1, unsigned long v2,
+			unsigned long v3)
+{
+	struct cpu_trace *tr;
+	unsigned long flags;
+	int cpu;
+
+	if (unlikely(trace_enabled <= 0))
+		return;
+
+#if defined(CONFIG_DEBUG_STACKOVERFLOW) && defined(CONFIG_X86)
+	debug_stackoverflow(cpu_traces + raw_smp_processor_id());
+#endif
+
+	raw_local_irq_save(flags);
+	cpu = raw_smp_processor_id();
+	/*
+	 * Trace on the CPU where the current highest-prio task
+	 * is waiting to become runnable:
+	 */
+#ifdef CONFIG_WAKEUP_TIMING
+	if (wakeup_timing && !trace_all_cpus && !trace_print_on_crash &&
+	    !print_functions) {
+		if (!sch.tr || cpu != sch.cpu)
+			goto out;
+		tr = sch.tr;
+	} else
+		tr = cpu_traces + cpu;
+#else
+	tr = cpu_traces + cpu;
+#endif
+	atomic_inc(&tr->disabled);
+	if (likely(atomic_read(&tr->disabled) == 1)) {
+//#define DEBUG_STACK_POISON
+#ifdef DEBUG_STACK_POISON
+		char stack;
+
+		memset(&stack - 128, 0x34, 128);
+#endif
+		____trace(cpu, type, tr, eip, parent_eip, v1, v2, v3, flags);
+	}
+	atomic_dec(&tr->disabled);
+#ifdef CONFIG_WAKEUP_TIMING
+out:
+#endif
+	raw_local_irq_restore(flags);
+}
+
+/*
+ * Special, ad-hoc tracepoints:
+ * each records the caller's address (CALLER_ADDR0) plus three values.
+ */
+void notrace trace_special(unsigned long v1, unsigned long v2, unsigned long v3)
+{
+	___trace(TRACE_SPECIAL, CALLER_ADDR0, 0, v1, v2, v3);
+}
+
+EXPORT_SYMBOL(trace_special);
+
+/* As trace_special(), with @pid stored in the first value slot. */
+void notrace trace_special_pid(int pid, unsigned long v1, unsigned long v2)
+{
+	___trace(TRACE_SPECIAL_PID, CALLER_ADDR0, 0, pid, v1, v2);
+}
+
+EXPORT_SYMBOL(trace_special_pid);
+
+/* Stores @v1 as two values: bits 63..32 first, then the low 32 bits. */
+void notrace trace_special_u64(unsigned long long v1, unsigned long v2)
+{
+	___trace(TRACE_SPECIAL_U64, CALLER_ADDR0, 0,
+		 (unsigned long) (v1 >> 32), (unsigned long) (v1 & 0xFFFFFFFF),
+		 v2);
+}
+
+EXPORT_SYMBOL(trace_special_u64);
+
+/*
+ * Record up to STACK_ENTRIES return addresses of the current call chain
+ * (the first 3 frames are skipped) as two TRACE_SPECIAL_SYM records of
+ * four addresses each; unused slots are zero.  Does nothing unless both
+ * trace_enabled and stackframe_tracing are set.
+ */
+void notrace trace_special_sym(void)
+{
+#define STACK_ENTRIES 8
+	unsigned long entries[STACK_ENTRIES];
+	struct stack_trace trace;
+
+	if (!trace_enabled || !stackframe_tracing)
+		return;
+
+	trace.entries = entries;
+	trace.skip = 3;
+	trace.max_entries = STACK_ENTRIES;
+	trace.nr_entries = 0;
+
+	save_stack_trace(&trace);
+	/*
+	 * clear out the rest:
+	 */
+	while (trace.nr_entries < trace.max_entries)
+		entries[trace.nr_entries++] = 0;
+
+	___trace(TRACE_SPECIAL_SYM, entries[0], 0,
+		 entries[1], entries[2], entries[3]);
+	___trace(TRACE_SPECIAL_SYM, entries[4], 0,
+		 entries[5], entries[6], entries[7]);
+}
+
+EXPORT_SYMBOL(trace_special_sym);
+
+/*
+ * Non-inlined function:
+ * records a TRACE_FN entry for an explicit (eip, parent_eip) pair.
+ */
+void notrace __trace(unsigned long eip, unsigned long parent_eip)
+{
+	___trace(TRACE_FN, eip, parent_eip, 0, 0, 0);
+}
+
+#ifdef CONFIG_MCOUNT
+
+extern void mcount(void);
+
+EXPORT_SYMBOL(mcount);
+
+/*
+ * Records a TRACE_FN entry from CALLER_ADDR1 and CALLER_ADDR2.
+ * NOTE(review): presumably reached from the arch mcount stub, so that
+ * these are the instrumented function and its caller - confirm.
+ */
+void notrace __mcount(void)
+{
+	___trace(TRACE_FN, CALLER_ADDR1, CALLER_ADDR2, 0, 0, 0);
+}
+
+#endif
+
+/*
+ * Syscall-entry hook: records the syscall number and its first three
+ * arguments when syscall_tracing is set.
+ */
+void notrace
+sys_call(unsigned long nr, unsigned long p1, unsigned long p2, unsigned long p3)
+{
+	if (syscall_tracing)
+		___trace(TRACE_SYSCALL, nr, 0, p1, p2, p3);
+}
+
+#if
defined(CONFIG_COMPAT) && defined(CONFIG_X86) + +void notrace +sys_ia32_call(unsigned long nr, unsigned long p1, unsigned long p2, + unsigned long p3) +{ + if (syscall_tracing) + ___trace(TRACE_SYSCALL, nr | 0x80000000, 0, p1, p2, p3); +} + +#endif + +void notrace sys_ret(unsigned long ret) +{ + if (syscall_tracing) + ___trace(TRACE_SYSRET, ret, 0, 0, 0, 0); +} + +static void notrace print_name(struct seq_file *m, unsigned long eip) +{ + char namebuf[KSYM_NAME_LEN+1]; + unsigned long size, offset; + const char *sym_name; + char *modname; + + /* + * Special trace values: + */ + if (((long)eip < 100000L) && ((long)eip > -100000L)) { + seq_printf(m, "(%5ld)", eip); + return; + } + sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf); + if (sym_name) + seq_puts(m, sym_name); + else + seq_printf(m, "<%08lx>", eip); +} + +static void notrace print_name_offset(struct seq_file *m, unsigned long eip) +{ + char namebuf[KSYM_NAME_LEN+1]; + unsigned long size, offset; + const char *sym_name; + char *modname; + + sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf); + if (sym_name) + seq_printf(m, "%s+%#lx/%#lx <%08lx>", + sym_name, offset, size, eip); + else + seq_printf(m, "<%08lx>", eip); +} + +static unsigned long out_sequence = -1; + +static int pid_to_cmdline_array[PID_MAX_DEFAULT+1]; + +static void notrace _trace_cmdline(int cpu, struct cpu_trace *tr) +{ + unsigned long flags; + + local_save_flags(flags); + ____trace(cpu, TRACE_CMDLINE, tr, 0, 0, 0, 0, 0, flags); +} + +void notrace trace_cmdline(void) +{ + ___trace(TRACE_CMDLINE, 0, 0, 0, 0, 0); +} + +static void construct_pid_to_cmdline(struct cpu_trace *tr) +{ + unsigned int i, j, entries, pid; + + if (tr->critical_sequence == out_sequence) + return; + out_sequence = tr->critical_sequence; + + memset(pid_to_cmdline_array, -1, sizeof(int) * (PID_MAX_DEFAULT + 1)); + + if (!tr->trace) + return; + + entries = min(tr->trace_idx, MAX_TRACE); + + for (i = 0; i < entries; i++) { + struct 
trace_entry *entry = tr->trace + i; + + if (entry->type != TRACE_CMDLINE) + continue; + pid = entry->pid; + if (pid < PID_MAX_DEFAULT) { + pid_to_cmdline_array[pid] = i; + /* + * Replace space with underline - makes it easier + * to process for tools: + */ + for (j = 0; j < CMDLINE_BYTES; j++) + if (entry->u.cmdline.str[j] == ' ') + entry->u.cmdline.str[j] = '_'; + } + } +} + +char *pid_to_cmdline(unsigned long pid) +{ + struct cpu_trace *tr = out_tr.traces + 0; + char *cmdline = "<...>"; + int idx; + + pid = min(pid, (unsigned long)PID_MAX_DEFAULT); + if (!pid) + return ""; + + if (pid_to_cmdline_array[pid] != -1) { + idx = pid_to_cmdline_array[pid]; + if (tr->trace[idx].type == TRACE_CMDLINE) + cmdline = tr->trace[idx].u.cmdline.str; + } + return cmdline; +} + +static void copy_trace(struct cpu_trace *save, struct cpu_trace *tr, int reorder) +{ + if (!save->trace || !tr->trace) + return; + /* free-running needs reordering */ + if (reorder && atomic_read(&tr->underrun)) { + int i, idx, idx0 = tr->trace_idx; + + for (i = 0; i < MAX_TRACE; i++) { + idx = (idx0 + i) % MAX_TRACE; + save->trace[i] = tr->trace[idx]; + } + save->trace_idx = MAX_TRACE; + } else { + save->trace_idx = tr->trace_idx; + + memcpy(save->trace, tr->trace, + min(save->trace_idx, MAX_TRACE) * + sizeof(struct trace_entry)); + } + save->underrun = tr->underrun; + save->overrun = tr->overrun; +} + + +struct block_idx { + int idx[NR_CPUS]; +}; + +/* + * return the trace entry (position) of the smallest-timestamp + * one (that is still in the valid idx range): + */ +static int min_idx(struct block_idx *bidx) +{ + cycle_t min_stamp = (cycle_t) -1; + struct trace_entry *entry; + int cpu, min_cpu = -1, idx; + + for_each_online_cpu(cpu) { + idx = bidx->idx[cpu]; + if (idx >= min(max_tr.traces[cpu].trace_idx, MAX_TRACE)) + continue; + if (idx >= MAX_TRACE*NR_CPUS) { + printk("huh: idx (%d) > %ld*%d!\n", idx, MAX_TRACE, + NR_CPUS); + WARN_ON(1); + break; + } + entry = max_tr.traces[cpu].trace + 
bidx->idx[cpu]; + if (entry->timestamp < min_stamp) { + min_cpu = cpu; + min_stamp = entry->timestamp; + } + } + + return min_cpu; +} + +/* + * This code is called to construct an output trace from + * the maximum trace. Having separate traces serves both + * atomicity (a new max might be saved while we are busy + * accessing /proc/latency_trace) and it is also used to + * delay the (expensive) sorting of the output trace by + * timestamps, in the trace_all_cpus case. + */ +static void update_out_trace(void) +{ + struct trace_entry *out_entry, *entry, *tmp; + cycle_t stamp, first_stamp, last_stamp; + struct block_idx bidx = { { 0, }, }; + struct cpu_trace *tmp_max, *tmp_out; + int cpu, sum, entries, underrun_sum, overrun_sum; + + /* + * For out_tr we only have the first array's trace entries + * allocated - and they are larger on SMP to make room + * for all trace entries from all CPUs. + */ + tmp_out = out_tr.traces + 0; + tmp_max = max_tr.traces + max_tr.cpu; + /* + * Easier to copy this way. Note: the trace buffer is private + * to the output buffer, so preserve it: + */ + copy_trace(tmp_out, tmp_max, 0); + tmp = tmp_out->trace; + *tmp_out = *tmp_max; + tmp_out->trace = tmp; + + out_tr.cpu = max_tr.cpu; + + if (!tmp_out->trace) + return; + + out_entry = tmp_out->trace + 0; + + if (!trace_all_cpus) { + entries = min(tmp_out->trace_idx, MAX_TRACE); + if (!entries) + return; + out_tr.first_timestamp = tmp_out->trace[0].timestamp; + out_tr.last_timestamp = tmp_out->trace[entries-1].timestamp; + return; + } + /* + * Find the range of timestamps that are fully traced in + * all CPU traces. (since CPU traces can cover a variable + * range of time, we have to find the best range.)
+ */ + first_stamp = 0; + for_each_online_cpu(cpu) { + tmp_max = max_tr.traces + cpu; + stamp = tmp_max->trace[0].timestamp; + if (stamp > first_stamp) + first_stamp = stamp; + } + /* + * Save the timestamp range: + */ + tmp_max = max_tr.traces + max_tr.cpu; + entries = min(tmp_max->trace_idx, MAX_TRACE); + /* + * No saved trace yet? + */ + if (!entries) { + out_tr.traces[0].trace_idx = 0; + return; + } + + last_stamp = tmp_max->trace[entries-1].timestamp; + + if (last_stamp < first_stamp) { + WARN_ON(1); + + for_each_online_cpu(cpu) { + tmp_max = max_tr.traces + cpu; + entries = min(tmp_max->trace_idx, MAX_TRACE); + printk("CPU%d: %016Lx (%016Lx) ... #%d (%016Lx) %016Lx\n", + cpu, + tmp_max->trace[0].timestamp, + tmp_max->trace[1].timestamp, + entries, + tmp_max->trace[entries-2].timestamp, + tmp_max->trace[entries-1].timestamp); + } + tmp_max = max_tr.traces + max_tr.cpu; + entries = min(tmp_max->trace_idx, MAX_TRACE); + + printk("CPU%d entries: %d\n", max_tr.cpu, entries); + printk("first stamp: %016Lx\n", first_stamp); + printk(" last stamp: %016Lx\n", last_stamp); + } + +#if 0 + printk("first_stamp: %Ld [%016Lx]\n", first_stamp, first_stamp); + printk(" last_stamp: %Ld [%016Lx]\n", last_stamp, last_stamp); + printk(" +1 stamp: %Ld [%016Lx]\n", + tmp_max->trace[entries].timestamp, + tmp_max->trace[entries].timestamp); + printk(" +2 stamp: %Ld [%016Lx]\n", + tmp_max->trace[entries+1].timestamp, + tmp_max->trace[entries+1].timestamp); + printk(" delta: %Ld\n", last_stamp-first_stamp); + printk(" entries: %d\n", entries); +#endif + + out_tr.first_timestamp = first_stamp; + out_tr.last_timestamp = last_stamp; + + /* + * Fetch trace entries one by one, in increasing timestamp + * order.
Start at first_stamp, stop at last_stamp: + */ + sum = 0; + for (;;) { + cpu = min_idx(&bidx); + if (cpu == -1) + break; + entry = max_tr.traces[cpu].trace + bidx.idx[cpu]; + if (entry->timestamp > last_stamp) + break; + + bidx.idx[cpu]++; + if (entry->timestamp < first_stamp) + continue; + *out_entry = *entry; + out_entry++; + sum++; + if (sum >= MAX_TRACE*NR_CPUS) { + printk("huh: sum (%d) > %ld*%d!\n", sum, MAX_TRACE, + NR_CPUS); + WARN_ON(1); + break; + } + } + + sum = 0; + underrun_sum = 0; + overrun_sum = 0; + for_each_online_cpu(cpu) { + sum += max_tr.traces[cpu].trace_idx; + underrun_sum += atomic_read(&max_tr.traces[cpu].underrun); + overrun_sum += atomic_read(&max_tr.traces[cpu].overrun); + } + tmp_out->trace_idx = sum; + atomic_set(&tmp_out->underrun, underrun_sum); + atomic_set(&tmp_out->overrun, overrun_sum); +} + +static void notrace print_help_header(struct seq_file *m) +{ + seq_puts(m, " _------=> CPU# \n"); + seq_puts(m, " / _-----=> irqs-off \n"); + seq_puts(m, " | / _----=> need-resched \n"); + seq_puts(m, " || / _---=> hardirq/softirq \n"); + seq_puts(m, " ||| / _--=> preempt-depth \n"); + seq_puts(m, " |||| / \n"); + seq_puts(m, " ||||| delay \n"); + seq_puts(m, " cmd pid ||||| time | caller \n"); + seq_puts(m, " \\ / ||||| \\ | / \n"); +} + +static void * notrace l_start(struct seq_file *m, loff_t *pos) +{ + loff_t n = *pos; + unsigned long entries; + struct cpu_trace *tr = out_tr.traces + 0; + + down(&out_mutex); + /* + * if the file is being read newly, update the output trace: + */ + if (!n) { + // TODO: use the sequence counter here to optimize + down(&max_mutex); + update_out_trace(); + up(&max_mutex); +#if 0 + if (!tr->trace_idx) { + up(&out_mutex); + return NULL; + } +#endif + construct_pid_to_cmdline(tr); + } + entries = min(tr->trace_idx, MAX_TRACE); + + if (!n) { + seq_printf(m, "preemption latency trace v1.1.5 on %s\n", + UTS_RELEASE); + seq_puts(m, "--------------------------------------------------------------------\n"); + 
seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d | (M:%s VP:%d, KP:%d, SP:%d HP:%d", + cycles_to_usecs(tr->saved_latency), + entries, + (entries + atomic_read(&tr->underrun) + + atomic_read(&tr->overrun)), + out_tr.cpu, +#if defined(CONFIG_PREEMPT_NONE) + "server", +#elif defined(CONFIG_PREEMPT_VOLUNTARY) + "desktop", +#elif defined(CONFIG_PREEMPT_DESKTOP) + "preempt", +#else + "rt", +#endif + 0, 0, +#ifdef CONFIG_PREEMPT_SOFTIRQS + softirq_preemption +#else + 0 +#endif + , +#ifdef CONFIG_PREEMPT_HARDIRQS + hardirq_preemption +#else + 0 +#endif + ); +#ifdef CONFIG_SMP + seq_printf(m, " #P:%d)\n", num_online_cpus()); +#else + seq_puts(m, ")\n"); +#endif + seq_puts(m, " -----------------\n"); + seq_printf(m, " | task: %.16s-%d (uid:%ld nice:%ld policy:%ld rt_prio:%ld)\n", + tr->comm, tr->pid, tr->uid, tr->nice, + tr->policy, tr->rt_priority); + seq_puts(m, " -----------------\n"); + if (trace_user_triggered) { + seq_puts(m, " => started at: "); + print_name_offset(m, tr->critical_start); + seq_puts(m, "\n => ended at: "); + print_name_offset(m, tr->critical_end); + seq_puts(m, "\n"); + } + seq_puts(m, "\n"); + + if (!trace_verbose) + print_help_header(m); + } + if (n >= entries || !tr->trace) + return NULL; + + return tr->trace + n; +} + +static void * notrace l_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct cpu_trace *tr = out_tr.traces; + unsigned long entries = min(tr->trace_idx, MAX_TRACE); + + WARN_ON(!tr->trace); + + if (++*pos >= entries) { + if (*pos == entries) + seq_puts(m, "\n\nvim:ft=help\n"); + return NULL; + } + return tr->trace + *pos; +} + +static void notrace l_stop(struct seq_file *m, void *p) +{ + up(&out_mutex); +} + +static void print_timestamp(struct seq_file *m, unsigned long abs_usecs, + unsigned long rel_usecs) +{ + seq_printf(m, " %4ldus", abs_usecs); + if (rel_usecs > 100) + seq_puts(m, "!: "); + else if (rel_usecs > 1) + seq_puts(m, "+: "); + else + seq_puts(m, " : "); +} + +static void +print_timestamp_short(struct seq_file 
*m, unsigned long abs_usecs, + unsigned long rel_usecs) +{ + seq_printf(m, " %4ldus", abs_usecs); + if (rel_usecs > 100) + seq_putc(m, '!'); + else if (rel_usecs > 1) + seq_putc(m, '+'); + else + seq_putc(m, ' '); +} + +static void +print_generic(struct seq_file *m, struct trace_entry *entry) +{ + int hardirq, softirq; + + seq_printf(m, "%8.8s-%-5d ", pid_to_cmdline(entry->pid), entry->pid); + seq_printf(m, "%d", entry->cpu); + seq_printf(m, "%c%c", + (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : + (entry->flags & TRACE_FLAG_IRQS_HARD_OFF) ? 'D' : '.', + (entry->flags & TRACE_FLAG_NEED_RESCHED_DELAYED) ? 'n' : + ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); + + hardirq = entry->flags & TRACE_FLAG_HARDIRQ; + softirq = entry->flags & TRACE_FLAG_SOFTIRQ; + if (hardirq && softirq) + seq_putc(m, 'H'); + else { + if (hardirq) + seq_putc(m, 'h'); + else { + if (softirq) + seq_putc(m, 's'); + else + seq_putc(m, '.'); + } + } + + if (entry->preempt_count) + seq_printf(m, "%x", entry->preempt_count); + else + seq_puts(m, "."); +} + + +static int notrace l_show_fn(struct seq_file *m, unsigned long trace_idx, + struct trace_entry *entry, struct trace_entry *entry0, + struct trace_entry *next_entry) +{ + unsigned long abs_usecs, rel_usecs; + + abs_usecs = cycles_to_us(entry->timestamp - entry0->timestamp); + rel_usecs = cycles_to_us(next_entry->timestamp - entry->timestamp); + + if (trace_verbose) { + seq_printf(m, "%16s %5d %d %d %08x %08lx [%016Lx] %ld.%03ldms (+%ld.%03ldms): ", + pid_to_cmdline(entry->pid), + entry->pid, entry->cpu, entry->flags, + entry->preempt_count, trace_idx, + entry->timestamp, abs_usecs/1000, + abs_usecs % 1000, rel_usecs/1000, rel_usecs % 1000); + print_name_offset(m, entry->u.fn.eip); + seq_puts(m, " ("); + print_name_offset(m, entry->u.fn.parent_eip); + seq_puts(m, ")\n"); + } else { + print_generic(m, entry); + print_timestamp(m, abs_usecs, rel_usecs); + print_name(m, entry->u.fn.eip); + seq_puts(m, " ("); + print_name(m, 
entry->u.fn.parent_eip); + seq_puts(m, ")\n"); + } + return 0; +} + +static int notrace l_show_special(struct seq_file *m, unsigned long trace_idx, + struct trace_entry *entry, struct trace_entry *entry0, + struct trace_entry *next_entry, int mode64) +{ + unsigned long abs_usecs, rel_usecs; + + abs_usecs = cycles_to_us(entry->timestamp - entry0->timestamp); + rel_usecs = cycles_to_us(next_entry->timestamp - entry->timestamp); + + print_generic(m, entry); + print_timestamp(m, abs_usecs, rel_usecs); + if (trace_verbose) + print_name_offset(m, entry->u.special.eip); + else + print_name(m, entry->u.special.eip); + + if (!mode64) { + /* + * For convenience, print small numbers in decimal: + */ + if (abs((int)entry->u.special.v1) < 100000) + seq_printf(m, " (%5ld ", entry->u.special.v1); + else + seq_printf(m, " (%lx ", entry->u.special.v1); + if (abs((int)entry->u.special.v2) < 100000) + seq_printf(m, "%5ld ", entry->u.special.v2); + else + seq_printf(m, "%lx ", entry->u.special.v2); + if (abs((int)entry->u.special.v3) < 100000) + seq_printf(m, "%5ld)\n", entry->u.special.v3); + else + seq_printf(m, "%lx)\n", entry->u.special.v3); + } else { + seq_printf(m, " (%13Ld %ld)\n", + ((u64)entry->u.special.v1 << 32) + + (u64)entry->u.special.v2, entry->u.special.v3); + } + return 0; +} + +static int notrace +l_show_special_pid(struct seq_file *m, unsigned long trace_idx, + struct trace_entry *entry, struct trace_entry *entry0, + struct trace_entry *next_entry) +{ + unsigned long abs_usecs, rel_usecs; + unsigned int pid; + + pid = entry->u.special.v1; + + abs_usecs = cycles_to_us(entry->timestamp - entry0->timestamp); + rel_usecs = cycles_to_us(next_entry->timestamp - entry->timestamp); + + print_generic(m, entry); + print_timestamp(m, abs_usecs, rel_usecs); + if (trace_verbose) + print_name_offset(m, entry->u.special.eip); + else + print_name(m, entry->u.special.eip); + seq_printf(m, " <%.8s-%d> (%ld %ld)\n", + pid_to_cmdline(pid), pid, + entry->u.special.v2, 
entry->u.special.v3); + + return 0; +} + +static int notrace +l_show_special_sym(struct seq_file *m, unsigned long trace_idx, + struct trace_entry *entry, struct trace_entry *entry0, + struct trace_entry *next_entry, int mode64) +{ + unsigned long abs_usecs, rel_usecs; + + abs_usecs = cycles_to_us(entry->timestamp - entry0->timestamp); + rel_usecs = cycles_to_us(next_entry->timestamp - entry->timestamp); + + print_generic(m, entry); + print_timestamp(m, abs_usecs, rel_usecs); + if (trace_verbose) + print_name_offset(m, entry->u.special.eip); + else + print_name(m, entry->u.special.eip); + + seq_puts(m, "()<-"); + print_name(m, entry->u.special.v1); + seq_puts(m, "()<-"); + print_name(m, entry->u.special.v2); + seq_puts(m, "()<-"); + print_name(m, entry->u.special.v3); + seq_puts(m, "()\n"); + + return 0; +} + + +static int notrace l_show_cmdline(struct seq_file *m, unsigned long trace_idx, + struct trace_entry *entry, struct trace_entry *entry0, + struct trace_entry *next_entry) +{ + unsigned long abs_usecs, rel_usecs; + + if (!trace_verbose) + return 0; + + abs_usecs = cycles_to_us(entry->timestamp - entry0->timestamp); + rel_usecs = cycles_to_us(next_entry->timestamp - entry->timestamp); + + seq_printf(m, + "[ => %16s ] %ld.%03ldms (+%ld.%03ldms)\n", + entry->u.cmdline.str, + abs_usecs/1000, abs_usecs % 1000, + rel_usecs/1000, rel_usecs % 1000); + + return 0; +} + +extern unsigned long sys_call_table[NR_syscalls]; + +#if defined(CONFIG_COMPAT) && defined(CONFIG_X86) +extern unsigned long ia32_sys_call_table[], ia32_syscall_end[]; +#define IA32_NR_syscalls (ia32_syscall_end - ia32_sys_call_table) +#endif + +static int notrace l_show_syscall(struct seq_file *m, unsigned long trace_idx, + struct trace_entry *entry, struct trace_entry *entry0, + struct trace_entry *next_entry) +{ + unsigned long abs_usecs, rel_usecs; + unsigned long nr; + + abs_usecs = cycles_to_us(entry->timestamp - entry0->timestamp); + rel_usecs = cycles_to_us(next_entry->timestamp - 
entry->timestamp); + + print_generic(m, entry); + print_timestamp_short(m, abs_usecs, rel_usecs); + + seq_puts(m, "> "); + nr = entry->u.syscall.nr; +#if defined(CONFIG_COMPAT) && defined(CONFIG_X86) + if (nr & 0x80000000) { + nr &= ~0x80000000; + if (nr < IA32_NR_syscalls) + print_name(m, ia32_sys_call_table[nr]); + else + seq_printf(m, "<badsys(%lu)>", nr); + } else +#endif + if (nr < NR_syscalls) + print_name(m, sys_call_table[nr]); + else + seq_printf(m, "<badsys(%lu)>", nr); + +#ifdef CONFIG_64BIT + seq_printf(m, " (%016lx %016lx %016lx)\n", + entry->u.syscall.p1, entry->u.syscall.p2, entry->u.syscall.p3); +#else + seq_printf(m, " (%08lx %08lx %08lx)\n", + entry->u.syscall.p1, entry->u.syscall.p2, entry->u.syscall.p3); +#endif + + return 0; +} + +static int notrace l_show_sysret(struct seq_file *m, unsigned long trace_idx, + struct trace_entry *entry, struct trace_entry *entry0, + struct trace_entry *next_entry) +{ + unsigned long abs_usecs, rel_usecs; + + abs_usecs = cycles_to_us(entry->timestamp - entry0->timestamp); + rel_usecs = cycles_to_us(next_entry->timestamp - entry->timestamp); + + print_generic(m, entry); + print_timestamp_short(m, abs_usecs, rel_usecs); + + seq_printf(m, "< (%ld)\n", entry->u.sysret.ret); + + return 0; +} + + +static int notrace l_show(struct seq_file *m, void *p) +{ + struct cpu_trace *tr = out_tr.traces; + struct trace_entry *entry, *entry0, *next_entry; + unsigned long trace_idx; + + cond_resched(); + entry = p; + if (entry->timestamp < out_tr.first_timestamp) + return 0; + if (entry->timestamp > out_tr.last_timestamp) + return 0; + + entry0 = tr->trace; + trace_idx = entry - entry0; + + if (trace_idx + 1 < tr->trace_idx) + next_entry = entry + 1; + else + next_entry = entry; + + if (trace_verbose) + seq_printf(m, "(T%d/#%ld) ", entry->type, trace_idx); + + switch (entry->type) { + case TRACE_FN: + l_show_fn(m, trace_idx, entry, entry0, next_entry); + break; + case TRACE_SPECIAL: + l_show_special(m, trace_idx, entry, entry0, next_entry, 0); + break; +
case TRACE_SPECIAL_PID: + l_show_special_pid(m, trace_idx, entry, entry0, next_entry); + break; + case TRACE_SPECIAL_U64: + l_show_special(m, trace_idx, entry, entry0, next_entry, 1); + break; + case TRACE_SPECIAL_SYM: + l_show_special_sym(m, trace_idx, entry, entry0, + next_entry, 1); + break; + case TRACE_CMDLINE: + l_show_cmdline(m, trace_idx, entry, entry0, next_entry); + break; + case TRACE_SYSCALL: + l_show_syscall(m, trace_idx, entry, entry0, next_entry); + break; + case TRACE_SYSRET: + l_show_sysret(m, trace_idx, entry, entry0, next_entry); + break; + default: + seq_printf(m, "unknown trace type %d\n", entry->type); + } + return 0; +} + +struct seq_operations latency_trace_op = { + .start = l_start, + .next = l_next, + .stop = l_stop, + .show = l_show +}; + +/* + * Copy the new maximum trace into the separate maximum-trace + * structure. (this way the maximum trace is permanently saved, + * for later retrieval via /proc/latency_trace) + */ +static void update_max_tr(struct cpu_trace *tr) +{ + struct cpu_trace *save; + int cpu, all_cpus = 0; + +#ifdef CONFIG_PREEMPT + WARN_ON(!preempt_count() && !irqs_disabled()); +#endif + + max_tr.cpu = tr->cpu; + save = max_tr.traces + tr->cpu; + + if ((wakeup_timing || trace_user_triggered || trace_print_on_crash || + print_functions) && trace_all_cpus) { + all_cpus = 1; + for_each_online_cpu(cpu) + atomic_inc(&cpu_traces[cpu].disabled); + } + + save->saved_latency = preempt_max_latency; + save->preempt_timestamp = tr->preempt_timestamp; + save->critical_start = tr->critical_start; + save->critical_end = tr->critical_end; + save->critical_sequence = tr->critical_sequence; + + memcpy(save->comm, current->comm, CMDLINE_BYTES); + save->pid = current->pid; + save->uid = current->uid; + save->nice = current->static_prio - 20 - MAX_RT_PRIO; + save->policy = current->policy; + save->rt_priority = current->rt_priority; + + if (all_cpus) { + for_each_online_cpu(cpu) { + copy_trace(max_tr.traces + cpu, cpu_traces + cpu, 1); + 
atomic_dec(&cpu_traces[cpu].disabled); + } + } else + copy_trace(save, tr, 1); +} + +#else /* !EVENT_TRACE */ + +static inline void notrace +____trace(int cpu, enum trace_type type, struct cpu_trace *tr, + unsigned long eip, unsigned long parent_eip, + unsigned long v1, unsigned long v2, unsigned long v3, + unsigned long flags) +{ +} + +static inline void notrace +___trace(enum trace_type type, unsigned long eip, unsigned long parent_eip, + unsigned long v1, unsigned long v2, + unsigned long v3) +{ +} + +static inline void notrace __trace(unsigned long eip, unsigned long parent_eip) +{ +} + +static inline void update_max_tr(struct cpu_trace *tr) +{ +} + +static inline void notrace _trace_cmdline(int cpu, struct cpu_trace *tr) +{ +} + +#endif + +static int setup_preempt_thresh(char *s) +{ + int thresh; + + get_option(&s, &thresh); + if (thresh > 0) { + preempt_thresh = usecs_to_cycles(thresh); + printk("Preemption threshold = %u us\n", thresh); + } + return 1; +} +__setup("preempt_thresh=", setup_preempt_thresh); + +static inline void notrace reset_trace_idx(int cpu, struct cpu_trace *tr) +{ + if (trace_all_cpus) + for_each_online_cpu(cpu) { + tr = cpu_traces + cpu; + tr->trace_idx = 0; + atomic_set(&tr->underrun, 0); + atomic_set(&tr->overrun, 0); + } + else{ + tr->trace_idx = 0; + atomic_set(&tr->underrun, 0); + atomic_set(&tr->overrun, 0); + } +} + +#ifdef CONFIG_CRITICAL_TIMING + +static void notrace +check_critical_timing(int cpu, struct cpu_trace *tr, unsigned long parent_eip) +{ + unsigned long latency, t0, t1; + cycle_t T0, T1, T2, delta; + unsigned long flags; + + if (trace_user_triggered) + return; + /* + * usecs conversion is slow so we try to delay the conversion + * as long as possible: + */ + T0 = tr->preempt_timestamp; + T1 = get_monotonic_cycles(); + delta = T1-T0; + + local_save_flags(flags); + + if (!report_latency(delta)) + goto out; + + ____trace(cpu, TRACE_FN, tr, CALLER_ADDR0, parent_eip, 0, 0, 0, flags); + /* + * Update the timestamp, because 
the trace entry above + * might change it (it can only get larger so the latency + * is fair to be reported): + */ + T2 = get_monotonic_cycles(); + + delta = T2-T0; + + latency = cycles_to_usecs(delta); + latency_hist(tr->latency_type, cpu, latency); + + if (latency_hist_flag) { + if (preempt_max_latency >= delta) + goto out; + } + + if (tr->critical_sequence != max_sequence || down_trylock(&max_mutex)) + goto out; + +#ifndef CONFIG_CRITICAL_LATENCY_HIST + if (!preempt_thresh && preempt_max_latency > delta) { + printk("bug: updating %016Lx > %016Lx?\n", + preempt_max_latency, delta); + printk(" [%016Lx %016Lx %016Lx]\n", T0, T1, T2); + } +#endif + + preempt_max_latency = delta; + t0 = cycles_to_usecs(T0); + t1 = cycles_to_usecs(T1); + + tr->critical_end = parent_eip; + + update_max_tr(tr); + +#ifndef CONFIG_CRITICAL_LATENCY_HIST + if (preempt_thresh) + print