diff -Nru linux-2.4.13.vanilla/Makefile linux-2.4.13.latxs/Makefile --- linux-2.4.13.vanilla/Makefile Tue Oct 23 22:21:20 2001 +++ linux-2.4.13.latxs/Makefile Thu Oct 25 11:38:53 2001 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 13 -EXTRAVERSION = +EXTRAVERSION = latxs KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) diff -Nru linux-2.4.13.vanilla/arch/i386/kernel/process.c linux-2.4.13.latxs/arch/i386/kernel/process.c --- linux-2.4.13.vanilla/arch/i386/kernel/process.c Thu Oct 4 18:42:54 2001 +++ linux-2.4.13.latxs/arch/i386/kernel/process.c Thu Oct 25 11:35:54 2001 @@ -135,6 +135,9 @@ idle(); schedule(); check_pgt_cache(); +#ifdef CONFIG_SMP + runqueue_balance(IDLE_RQBALANCE); +#endif /* #ifdef CONFIG_SMP */ } } diff -Nru linux-2.4.13.vanilla/arch/i386/kernel/smpboot.c linux-2.4.13.latxs/arch/i386/kernel/smpboot.c --- linux-2.4.13.vanilla/arch/i386/kernel/smpboot.c Thu Oct 4 18:42:54 2001 +++ linux-2.4.13.latxs/arch/i386/kernel/smpboot.c Thu Oct 25 11:35:55 2001 @@ -771,7 +771,7 @@ extern unsigned long cpu_initialized; -static void __init do_boot_cpu (int apicid) +static void __init do_boot_cpu (int apicid) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. @@ -799,15 +799,14 @@ if (!idle) panic("No idle process for CPU %d", cpu); - idle->processor = cpu; - map_cpu_to_boot_apicid(cpu, apicid); - idle->has_cpu = 1; /* we schedule the first task manually */ - idle->thread.eip = (unsigned long) start_secondary; - del_from_runqueue(idle); unhash_process(idle); + + idle->has_cpu = 1; /* we schedule the first task manually */ + idle->thread.eip = (unsigned long) start_secondary; + idle->processor = cpu; init_tasks[cpu] = idle; /* start_eip had better be page-aligned! 
*/ @@ -830,7 +829,7 @@ /* stash the current NMI vector, so we can put things back */ nmi_high = *((volatile unsigned short *) TRAMPOLINE_HIGH); nmi_low = *((volatile unsigned short *) TRAMPOLINE_LOW); - } + } CMOS_WRITE(0xa, 0xf); local_flush_tlb(); diff -Nru linux-2.4.13.vanilla/drivers/char/Makefile linux-2.4.13.latxs/drivers/char/Makefile --- linux-2.4.13.vanilla/drivers/char/Makefile Mon Oct 15 13:36:48 2001 +++ linux-2.4.13.latxs/drivers/char/Makefile Thu Oct 25 11:37:50 2001 @@ -16,7 +16,7 @@ O_TARGET := char.o -obj-y += mem.o tty_io.o n_tty.o tty_ioctl.o raw.o pty.o misc.o random.o +obj-y += mem.o tty_io.o n_tty.o tty_ioctl.o raw.o pty.o misc.o random.o latsched.o # All of the (potential) objects that export symbols. # This list comes from 'grep -l EXPORT_SYMBOL *.[hc]'.
diff -Nru linux-2.4.13.vanilla/drivers/char/latsched.c linux-2.4.13.latxs/drivers/char/latsched.c
--- linux-2.4.13.vanilla/drivers/char/latsched.c Wed Dec 31 16:00:00 1969
+++ linux-2.4.13.latxs/drivers/char/latsched.c Thu Oct 25 11:37:50 2001
@@ -0,0 +1,190 @@
+/*
+ * linux/drivers/char/latsched.c
+ *
+ * Kernel scheduler latency tester
+ *
+ * Copyright (C) 2001, Davide Libenzi
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+/*
+ * Debug verbosity. DNPRINTK(n, x) emits x only when n <= DEBUG, so with
+ * DEBUG at 0 every DNPRINTK(3, ...) below is silent. x is a parenthesized
+ * printk argument list.
+ */
+#define DEBUG 0
+#if DEBUG
+#define DPRINTK(x) printk x
+#else
+#define DPRINTK(x) do { } while (0)
+#endif
+#define DNPRINTK(n,x) do { if ((n) <= DEBUG) printk x; } while (0)
+
+/* Per-open state; currently carries no fields. */
+struct latsched {
+
+};
+
+static int open_latsched(struct inode *inode, struct file *file);
+static int close_latsched(struct inode *inode, struct file *file);
+static int ioctl_latsched(struct inode *inode, struct file *file,
+			  unsigned int cmd, unsigned long arg);
+
+static struct file_operations latsched_fops = {
+	ioctl:		ioctl_latsched,
+	open:		open_latsched,
+	release:	close_latsched
+};
+
+static struct miscdevice latsched = {
+	LATSCHED_MINOR, "latsched", &latsched_fops
+};
+
+/*
+ * open(): allocate the zeroed per-open state, stash it in
+ * file->private_data and pin the module. Returns 0, or -ENOMEM when the
+ * allocation fails.
+ */
+static int open_latsched(struct inode *inode, struct file *file)
+{
+	struct latsched *ls;
+
+	if (!(ls = kmalloc(sizeof(struct latsched), GFP_KERNEL)))
+		return -ENOMEM;
+
+	memset(ls, 0, sizeof(*ls));
+
+	file->private_data = ls;
+
+	MOD_INC_USE_COUNT;
+
+	DNPRINTK(3, (KERN_INFO "[%p] /dev/latsched: open() ls=%p\n", current, ls));
+	return 0;
+}
+
+/* release(): free the per-open state and drop the module reference. */
+static int close_latsched(struct inode *inode, struct file *file)
+{
+	struct latsched *ls = (struct latsched *) file->private_data;
+
+	kfree(ls);
+
+	MOD_DEC_USE_COUNT;
+
+	DNPRINTK(3, (KERN_INFO "[%p] /dev/latsched: close() ls=%p\n", current, ls));
+	return 0;
+}
+
+/*
+ * ioctl(): LS_START / LS_STOP call latsched_start(1) / latsched_start(0),
+ * LS_FETCH round-trips a struct lsctl_getdata through latsched_getdata(),
+ * LS_SAMPLES passes arg to latsched_setsamples(). Unknown commands return
+ * -EINVAL; otherwise the helper's result is returned.
+ */
+static int ioctl_latsched(struct inode *inode, struct file *file,
+			  unsigned int cmd, unsigned long arg)
+{
+	int res;
+	struct latsched *ls = (struct latsched *) file->private_data;
+	struct lsctl_getdata lsgd;
+
+	switch (cmd) {
+	case LS_START:
+		res = latsched_start(1);
+
+		DNPRINTK(3, (KERN_INFO "[%p] /dev/latsched: ioctl(%p, LS_START) == %d\n",
+			     current, ls, res));
+		return res;
+
+	case LS_STOP:
+		res = latsched_start(0);
+
+		DNPRINTK(3, (KERN_INFO "[%p] /dev/latsched: ioctl(%p, LS_STOP) == %d\n",
+			     current, ls, res));
+		return res;
+
+	case LS_FETCH:
+		if ((res = verify_area(VERIFY_WRITE, (void *) arg, sizeof(struct lsctl_getdata))))
+			return res;
+		if (__copy_from_user(&lsgd, (void *) arg, sizeof(struct lsctl_getdata)))
+			return -EFAULT;
+		/*
+		 * size comes from user space: reject negative counts and counts
+		 * whose byte length would wrap, otherwise the verify_area() below
+		 * would check a range shorter than the one the size field describes.
+		 */
+		if (lsgd.size < 0 ||
+		    (unsigned long) lsgd.size > ~0UL / sizeof(struct latsched_sample))
+			return -EINVAL;
+		if ((res = verify_area(VERIFY_WRITE, (void *) lsgd.data, lsgd.size * sizeof(struct latsched_sample))))
+			return res;
+
+		if (!(res = latsched_getdata(&lsgd)) &&
+		    __copy_to_user((void *) arg, &lsgd, sizeof(struct lsctl_getdata)))
+			res = -EFAULT;
+
+		DNPRINTK(3, (KERN_INFO "[%p] /dev/latsched: ioctl(%p, LS_FETCH, %d) == %d\n",
+			     current, ls, lsgd.cpu, res));
+		return res;
+
+	case LS_SAMPLES:
+		res = latsched_setsamples((int) arg);
+
+		DNPRINTK(3, (KERN_INFO "[%p] /dev/latsched: ioctl(%p, LS_SAMPLES, %lu) == %d\n",
+			     current, ls, arg, res));
+		return res;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Register the misc device at boot / module load. Returns 0 on success or
+ * the misc_register() error so a failed registration is not reported as
+ * an installed driver.
+ */
+int __init init_latsched(void)
+{
+	int res;
+
+	if ((res = misc_register(&latsched))) {
+		printk(KERN_ERR "/dev/latsched: misc_register() failed (%d)\n", res);
+		return res;
+	}
+
+	printk(KERN_INFO "[%p] /dev/latsched: driver installed.\n", current);
+
+	return 0;
+}
+
+module_init(init_latsched);
+
diff -Nru linux-2.4.13.vanilla/include/linux/latsched.h linux-2.4.13.latxs/include/linux/latsched.h --- linux-2.4.13.vanilla/include/linux/latsched.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.13.latxs/include/linux/latsched.h Fri Oct 26 16:49:27 2001 @@ -0,0 +1,41 @@ +/* + * linux/include/linux/latsched.h + * + * Kernel scheduler latency tester + * + * Copyright (C) 2001, Davide Libenzi + * + */ + +#ifndef _LINUX_LATSCHED_H +#define _LINUX_LATSCHED_H + +#include + +#define LATSCHED_MINOR 117 +#define STD_LATSCHED_SAMPLES 1024 + +struct latsched_sample { + cycles_t lss_in, lss_out; + pid_t lss_pid; +}; +struct latsched_data { + struct latsched_sample *ls_data; + int ls_size; + int ls_curr; +}; +struct lsctl_getdata { + int cpu; + int size; + struct latsched_sample *data; + int rsize; +}; + +#define LS_START _IO('P', 1) +#define LS_STOP _IO('P', 2) +#define LS_FETCH _IOWR('P', 3, struct lsctl_getdata) +#define LS_SAMPLES _IOR('P', 4, int) + + +#endif /* #ifndef _LINUX_LATSCHED_H */ + diff -Nru linux-2.4.13.vanilla/include/linux/sched.h linux-2.4.13.latxs/include/linux/sched.h --- linux-2.4.13.vanilla/include/linux/sched.h Tue Oct 23 21:59:06 2001 +++ linux-2.4.13.latxs/include/linux/sched.h Fri Oct 26 16:49:27 2001 @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -26,6 +27,7 @@ #include #include #include +#include struct exec_domain; @@ -72,7 +74,10 @@ #define CT_TO_SECS(x) ((x) / HZ) #define CT_TO_USECS(x) (((x) % HZ) * 1000000/HZ) -extern int nr_running, nr_threads; +#define nr_running atomic_read(&gnr_running) + +extern atomic_t gnr_running; +extern int nr_threads; extern int last_pid; #include @@ -139,9 +144,12 @@ * a separate lock). 
*/ extern rwlock_t tasklist_lock; -extern spinlock_t runqueue_lock; extern spinlock_t mmlist_lock; +extern void latsched_init(void); +extern int latsched_start(int on); +extern int latsched_setsamples(int nsamps); +extern int latsched_getdata(struct lsctl_getdata *lsgd); extern void sched_init(void); extern void init_idle(void); extern void show_state(void); @@ -312,6 +320,7 @@ */ struct list_head run_list; unsigned long sleep_time; + unsigned long cpu_jtime, sched_jtime; struct task_struct *next_task, *prev_task; struct mm_struct *active_mm; @@ -332,9 +341,9 @@ pid_t tgid; /* boolean value for session group leader */ int leader; - /* + /* * pointers to (original) parent process, youngest child, younger sibling, - * older sibling, respectively. (p->father can be replaced with + * older sibling, respectively. (p->father can be replaced with * p->p_pptr->pid) */ struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr; @@ -393,12 +402,15 @@ int (*notifier)(void *priv); void *notifier_data; sigset_t *notifier_mask; - + /* Thread group tracking */ u32 parent_exec_id; u32 self_exec_id; /* Protection of (de-)allocation: mm, files, fs, tty */ spinlock_t alloc_lock; +/* a better place for these brothers must be found */ + int move_to_cpu; + struct list_head proclist_cpu; }; /* @@ -485,7 +497,10 @@ sig: &init_signals, \ pending: { NULL, &tsk.pending.head, {{0}}}, \ blocked: {{0}}, \ - alloc_lock: SPIN_LOCK_UNLOCKED \ + alloc_lock: SPIN_LOCK_UNLOCKED, \ + move_to_cpu: 0, \ + proclist_cpu: LIST_HEAD_INIT(tsk.proclist_cpu), \ + cpu_jtime: 0, \ } @@ -765,6 +780,20 @@ extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)); extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); +extern void del_from_runqueue(struct task_struct * p); +extern void add_to_proclist(struct task_struct * p); +extern void del_from_proclist(struct task_struct * p); +extern int move_to_cpu(struct task_struct * p, int cpu, int stick); 
+extern int get_best_cpu(void); +extern int runqueue_balance(int mode); +extern void runqueue_spin_lock(struct task_struct * p); +extern void runqueue_spin_unlock(struct task_struct * p); + + +#define IDLE_RQBALANCE 0 + + + #define __wait_event(wq, condition) \ do { \ wait_queue_t __wait; \ @@ -808,7 +837,7 @@ current->state = TASK_RUNNING; \ remove_wait_queue(&wq, &__wait); \ } while (0) - + #define wait_event_interruptible(wq, condition) \ ({ \ int __ret = 0; \ @@ -818,6 +847,7 @@ }) #define REMOVE_LINKS(p) do { \ + del_from_proclist(p); \ (p)->next_task->prev_task = (p)->prev_task; \ (p)->prev_task->next_task = (p)->next_task; \ if ((p)->p_osptr) \ @@ -829,6 +859,7 @@ } while (0) #define SET_LINKS(p) do { \ + add_to_proclist(p); \ (p)->next_task = &init_task; \ (p)->prev_task = init_task.prev_task; \ init_task.prev_task->next_task = (p); \ @@ -845,13 +876,6 @@ #define next_thread(p) \ list_entry((p)->thread_group.next, struct task_struct, thread_group) -static inline void del_from_runqueue(struct task_struct * p) -{ - nr_running--; - p->sleep_time = jiffies; - list_del(&p->run_list); - p->run_list.next = NULL; -} static inline int task_on_runqueue(struct task_struct *p) { diff -Nru linux-2.4.13.vanilla/init/main.c linux-2.4.13.latxs/init/main.c --- linux-2.4.13.vanilla/init/main.c Fri Oct 12 10:17:15 2001 +++ linux-2.4.13.latxs/init/main.c Thu Oct 25 11:37:50 2001 @@ -573,7 +573,7 @@ /* only text is profiled */ prof_len = (unsigned long) &_etext - (unsigned long) &_stext; prof_len >>= prof_shift; - + size = prof_len * sizeof(unsigned int) + PAGE_SIZE-1; prof_buffer = (unsigned int *) alloc_bootmem(size); } @@ -611,12 +611,13 @@ check_bugs(); printk("POSIX conformance testing by UNIFIX\n"); - /* - * We count on the initial thread going ok + /* + * We count on the initial thread going ok * Like idlers init is an unlocked kernel thread, which will * make syscalls (and thus be locked). 
*/ smp_init(); + latsched_init(); rest_init(); } diff -Nru linux-2.4.13.vanilla/kernel/fork.c linux-2.4.13.latxs/kernel/fork.c --- linux-2.4.13.vanilla/kernel/fork.c Tue Oct 23 17:44:15 2001 +++ linux-2.4.13.latxs/kernel/fork.c Thu Oct 25 17:42:38 2001 @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -28,7 +29,7 @@ /* The idle threads do not count.. */ int nr_threads; -int nr_running; +atomic_t gnr_running = ATOMIC_INIT(0); int max_threads; unsigned long total_forks; /* Handle normal Linux uptimes. */ @@ -598,7 +599,7 @@ */ if (nr_threads >= max_threads) goto bad_fork_cleanup_count; - + get_exec_domain(p->exec_domain); if (p->binfmt && p->binfmt->module) @@ -639,7 +640,7 @@ { int i; p->has_cpu = 0; - p->processor = current->processor; + p->processor = clone_flags & CLONE_PID ? current->processor: get_best_cpu(); /* ?? should we just memset this ?? */ for(i = 0; i < smp_num_cpus; i++) p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0; @@ -665,10 +666,10 @@ if (retval) goto bad_fork_cleanup_mm; p->semundo = NULL; - + /* Our parent execution domain becomes current domain These must match for thread signalling to apply */ - + p->parent_exec_id = p->self_exec_id; /* ok, now we should be set up.. */ @@ -687,6 +688,10 @@ if (!current->counter) current->need_resched = 1; + p->cpu_jtime = 0; + p->sched_jtime = jiffies; + p->move_to_cpu = 0; + /* * Ok, add it to the run-queues and make it * visible to the rest of the system. 
@@ -774,7 +779,7 @@ panic("Cannot create signal action SLAB cache"); files_cachep = kmem_cache_create("files_cache", - sizeof(struct files_struct), 0, + sizeof(struct files_struct), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (!files_cachep) panic("Cannot create files SLAB cache"); diff -Nru linux-2.4.13.vanilla/kernel/ksyms.c linux-2.4.13.latxs/kernel/ksyms.c --- linux-2.4.13.vanilla/kernel/ksyms.c Wed Oct 17 14:32:50 2001 +++ linux-2.4.13.latxs/kernel/ksyms.c Thu Oct 25 11:34:51 2001 @@ -443,7 +443,6 @@ #endif EXPORT_SYMBOL(kstat); -EXPORT_SYMBOL(nr_running); /* misc */ EXPORT_SYMBOL(panic); diff -Nru linux-2.4.13.vanilla/kernel/sched.c linux-2.4.13.latxs/kernel/sched.c --- linux-2.4.13.vanilla/kernel/sched.c Wed Oct 17 14:14:37 2001 +++ linux-2.4.13.latxs/kernel/sched.c Sun Oct 28 20:00:25 2001 @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include @@ -74,24 +76,19 @@ * Init task must be ok at boot for the ix86 as we will check its signals * via the SMP irq return path. */ - + struct task_struct * init_tasks[NR_CPUS] = {&init_task, }; /* * The tasklist_lock protects the linked list of processes. * - * The runqueue_lock locks the parts that actually access - * and change the run-queues, and have to be interrupt-safe. - * - * If both locks are to be concurrently held, the runqueue_lock + * If both locks are to be concurrently held, the runqueue_lock(cpu) * nests inside the tasklist_lock. * * task->alloc_lock nests inside tasklist_lock. 
*/ -spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; /* inner */ rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */ -static LIST_HEAD(runqueue_head); /* * We align per-CPU scheduling data on cacheline boundaries, @@ -99,14 +96,52 @@ */ static union { struct schedule_data { + atomic_t qnr_processes; + atomic_t qnr_running; + struct list_head proclist_head; + struct list_head runqueue_head; + spinlock_t runqueue_lock; struct task_struct * curr; - cycles_t last_schedule; + struct latsched_data ls; } schedule_data; char __pad [SMP_CACHE_BYTES]; -} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}}; +} aligned_data [NR_CPUS] __cacheline_aligned; + +#ifdef CONFIG_SMP #define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr -#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule +#define qnr_processes(cpu) aligned_data[(cpu)].schedule_data.qnr_processes +#define qnr_running(cpu) aligned_data[(cpu)].schedule_data.qnr_running +#define proclist_head(cpu) aligned_data[(cpu)].schedule_data.proclist_head +#define runqueue_head(cpu) aligned_data[(cpu)].schedule_data.runqueue_head +#define runqueue_lock(cpu) aligned_data[(cpu)].schedule_data.runqueue_lock + + +#define latsched_data(cpu) aligned_data[(cpu)].schedule_data.ls.ls_data +#define latsched_samp(cpu, idx) aligned_data[(cpu)].schedule_data.ls.ls_data[(idx)] +#define latsched_size(cpu) aligned_data[(cpu)].schedule_data.ls.ls_size +#define latsched_curr(cpu) aligned_data[(cpu)].schedule_data.ls.ls_curr + +#else /* #ifdef CONFIG_SMP */ + +#define cpu_curr(cpu) aligned_data[0].schedule_data.curr +#define qnr_processes(cpu) aligned_data[0].schedule_data.qnr_processes +#define qnr_running(cpu) aligned_data[0].schedule_data.qnr_running +#define proclist_head(cpu) aligned_data[0].schedule_data.proclist_head +#define runqueue_head(cpu) aligned_data[0].schedule_data.runqueue_head +#define runqueue_lock(cpu) aligned_data[0].schedule_data.runqueue_lock + + 
+#define latsched_data(cpu) aligned_data[0].schedule_data.ls.ls_data +#define latsched_samp(cpu, idx) aligned_data[0].schedule_data.ls.ls_data[(idx)] +#define latsched_size(cpu) aligned_data[0].schedule_data.ls.ls_size +#define latsched_curr(cpu) aligned_data[0].schedule_data.ls.ls_curr + +#endif /* #ifdef CONFIG_SMP */ + + +static atomic_t lss_enabled = ATOMIC_INIT(0); + struct kernel_stat kstat; extern struct task_struct *child_reaper; @@ -124,8 +159,29 @@ #endif +#define rq_lock(p) lock_task_rq(p) +#define rq_unlock(p) spin_unlock(&runqueue_lock(p->processor)) +#define rq_lock_irq(p) do { local_irq_disable(); lock_task_rq(p); } while (0) +#define rq_unlock_irq(p) do { spin_unlock(&runqueue_lock(p->processor)); local_irq_enable(); } while (0) +#define rq_lock_irqsave(p, f) do { local_irq_save(f); lock_task_rq(p); } while (0) +#define rq_unlock_irqrestore(p, f) do { spin_unlock(&runqueue_lock(p->processor)); local_irq_restore(f); } while (0) + + + void scheduling_functions_start_here(void) { } +static inline void lock_task_rq(struct task_struct *p) +{ + int cpu = p->processor; + + spin_lock(&runqueue_lock(cpu)); + while (p->processor != cpu) { + spin_unlock(&runqueue_lock(cpu)); + cpu = p->processor; + spin_lock(&runqueue_lock(cpu)); + } +} + /* * This is the function that decides how desirable a process is.. * You can weigh different processes against each other depending @@ -140,7 +196,7 @@ * +1000: realtime process, select this. */ -static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm) +static inline int goodness(struct task_struct * p, struct mm_struct *this_mm) { int weight; @@ -167,13 +223,12 @@ weight = p->counter; if (!weight) goto out; - -#ifdef CONFIG_SMP - /* Give a largish advantage to the same processor... 
*/ - /* (this is equivalent to penalizing other processors) */ - if (p->processor == this_cpu) - weight += PROC_CHANGE_PENALTY; -#endif + + /* add advantage related to the history of this task on this cpu + * this try to account the cache footprint of p in this_cpu + */ + if (p->cpu_jtime > jiffies) + weight += p->cpu_jtime - jiffies; /* .. and a slight advantage to the current MM */ if (p->mm == this_mm || !p->mm) @@ -196,9 +251,9 @@ * the 'goodness value' of replacing a process on a given CPU. * positive value means 'replace', zero or negative means 'dont'. */ -static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu) +static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p) { - return goodness(p, cpu, prev->active_mm) - goodness(prev, cpu, prev->active_mm); + return goodness(p, prev->active_mm) - goodness(prev, prev->active_mm); } /* @@ -211,92 +266,33 @@ static void reschedule_idle(struct task_struct * p) { #ifdef CONFIG_SMP - int this_cpu = smp_processor_id(); - struct task_struct *tsk, *target_tsk; - int cpu, best_cpu, i, max_prio; - cycles_t oldest_idle; - - /* - * shortcut if the woken up task's last CPU is - * idle now. - */ - best_cpu = p->processor; - if (can_schedule(p, best_cpu)) { - tsk = idle_task(best_cpu); - if (cpu_curr(best_cpu) == tsk) { - int need_resched; -send_now_idle: - /* - * If need_resched == -1 then we can skip sending - * the IPI altogether, tsk->need_resched is - * actively watched by the idle thread. - */ - need_resched = tsk->need_resched; - tsk->need_resched = 1; - if ((best_cpu != this_cpu) && !need_resched) - smp_send_reschedule(best_cpu); - return; - } - } + int best_cpu = p->processor, this_cpu = smp_processor_id(), need_resched; + struct task_struct *tsk; - /* - * We know that the preferred CPU has a cache-affine current - * process, lets try to find a new idle CPU for the woken-up - * process. Select the least recently active idle CPU. 
(that - * one will have the least active cache context.) Also find - * the executing process which has the least priority. - */ - oldest_idle = (cycles_t) -1; - target_tsk = NULL; - max_prio = 0; - - for (i = 0; i < smp_num_cpus; i++) { - cpu = cpu_logical_map(i); - if (!can_schedule(p, cpu)) - continue; - tsk = cpu_curr(cpu); + tsk = cpu_curr(best_cpu); + if (tsk == idle_task(best_cpu)) { /* - * We use the first available idle CPU. This creates - * a priority list between idle CPUs, but this is not - * a problem. + * If need_resched == -1 then we can skip sending + * the IPI altogether, tsk->need_resched is + * actively watched by the idle thread. */ - if (tsk == idle_task(cpu)) { - if (last_schedule(cpu) < oldest_idle) { - oldest_idle = last_schedule(cpu); - target_tsk = tsk; - } - } else { - if (oldest_idle == -1ULL) { - int prio = preemption_goodness(tsk, p, cpu); - - if (prio > max_prio) { - max_prio = prio; - target_tsk = tsk; - } - } - } - } - tsk = target_tsk; - if (tsk) { - if (oldest_idle != -1ULL) { - best_cpu = tsk->processor; - goto send_now_idle; - } + need_resched = tsk->need_resched; + tsk->need_resched = 1; + if ((best_cpu != this_cpu) && !need_resched) + smp_send_reschedule(best_cpu); + } else if (tsk != p && preemption_goodness(tsk, p) > 0) { tsk->need_resched = 1; if (tsk->processor != this_cpu) smp_send_reschedule(tsk->processor); } - return; - - -#else /* UP */ +#else /* #ifdef CONFIG_SMP */ int this_cpu = smp_processor_id(); struct task_struct *tsk; tsk = cpu_curr(this_cpu); - if (preemption_goodness(tsk, p, this_cpu) > 0) + if (preemption_goodness(tsk, p) > 0) tsk->need_resched = 1; -#endif +#endif /* #ifdef CONFIG_SMP */ } /* @@ -306,22 +302,172 @@ * run-queue, not the end. See the comment about "This is * subtle" in the scheduler proper.. 
*/ -static inline void add_to_runqueue(struct task_struct * p) +static inline void __add_to_runqueue(struct task_struct * p) +{ + list_add(&p->run_list, &runqueue_head(p->processor)); + atomic_inc(&qnr_running(p->processor)); + atomic_inc(&gnr_running); +} + +static inline void __del_from_runqueue(struct task_struct * p) +{ + atomic_dec(&gnr_running); + atomic_dec(&qnr_running(p->processor)); + p->sleep_time = jiffies; + list_del(&p->run_list); + p->run_list.next = NULL; +} + +void del_from_runqueue(struct task_struct * p) +{ + unsigned long flags; + + rq_lock_irqsave(p, flags); + __del_from_runqueue(p); + rq_unlock_irqrestore(p, flags); +} + +static inline void __add_to_proclist(struct task_struct * p) +{ + list_add(&p->proclist_cpu, &proclist_head(p->processor)); + atomic_inc(&qnr_processes(p->processor)); +} + +void add_to_proclist(struct task_struct * p) +{ + unsigned long flags; + + rq_lock_irqsave(p, flags); + __add_to_proclist(p); + rq_unlock_irqrestore(p, flags); +} + +static inline void __del_from_proclist(struct task_struct * p) { - list_add(&p->run_list, &runqueue_head); - nr_running++; + list_del(&p->proclist_cpu); + atomic_dec(&qnr_processes(p->processor)); + p->proclist_cpu.next = NULL; } -static inline void move_last_runqueue(struct task_struct * p) +void del_from_proclist(struct task_struct * p) +{ + unsigned long flags; + + rq_lock_irqsave(p, flags); + __del_from_proclist(p); + rq_unlock_irqrestore(p, flags); +} + +void runqueue_spin_lock(struct task_struct * p) +{ + rq_lock(p); +} + +void runqueue_spin_unlock(struct task_struct * p) +{ + rq_unlock(p); +} + +static inline void __move_last_runqueue(struct task_struct * p) { list_del(&p->run_list); - list_add_tail(&p->run_list, &runqueue_head); + list_add_tail(&p->run_list, &runqueue_head(p->processor)); } -static inline void move_first_runqueue(struct task_struct * p) +static inline void __move_first_runqueue(struct task_struct * p) { list_del(&p->run_list); - list_add(&p->run_list, &runqueue_head); 
+ list_add(&p->run_list, &runqueue_head(p->processor)); +} + +int move_to_cpu(struct task_struct * p, int cpu, int stick) +{ + int res = 0; + unsigned long flags; + + rq_lock_irqsave(p, flags); + if (p == idle_task(p->processor)) BUG(); + if (p->processor != cpu) { + if (!p->move_to_cpu) { + p->move_to_cpu = stick ? -cpu - 1: cpu + 1; + res = 1; + } + } else { + if (stick) + p->cpus_allowed = (1 << cpu); + res = 1; + } + rq_unlock_irqrestore(p, flags); + return res; +} + +/* + * try to find the best cpu to run a fresh new process, no locks are held + * during this function. it gets called by do_fork() in SMP mode + */ +int get_best_cpu(void) +{ + int nr, best_cpu, this_cpu = smp_processor_id(); + int min_nr_running, cpu_running, cpu_processes, min_nr_processes; + + best_cpu = this_cpu; + min_nr_running = atomic_read(&qnr_running(this_cpu)); + min_nr_processes = atomic_read(&qnr_processes(this_cpu)); + for (nr = 0; nr < smp_num_cpus; nr++) { + if (nr == this_cpu) continue; + cpu_running = atomic_read(&qnr_running(nr)); + if (cpu_running < min_nr_running) { + min_nr_running = cpu_running; + min_nr_processes = atomic_read(&qnr_processes(nr)); + best_cpu = nr; + } else if (cpu_running == min_nr_running && + (cpu_processes = atomic_read(&qnr_processes(nr))) < min_nr_processes) { + min_nr_processes = cpu_processes; + best_cpu = nr; + } + } + return best_cpu; +} + +static inline int try_steal_task(int src_cpu, int dst_cpu) +{ + int res = 0; + unsigned long flags; + struct task_struct *tsk; + struct list_head *head, *tmp; + + spin_lock_irqsave(&runqueue_lock(src_cpu), flags); + head = &runqueue_head(src_cpu); + list_for_each(tmp, head) { + tsk = list_entry(tmp, struct task_struct, run_list); + if (can_schedule(tsk, dst_cpu) && !tsk->move_to_cpu) { + tsk->move_to_cpu = dst_cpu + 1; + res = 1; + break; + } + } + spin_unlock_irqrestore(&runqueue_lock(src_cpu), flags); + return res; +} + +/* + * very basic balancing function that search for the most loaded cpu and + * try to 
steal a process from there, no locks are help during the cpu loop. + */ +int runqueue_balance(int mode) +{ + int nr, this_cpu = smp_processor_id(), max_nr_running = 0, max_cpu = 0; + + for (nr = 0; nr < smp_num_cpus; nr++) { + if (nr == this_cpu) continue; + if (atomic_read(&qnr_running(nr)) > max_nr_running) { + max_nr_running = atomic_read(&qnr_running(nr)); + max_cpu = nr; + } + } + if (max_nr_running > (atomic_read(&qnr_running(this_cpu)) + 1)) + try_steal_task(max_cpu, this_cpu); + return 0; } /* @@ -340,16 +486,16 @@ /* * We want the common case fall through straight, thus the goto. */ - spin_lock_irqsave(&runqueue_lock, flags); + rq_lock_irqsave(p, flags); p->state = TASK_RUNNING; - if (task_on_runqueue(p)) + if (task_on_runqueue(p) || p->move_to_cpu) goto out; - add_to_runqueue(p); - if (!synchronous || !(p->cpus_allowed & (1 << smp_processor_id()))) + __add_to_runqueue(p); + if (!synchronous || p->processor != smp_processor_id()) reschedule_idle(p); success = 1; out: - spin_unlock_irqrestore(&runqueue_lock, flags); + rq_unlock_irqrestore(p, flags); return success; } @@ -382,7 +528,7 @@ * delivered to the current task. In this case the remaining time * in jiffies will be returned, or 0 if the timer expired in time * - * The current task state is guaranteed to be TASK_RUNNING when this + * The current task state is guaranteed to be TASK_RUNNING when this * routine returns. * * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule @@ -475,41 +621,7 @@ task_lock(prev); prev->has_cpu = 0; mb(); - if (prev->state == TASK_RUNNING) - goto needs_resched; - -out_unlock: task_unlock(prev); /* Synchronise here with release_task() if prev is TASK_ZOMBIE */ - return; - - /* - * Slow path - we 'push' the previous process and - * reschedule_idle() will attempt to find a new - * processor for it. (but it might preempt the - * current process as well.) We must take the runqueue - * lock and re-check prev->state to be correct. 
It might - * still happen that this process has a preemption - * 'in progress' already - but this is not a problem and - * might happen in other circumstances as well. - */ -needs_resched: - { - unsigned long flags; - - /* - * Avoid taking the runqueue lock in cases where - * no preemption-check is necessery: - */ - if ((prev == idle_task(smp_processor_id())) || - (policy & SCHED_YIELD)) - goto out_unlock; - - spin_lock_irqsave(&runqueue_lock, flags); - if ((prev->state == TASK_RUNNING) && !prev->has_cpu) - reschedule_idle(prev); - spin_unlock_irqrestore(&runqueue_lock, flags); - goto out_unlock; - } #else prev->policy &= ~SCHED_YIELD; #endif /* CONFIG_SMP */ @@ -530,15 +642,14 @@ * tasks can run. It can not be killed, and it cannot sleep. The 'state' * information in task[0] is never used. */ -asmlinkage void schedule(void) +static inline void __schedule(void) { struct schedule_data * sched_data; struct task_struct *prev, *next, *p; - struct list_head *tmp; + struct list_head *head, *tmp; int this_cpu, c; - - spin_lock_prefetch(&runqueue_lock); + spin_lock_prefetch(&runqueue_lock(current->processor)); if (!current->active_mm) BUG(); need_resched_back: @@ -556,7 +667,7 @@ */ sched_data = & aligned_data[this_cpu].schedule_data; - spin_lock_irq(&runqueue_lock); + spin_lock_irq(&runqueue_lock(this_cpu)); /* move an exhausted RR process to be last.. */ if (prev->policy == SCHED_RR) @@ -570,10 +681,17 @@ break; } default: - del_from_runqueue(prev); + __del_from_runqueue(prev); case TASK_RUNNING:; } prev->need_resched = 0; + /* we certainly do not want to do this onto the idle task */ + if (prev != idle_task(this_cpu)) { + /* this save the cpu time that has not been consumed by the previous preemption */ + prev->cpu_jtime = prev->cpu_jtime > prev->sched_jtime ? 
(prev->cpu_jtime - prev->sched_jtime) >> 1: 0; + /* recalculate the cpu time */ + prev->cpu_jtime += (jiffies - prev->sched_jtime) + jiffies; + } /* * this is the scheduler proper: @@ -589,10 +707,11 @@ goto still_running; still_running_back: - list_for_each(tmp, &runqueue_head) { + head = &runqueue_head(this_cpu); + list_for_each(tmp, head) { p = list_entry(tmp, struct task_struct, run_list); if (can_schedule(p, this_cpu)) { - int weight = goodness(p, this_cpu, prev->active_mm); + int weight = goodness(p, prev->active_mm); if (weight > c) c = weight, next = p; } @@ -601,6 +720,12 @@ /* Do we need to re-calculate counters? */ if (!c) goto recalculate; + +#ifdef CONFIG_SMP + if (next->move_to_cpu) + goto cpu_migrate; +cpu_migrate_back: +#endif /* #ifdef CONFIG_SMP */ /* * from this point on nothing can prevent us from * switching to the next task, save this fact in @@ -609,9 +734,9 @@ sched_data->curr = next; #ifdef CONFIG_SMP next->has_cpu = 1; - next->processor = this_cpu; -#endif - spin_unlock_irq(&runqueue_lock); +#endif /* #ifdef CONFIG_SMP */ + next->sched_jtime = jiffies; + spin_unlock_irq(&runqueue_lock(this_cpu)); if (prev == next) { /* We won't go through the normal tail, so do this by hand */ @@ -619,24 +744,6 @@ goto same_process; } -#ifdef CONFIG_SMP - /* - * maintain the per-process 'last schedule' value. - * (this has to be recalculated even if we reschedule to - * the same process) Currently this is only used on SMP, - * and it's approximate, so we do not have to maintain - * it while holding the runqueue spinlock. - */ - sched_data->last_schedule = get_cycles(); - - /* - * We drop the scheduler lock early (it's a global spinlock), - * thus we have to lock the previous process from getting - * rescheduled during switch_to(). 
- */ - -#endif /* CONFIG_SMP */ - kstat.context_swtch++; /* * there are 3 processes which are affected by a context switch: @@ -683,30 +790,71 @@ recalculate: { - struct task_struct *p; - spin_unlock_irq(&runqueue_lock); + spin_unlock_irq(&runqueue_lock(this_cpu)); read_lock(&tasklist_lock); - for_each_task(p) + head = &proclist_head(this_cpu); + list_for_each(tmp, head) { + p = list_entry(tmp, struct task_struct, proclist_cpu); p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice); + } read_unlock(&tasklist_lock); - spin_lock_irq(&runqueue_lock); + spin_lock_irq(&runqueue_lock(this_cpu)); } goto repeat_schedule; still_running: if (!(prev->cpus_allowed & (1UL << this_cpu))) goto still_running_back; - c = goodness(prev, this_cpu, prev->active_mm); + c = goodness(prev, prev->active_mm); next = prev; goto still_running_back; move_rr_last: if (!prev->counter) { prev->counter = NICE_TO_TICKS(prev->nice); - move_last_runqueue(prev); + __move_last_runqueue(prev); } goto move_rr_back; +#ifdef CONFIG_SMP +cpu_migrate: + { + int move_cpu, next_cpu, stick; + + if (next == prev) { + next = idle_task(this_cpu); + next->need_resched = 1; + goto cpu_migrate_back; + } + if (next->move_to_cpu > 0) + move_cpu = next->move_to_cpu - 1, stick = 0; + else + move_cpu = -next->move_to_cpu - 1, stick = 1; + __del_from_runqueue(next); + spin_unlock_irq(&runqueue_lock(this_cpu)); + + write_lock_irq(&tasklist_lock); + lock_task_rq(next); + __del_from_proclist(next); + next_cpu = next->processor; + next->processor = move_cpu; + if (stick) + next->cpus_allowed = (1 << move_cpu); + spin_unlock(&runqueue_lock(next_cpu)); + + spin_lock(&runqueue_lock(move_cpu)); + __add_to_proclist(next); + __add_to_runqueue(next); + next->move_to_cpu = 0; + reschedule_idle(next); + spin_unlock(&runqueue_lock(move_cpu)); + write_unlock_irq(&tasklist_lock); + + spin_lock_irq(&runqueue_lock(this_cpu)); + } + goto repeat_schedule; +#endif /* #ifdef CONFIG_SMP */ + scheduling_in_interrupt: printk("Scheduling in 
interrupt\n"); BUG(); @@ -730,7 +878,7 @@ CHECK_MAGIC_WQHEAD(q); WQ_CHECK_LIST_HEAD(&q->task_list); - + list_for_each(tmp,&q->task_list) { unsigned int state; wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); @@ -849,7 +997,7 @@ long sleep_on_timeout(wait_queue_head_t *q, long timeout) { SLEEP_ON_VAR - + current->state = TASK_UNINTERRUPTIBLE; SLEEP_ON_HEAD @@ -907,7 +1055,7 @@ return tsk; } -static int setscheduler(pid_t pid, int policy, +static int setscheduler(pid_t pid, int policy, struct sched_param *param) { struct sched_param lp; @@ -926,14 +1074,14 @@ * We play safe to avoid deadlocks. */ read_lock_irq(&tasklist_lock); - spin_lock(&runqueue_lock); p = find_process_by_pid(pid); retval = -ESRCH; if (!p) - goto out_unlock; - + goto out_unlock_tkll; + + rq_lock(p); if (policy < 0) policy = p->policy; else { @@ -942,7 +1090,7 @@ policy != SCHED_OTHER) goto out_unlock; } - + /* * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid * priority for SCHED_OTHER is 0. @@ -954,7 +1102,7 @@ goto out_unlock; retval = -EPERM; - if ((policy == SCHED_FIFO || policy == SCHED_RR) && + if ((policy == SCHED_FIFO || policy == SCHED_RR) && !capable(CAP_SYS_NICE)) goto out_unlock; if ((current->euid != p->euid) && (current->euid != p->uid) && @@ -965,19 +1113,20 @@ p->policy = policy; p->rt_priority = lp.sched_priority; if (task_on_runqueue(p)) - move_first_runqueue(p); + __move_first_runqueue(p); current->need_resched = 1; out_unlock: - spin_unlock(&runqueue_lock); + rq_unlock(p); +out_unlock_tkll: read_unlock_irq(&tasklist_lock); out_nounlock: return retval; } -asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, +asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, struct sched_param *param) { return setscheduler(pid, policy, param); @@ -1042,29 +1191,13 @@ asmlinkage long sys_sched_yield(void) { /* - * Trick. sched_yield() first counts the number of truly + * Trick. 
sched_yield() first counts the number of truly * 'pending' runnable processes, then returns if it's * only the current processes. (This test does not have * to be atomic.) In threaded applications this optimization * gets triggered quite often. */ - - int nr_pending = nr_running; - -#if CONFIG_SMP - int i; - - // Subtract non-idle processes running on other CPUs. - for (i = 0; i < smp_num_cpus; i++) { - int cpu = cpu_logical_map(i); - if (aligned_data[cpu].schedule_data.curr != idle_task(cpu)) - nr_pending--; - } -#else - // on UP this process is on the runqueue as well - nr_pending--; -#endif - if (nr_pending) { + if (atomic_read(&qnr_running(current->processor)) > 1) { /* * This process can only be rescheduled by us, * so this is safe without any locking. @@ -1259,7 +1392,7 @@ /* We also take the runqueue_lock while altering task fields * which affect scheduling decisions */ - spin_lock(&runqueue_lock); + rq_lock(this_task); this_task->ptrace = 0; this_task->nice = DEF_NICE; @@ -1274,7 +1407,7 @@ memcpy(this_task->rlim, init_task.rlim, sizeof(*(this_task->rlim))); this_task->user = INIT_USER; - spin_unlock(&runqueue_lock); + rq_unlock(this_task); write_unlock_irq(&tasklist_lock); } @@ -1320,10 +1453,11 @@ if (current != &init_task && task_on_runqueue(current)) { printk("UGH! (%d:%d) was on the runqueue, removing.\n", smp_processor_id(), current->pid); - del_from_runqueue(current); + __del_from_runqueue(current); } + current->cpu_jtime = 0; + current->sched_jtime = jiffies; sched_data->curr = current; - sched_data->last_schedule = get_cycles(); clear_bit(current->processor, &wait_init_idle); } @@ -1335,8 +1469,16 @@ * We have to do a little magic to get the first * process right in SMP mode. 
*/
-	int cpu = smp_processor_id();
-	int nr;
+	int nr, cpu = smp_processor_id();
+
+	for (nr = 0; nr < NR_CPUS; nr++) {
+		atomic_set(&qnr_processes(nr), 0);
+		atomic_set(&qnr_running(nr), 0);
+		cpu_curr(nr) = &init_task;
+		INIT_LIST_HEAD(&runqueue_head(nr));
+		INIT_LIST_HEAD(&proclist_head(nr));
+		runqueue_lock(nr) = SPIN_LOCK_UNLOCKED;
+	}
 
 	init_task.processor = cpu;
 
@@ -1355,3 +1497,206 @@
 	atomic_inc(&init_mm.mm_count);
 	enter_lazy_tlb(&init_mm, current, cpu);
 }
+
+
+/*
+ * Set up the per-CPU sample buffers with the default capacity
+ * (STD_LATSCHED_SAMPLES).  A failed allocation is tolerated: the size is
+ * still recorded, so latsched_start() can allocate the buffer later.
+ */
+void __init latsched_init(void)
+{
+	int ii, size;
+
+	size = STD_LATSCHED_SAMPLES;
+	for (ii = 0; ii < smp_num_cpus; ii++) {
+		if ((latsched_data(ii) = kmalloc(size * sizeof(struct latsched_sample), GFP_KERNEL)))
+			memset(latsched_data(ii), 0, size * sizeof(struct latsched_sample));
+		latsched_size(ii) = size;
+		latsched_curr(ii) = 0;
+	}
+}
+
+
+/*
+ * Instrumented scheduler entry point.  Wraps __schedule() and, while
+ * lss_enabled is set, stores the cycle counter read before and after the
+ * call, plus the pid of the task running afterwards, in the next slot of
+ * that CPU's circular sample buffer.  lss_pid == -1 marks a slot whose
+ * entry stamp was taken but which has not been completed yet.
+ */
+asmlinkage void schedule(void)
+{
+	int this_cpu;
+	unsigned long flags;
+	cycles_t cycls;
+
+	if (atomic_read(&lss_enabled)) {
+		local_irq_save(flags);
+		this_cpu = current->processor;
+		latsched_samp(this_cpu, latsched_curr(this_cpu)).lss_pid = -1;
+		latsched_samp(this_cpu, latsched_curr(this_cpu)).lss_in = get_cycles();
+		local_irq_restore(flags);
+	}
+
+	__schedule();
+
+	cycls = get_cycles();
+	if (atomic_read(&lss_enabled)) {
+		local_irq_save(flags);
+		this_cpu = current->processor;
+		if (latsched_samp(this_cpu, latsched_curr(this_cpu)).lss_pid == -1) {
+			latsched_samp(this_cpu, latsched_curr(this_cpu)).lss_out = cycls;
+			latsched_samp(this_cpu, latsched_curr(this_cpu)).lss_pid = current->pid;
+			if (++latsched_curr(this_cpu) >= latsched_size(this_cpu))
+				latsched_curr(this_cpu) = 0;
+		}
+		local_irq_restore(flags);
+	}
+}
+
+
+/*
+ * Switch sampling on (on != 0) or off (on == 0).
+ *
+ * When switching on, each CPU's buffer is allocated if missing, zeroed and
+ * rewound before lss_enabled is set.  Returns 0 on success, -ENOMEM if a
+ * buffer could not be allocated.
+ */
+int latsched_start(int on)
+{
+	int res;
+
+	cli();
+	if (on) {
+		if (!atomic_read(&lss_enabled)) {
+			int ii;
+
+			for (ii = 0; ii < smp_num_cpus; ii++) {
+				res = -ENOMEM;
+				/* interrupts are off after cli(): must not sleep here */
+				if (!latsched_data(ii) &&
+				    !(latsched_data(ii) = kmalloc(latsched_size(ii) * sizeof(struct latsched_sample), GFP_ATOMIC)))
+					goto out;
+				memset(latsched_data(ii), 0, latsched_size(ii) * sizeof(struct latsched_sample));
+				latsched_curr(ii) = 0;
+			}
+			atomic_set(&lss_enabled, 1);
+		}
+	} else
+		atomic_set(&lss_enabled, 0);
+	res = 0;
+out:
+	sti();
+	return res;
+}
+
+
+/*
+ * Resize every CPU's sample buffer to nsamps entries.
+ *
+ * Returns 0 on success, -EINVAL for a non-positive or overflowing count,
+ * -EBUSY while sampling is enabled, -ENOMEM if an allocation fails (the
+ * CPUs processed before the failure keep their new buffers).
+ */
+int latsched_setsamples(int nsamps)
+{
+	int ii, res, size = nsamps;
+
+	/* an empty or size-wrapped buffer would be overrun by schedule() */
+	if (nsamps <= 0 ||
+	    (size_t) nsamps > ~(size_t) 0 / sizeof(struct latsched_sample))
+		return -EINVAL;
+
+	cli();
+	res = -EBUSY;
+	if (atomic_read(&lss_enabled))
+		goto out;
+	for (ii = 0; ii < smp_num_cpus; ii++) {
+		if (latsched_data(ii))
+			kfree(latsched_data(ii));
+		res = -ENOMEM;
+		/* interrupts are off after cli(): must not sleep here */
+		if (!(latsched_data(ii) = kmalloc(size * sizeof(struct latsched_sample), GFP_ATOMIC)))
+			goto out;
+		memset(latsched_data(ii), 0, size * sizeof(struct latsched_sample));
+		latsched_size(ii) = size;
+		latsched_curr(ii) = 0;
+	}
+	res = 0;
+out:
+	sti();
+	return res;
+}
+
+
+/*
+ * Copy the samples collected for CPU lsgd->cpu to the buffer lsgd->data,
+ * oldest first, writing at most lsgd->size entries; the number written is
+ * stored in lsgd->rsize.
+ *
+ * Returns 0 on success, -EBUSY while sampling is enabled, -EINVAL for a
+ * bad cpu or a negative size, -EFAULT if the destination is not writable.
+ *
+ * NOTE(review): lsgd itself is dereferenced directly, so it is presumably
+ * a kernel-space copy of the ioctl argument -- confirm against the caller.
+ * NOTE(review): copy_to_user() can fault and sleep while interrupts are
+ * disabled by cli(); consider copying after sti() once a proper lock exists.
+ */
+int latsched_getdata(struct lsctl_getdata *lsgd)
+{
+	int res, cpu, size, csize;
+	struct latsched_sample *data;
+
+	cli();
+	res = -EBUSY;
+	if (atomic_read(&lss_enabled))
+		goto out;
+	res = -EINVAL;
+	if (lsgd->cpu < 0 || lsgd->cpu >= smp_num_cpus || lsgd->size < 0)
+		goto out;
+	cpu = lsgd->cpu;
+	data = lsgd->data;
+	if (!latsched_data(cpu)) {
+		/* buffer was never allocated: there is nothing to report */
+		lsgd->rsize = 0;
+		res = 0;
+		goto out;
+	}
+	res = -EFAULT;
+	if (latsched_samp(cpu, latsched_size(cpu) - 1).lss_pid != 0) {
+		/* last slot used: treat as wrapped, oldest sample at the cursor */
+		size = latsched_size(cpu);
+		if (size > lsgd->size)
+			size = lsgd->size;
+		lsgd->rsize = size;
+		csize = latsched_size(cpu) - latsched_curr(cpu);
+		if (csize > size)
+			csize = size;
+		if (csize &&
+		    copy_to_user(data, &latsched_samp(cpu, latsched_curr(cpu)),
+				 csize * sizeof(struct latsched_sample)))
+			goto out;
+		data += csize;
+		size -= csize;
+		if (size &&
+		    copy_to_user(data, &latsched_samp(cpu, 0),
+				 size * sizeof(struct latsched_sample)))
+			goto out;
+	} else {
+		/* not wrapped: samples are in slots 0 .. cursor - 1 */
+		size = latsched_curr(cpu);
+		if (size > lsgd->size)
+			size = lsgd->size;
+		lsgd->rsize = size;
+		if (size &&
+		    copy_to_user(data, &latsched_samp(cpu, 0),
+				 size * sizeof(struct latsched_sample)))
+			goto out;
+	}
+	res = 0;
+out:
+	sti();
+	return res;
+}
+
diff -Nru linux-2.4.13.vanilla/kernel/signal.c linux-2.4.13.latxs/kernel/signal.c
--- linux-2.4.13.vanilla/kernel/signal.c	Mon Sep 17 16:40:01 2001
+++ 
linux-2.4.13.latxs/kernel/signal.c Thu Oct 25 11:34:51 2001 @@ -478,10 +478,10 @@ * process of changing - but no harm is done by that * other than doing an extra (lightweight) IPI interrupt. */ - spin_lock(&runqueue_lock); + runqueue_spin_lock(t); if (t->has_cpu && t->processor != smp_processor_id()) smp_send_reschedule(t->processor); - spin_unlock(&runqueue_lock); + runqueue_spin_unlock(t); #endif /* CONFIG_SMP */ if (t->state & TASK_INTERRUPTIBLE) { diff -Nru linux-2.4.13.vanilla/kernel/softirq.c linux-2.4.13.latxs/kernel/softirq.c --- linux-2.4.13.vanilla/kernel/softirq.c Sat Sep 8 12:02:32 2001 +++ linux-2.4.13.latxs/kernel/softirq.c Thu Oct 25 11:34:51 2001 @@ -369,7 +369,7 @@ sigfillset(&current->blocked); /* Migrate to the right CPU */ - current->cpus_allowed = 1UL << cpu; + if (!move_to_cpu(current, cpu, 1)) BUG(); while (smp_processor_id() != cpu) schedule();