diff -U 5 -r linux-2.2.14.vanilla/include/linux/sched.h linux-2.2.14/include/linux/sched.h
--- linux-2.2.14.vanilla/include/linux/sched.h	Thu Jan  6 00:31:31 2000
+++ linux-2.2.14/include/linux/sched.h	Sat Jan 22 00:09:58 2000
@@ -225,10 +225,16 @@
  * Right now it is only used to track how many processes a
  * user has, but it has the potential to track memory usage etc.
  */
 struct user_struct;
 
+/* Links of one goodness-slot list: used as the slot head in kernel/sched.c and embedded per task as task_struct.gss */
+struct gds_slot_struct {
+	struct task_struct *__next;	/* next task in the slot ( the list is circular ) */
+	struct task_struct **__pprev;	/* address of the previous element's __next; NULL while the task is in no slot */
+};
+
 struct task_struct {
 /* these are hardcoded - don't touch */
 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
 	unsigned long flags;	/* per process flags, defined below */
 	int sigpending;
@@ -238,18 +244,22 @@
 					 */
 	struct exec_domain *exec_domain;
 	long need_resched;
 
 /* various fields */
+	cycles_t avg_slice;
+	unsigned long policy;
 	long counter;
 	long priority;
-	cycles_t avg_slice;
+/* memory management info */
+	struct mm_struct *mm;
 /* SMP and runqueue state */
 	int has_cpu;
 	int processor;
 	int last_processor;
 	int lock_depth;		/* Lock depth. We can context switch in and out of holding a syscall kernel lock... */	
+	struct gds_slot_struct gss;
 	struct task_struct *next_task, *prev_task;
 	struct task_struct *next_run,  *prev_run;
 
 /* task state */
 	struct linux_binfmt *binfmt;
@@ -279,11 +289,11 @@
 	/* Pointer to task[] array linkage. */
 	struct task_struct **tarray_ptr;
 
 	struct wait_queue *wait_chldexit;	/* for wait4() */
 	struct semaphore *vfork_sem;		/* for vfork() */
-	unsigned long policy, rt_priority;
+	unsigned long rt_priority;
 	unsigned long it_real_value, it_prof_value, it_virt_value;
 	unsigned long it_real_incr, it_prof_incr, it_virt_incr;
 	struct timer_list real_timer;
 	struct tms times;
 	unsigned long start_time;
@@ -312,12 +322,10 @@
 	struct thread_struct tss;
 /* filesystem information */
 	struct fs_struct *fs;
 /* open file information */
 	struct files_struct *files;
-/* memory management info */
-	struct mm_struct *mm;
 
 /* signal handlers */
 	spinlock_t sigmask_lock;	/* Protects signal and blocked */
 	struct signal_struct *sig;
 	sigset_t signal, blocked;
@@ -361,21 +369,24 @@
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
  */
 #define INIT_TASK \
 /* state etc */	{ 0,0,0,KERNEL_DS,&default_exec_domain,0, \
-/* counter */	DEF_PRIORITY,DEF_PRIORITY,0, \
+/* avg_slice */	0, \
+/* policy */	SCHED_OTHER, DEF_PRIORITY,DEF_PRIORITY, \
+/* mm */	&init_mm, \
 /* SMP */	0,0,0,-1, \
+/* gss */	{ NULL, NULL }, \
 /* schedlink */	&init_task,&init_task, &init_task, &init_task, \
 /* binfmt */	NULL, \
 /* ec,brk... */	0,0,0,0,0,0, \
 /* pid etc.. */	0,0,0,0,0, \
 /* proc links*/ &init_task,&init_task,NULL,NULL,NULL, \
 /* pidhash */	NULL, NULL, \
 /* tarray */	&task[0], \
 /* chld wait */	NULL, NULL, \
-/* timeout */	SCHED_OTHER,0,0,0,0,0,0,0, \
+/* timeout */	0,0,0,0,0,0,0, \
 /* timer */	{ NULL, NULL, 0, 0, it_real_fn }, \
 /* utime */	{0,0,0,0},0, \
 /* per CPU times */ {0, }, {0, }, \
 /* flt */	0,0,0,0,0,0, \
 /* swp */	0, \
@@ -390,11 +401,10 @@
 /* fs info */	0,NULL, \
 /* ipc */	NULL, NULL, \
 /* tss */	INIT_TSS, \
 /* fs */	&init_fs, \
 /* files */	&init_files, \
-/* mm */	&init_mm, \
 /* signals */	SPIN_LOCK_UNLOCKED, &init_signals, {{0}}, {{0}}, NULL, &init_task.sigqueue, 0, 0, \
 /* exec cts */	0,0, \
 }
 
 union task_union {
diff -U 5 -r linux-2.2.14.vanilla/kernel/fork.c linux-2.2.14/kernel/fork.c
--- linux-2.2.14.vanilla/kernel/fork.c	Sat Oct 23 20:03:32 1999
+++ linux-2.2.14/kernel/fork.c	Fri Jan 21 22:55:53 2000
@@ -688,10 +688,14 @@
 	/* ok, now we should be set up.. */
 	p->swappable = 1;
 	p->exit_signal = clone_flags & CSIGNAL;
 	p->pdeath_signal = 0;
 
+/* Task goodness slotizer. */
+	p->gss.__next = NULL;
+	p->gss.__pprev = NULL;
+
 	/*
 	 * "share" dynamic priority between parent and child, thus the
 	 * total amount of dynamic priorities in the system doesnt change,
 	 * more scheduling fairness. This is only important in the first
 	 * timeslice, on the long run the scheduling behaviour is unchanged.
diff -U 5 -r linux-2.2.14.vanilla/kernel/sched.c linux-2.2.14/kernel/sched.c
--- linux-2.2.14.vanilla/kernel/sched.c	Thu Jan  6 00:27:52 2000
+++ linux-2.2.14/kernel/sched.c	Sat Feb  5 13:59:50 2000
@@ -122,10 +122,61 @@
 #define idle_task(cpu) (&init_task)
 #define can_schedule(p) (1)
 
 #endif
 
+
+
+/* 
+ * Task goodness slotizer ( begin ).
+ */
+#define MAX_GDS		48	/*
+						 * Estimated goodness range; it sizes the slot table (see MAX_SLOTS).
+						 * Tasks that map past the last slot all stay in slot (MAX_SLOTS - 1).
+						 */
+#define SLOT_SHIFT	0	/* Shift factor for slot arithmetic. */
+#define GDS_STEP	(1 << SLOT_SHIFT) /* Goodness step, i.e. goodness values covered by one slot */
+#define MAX_SLOTS	(MAX_GDS / GDS_STEP) /* Number of goodness slots */
+#define GDS_LOWER	DEF_PRIORITY	/* Goodness lower level: tasks at or below it ( but above 0 ) go in slot 1 */
+#define GDS_SLOT(c)	(((c) >= GDS_LOWER) ? (((c) - GDS_LOWER) >> SLOT_SHIFT): 0)
+
+/* The -1 undoes the +1 applied when a task is slotted, which keeps slot 0 free for really exhausted processes. */
+#define SLOT_GDS_BASE(s)	((((s) - 1) << SLOT_SHIFT) + GDS_LOWER)
+
+/* This huge goodness value for slot (MAX_SLOTS - 1) ensures that all the tasks
+ * in that slot ( commonly RT processes ) are fully tested before exiting the scan.
+ */
+#define MAX_LINUX_GOODNESS	10000
+/* Maximum goodness possible in slot s */
+#define SLOT_GDS_MAX(s)		(((s) < (MAX_SLOTS - 1)) ? \
+				(SLOT_GDS_BASE(s) + ((1 << SLOT_SHIFT) - 1)) : MAX_LINUX_GOODNESS)
+
+/* Return the task_struct pointer that contains the gds_slot_struct pointed to by "s"; for a slot head only the gss member of the result is real */
+#define TGDS_HEAD(s)		((struct task_struct *) ((char *) &(s)->__next - (unsigned long) &(((struct task_struct *) 0)->gss.__next)))
+
+/* Initial value of turbo_sched_loadtrigger: turbo mode is triggered when nr_running exceeds it */
+#define GDS_LOADTRIGGER	5
+
+/* Initial value of turbo_sched_waitspins: recalculate passes waited out before a mode switch */
+#define GDS_WAIT_SPINS	2
+
+/*
+ * Task goodness slots.
+ * gds_slots[0] contains all the exhausted processes and is skipped by the scan in schedule().
+ */
+static struct gds_slot_struct gds_slots_init[MAX_SLOTS]; 	/* Image of the empty slots, copied back when turbo mode is left */
+static struct gds_slot_struct gds_slots[MAX_SLOTS];
+static int start_gdslot; 	/* Slot index at which the scan in schedule() starts */
+static int turbo_sched; 	/* Turbo scheduler switch: non-zero while turbo mode is active */
+static int turbo_sched_waitspins;	/* Recalculate passes the turbo scheduler waits before switching mode */
+static int turbo_sched_loadtrigger; /* Runqueue load ( nr_running ) above which turbo mode is triggered */
+
+/* 
+ * Task goodness slotizer ( end ).
+ */
+
+
 void scheduling_functions_start_here(void) { }
 
 /*
  * This is the function that decides how desirable a process is..
  * You can weigh different processes against each other depending
@@ -355,10 +406,149 @@
 	 * Pass #2
 	 */
 	reschedule_idle_slow(p);
 }
 
+/* 
+ * Task goodness slotizer ( begin ).
+ */
+/* Make every goodness slot an empty circular list, save that image and start in normal ( non turbo ) mode. */
+static void gds_init(void)
+{
+	int ii;
+
+	for (ii = 0; ii < MAX_SLOTS; ii++) {
+		gds_slots[ii].__pprev = &gds_slots[ii].__next;	/* empty list: the tail link is the head's own __next */
+		gds_slots[ii].__next = TGDS_HEAD(&gds_slots[ii]);	/* ...and the head points back to itself */
+	}
+
+	memcpy(gds_slots_init, gds_slots, sizeof(gds_slots_init));	/* pristine copy, restored by gds_try_switch() */
+
+	start_gdslot = MAX_SLOTS - 1;
+	turbo_sched = 0; /* Start in normal mode */
+	turbo_sched_waitspins = GDS_WAIT_SPINS;
+	turbo_sched_loadtrigger = GDS_LOADTRIGGER;
+}
+
+/* Append ts at the tail of the slot matching its goodness ( needs "runqueue_lock" ). */
+static inline void gds_tadd_task(struct task_struct * ts)
+{
+	/* It's important to compute the maximum goodness possible for ts,
+	 * so that schedule() can stop iterating as soon as it finds a process
+	 * that keeps its goodness promise.
+	 */
+	int weight = goodness(ts, ts, ts->processor),
+		slot = 0;
+	struct task_struct * qh;
+	/* Exhausted processes fall in slot 0, so that schedule() does not scan them. */
+	if (weight > 0)
+	{
+		if ((slot = GDS_SLOT(weight)) < (MAX_SLOTS - 1))
+			++slot; /* We keep slot 0 for really exhausted processes */
+		else
+			slot = MAX_SLOTS - 1;
+		if (slot > start_gdslot)
+			start_gdslot = slot; /* Update iteration start index. */
+	}
+	qh = TGDS_HEAD(&gds_slots[slot]);	/* slot head viewed as a task */
+	ts->gss.__pprev = qh->gss.__pprev;	/* ts goes right after the current tail */
+	*qh->gss.__pprev = ts;
+	qh->gss.__pprev = &ts->gss.__next;	/* ts becomes the new tail */
+	ts->gss.__next = qh;	/* close the circle back to the head */
+}
+
+/* Insert ts at the head of the slot matching its goodness ( needs "runqueue_lock" ). */
+static inline void gds_hadd_task(struct task_struct * ts)
+{
+	/* It's important to compute the maximum goodness possible for ts,
+	 * so that schedule() can stop iterating as soon as it finds a process
+	 * that keeps its goodness promise.
+	 */
+	int weight = goodness(ts, ts, ts->processor),
+		slot = 0;
+	struct task_struct * qh;
+	/* Exhausted processes fall in slot 0, so that schedule() does not scan them. */
+	if (weight > 0)
+	{
+		if ((slot = GDS_SLOT(weight)) < (MAX_SLOTS - 1))
+			++slot; /* We keep slot 0 for really exhausted processes */
+		else
+			slot = MAX_SLOTS - 1;
+		if (slot > start_gdslot)
+			start_gdslot = slot; /* Update iteration start index. */
+	}
+	qh = TGDS_HEAD(&gds_slots[slot]);
+	qh = qh->gss.__next;	/* first task of the slot ( the head itself when empty ): ts is linked before it */
+	ts->gss.__pprev = qh->gss.__pprev;
+	*qh->gss.__pprev = ts;
+	qh->gss.__pprev = &ts->gss.__next;
+	ts->gss.__next = qh;
+}
+
+/* Remove task from its goodness slot ( needs "runqueue_lock" ). Returns 1 if it was unlinked, 0 if it was in no slot. */
+static inline int gds_remove_task(struct task_struct * ts)
+{
+	if (ts->gss.__pprev) {	/* a NULL __pprev marks a task that is in no slot */
+		ts->gss.__next->gss.__pprev = ts->gss.__pprev;
+		*ts->gss.__pprev = ts->gss.__next;
+		ts->gss.__pprev = NULL;
+		return (1);
+	}
+	return (0);
+}
+
+/* Move a slotted task to the tail of the slot matching its current goodness; unslotted tasks are left alone ( needs "runqueue_lock" ). */
+static inline void gds_switch(struct task_struct * ts)
+{
+	if (gds_remove_task(ts))
+		gds_tadd_task(ts);
+}
+
+/* Used after a turbo boot: forget any stale slot links of ts and reinsert it only if it is on the runqueue ( needs "runqueue_lock" ). */
+static inline void gds_switch_init(struct task_struct * ts)
+{
+	ts->gss.__pprev = NULL;
+	ts->gss.__next = NULL;
+	if (ts->next_run)	/* next_run is NULL for tasks that are not on the runqueue */
+		gds_tadd_task(ts);
+}
+
+
+/* Measure the load factor and decide whether to switch to normal_mode or to turbo_mode.
+ * This must be called only before the recalculate loop.
+ * It returns 1 if it does a switch, 0 otherwise.
+ */
+static inline int gds_try_switch(void)
+{
+	static int spins = 0;	/* qualifying calls since the last switch; NOTE(review): shared by both directions and not reset in between - confirm intended */
+
+	if (nr_running > turbo_sched_loadtrigger) {
+		if (!turbo_sched) {
+			if (++spins > turbo_sched_waitspins) {
+				turbo_sched = 1;	/* the caller must now slot every task, see gds_switch_init() */
+				spins = 0;
+				return 1;
+			}
+		}
+	} else {
+		if (turbo_sched) {
+			if (++spins > turbo_sched_waitspins) {
+				turbo_sched = 0;
+				memcpy(gds_slots, gds_slots_init, sizeof(gds_slots));	/* empty every slot; task links are left stale */
+				spins = 0;
+				return 1;
+			}
+		}
+	}
+	return 0;
+}
+/* 
+ * Task goodness slotizer ( end ).
+ */
+
+
+
 /*
  * Careful!
  *
  * This has to add the process to the _beginning_ of the
  * run-queue, not the end. See the comment about "This is
@@ -371,10 +561,13 @@
 	p->prev_run = &init_task;
 	init_task.next_run = p;
 	p->next_run = next;
 	next->prev_run = p;
 	nr_running++;
+	/* Turbo mode: also link the task at the head of its goodness slot ( needs "runqueue_lock" ). */
+	if (turbo_sched)
+		gds_hadd_task(p);
 }
 
 static inline void del_from_runqueue(struct task_struct * p)
 {
 	struct task_struct *next = p->next_run;
@@ -383,10 +576,13 @@
 	nr_running--;
 	next->prev_run = prev;
 	prev->next_run = next;
 	p->next_run = NULL;
 	p->prev_run = NULL;
+	/* Turbo mode: also remove the task from its goodness slot ( needs "runqueue_lock" ). */
+	if (turbo_sched)
+		gds_remove_task(p);
 }
 
 static inline void move_last_runqueue(struct task_struct * p)
 {
 	struct task_struct *next = p->next_run;
@@ -399,10 +595,15 @@
 	p->next_run = &init_task;
 	prev = init_task.prev_run;
 	init_task.prev_run = p;
 	p->prev_run = prev;
 	prev->next_run = p;
+	/* Turbo mode: move the task to the tail of its goodness slot ( needs "runqueue_lock" ). */
+	if (turbo_sched) {
+		gds_remove_task(p);
+		gds_tadd_task(p);
+	}
 }
 
 static inline void move_first_runqueue(struct task_struct * p)
 {
 	struct task_struct *next = p->next_run;
@@ -415,10 +616,15 @@
 	p->prev_run = &init_task;
 	next = init_task.next_run;
 	init_task.next_run = p;
 	p->next_run = next;
 	next->prev_run = p;
+	/* Turbo mode: move the task to the head of its goodness slot ( needs "runqueue_lock" ). */
+	if (turbo_sched) {
+		gds_remove_task(p);
+		gds_hadd_task(p);
+	}
 }
 
 /*
  * The tasklist_lock protects the linked list of processes.
  *
@@ -712,10 +918,16 @@
 	 */
 	sched_data = & aligned_data[this_cpu].schedule_data;
 
 	spin_lock_irq(&runqueue_lock);
 
+	/* Turbo mode: move prev to the slot matching its current goodness ( needs "runqueue_lock" ).
+	 * Perhaps this can be moved out of the fast path.
+	 */
+	if (turbo_sched)
+		gds_switch(prev);
+
 	/* move an exhausted RR process to be last.. */
 	if (prev->policy == SCHED_RR)
 		goto move_rr_last;
 move_rr_back:
 
@@ -735,45 +947,84 @@
 
 	/*
 	 * this is the scheduler proper:
 	 */
 
-	p = init_task.next_run;
 	/* Default process to select.. */
 	next = idle_task(this_cpu);
 	c = -1000;
 	if (prev->state == TASK_RUNNING)
 		goto still_running;
 still_running_back:
 
-	/*
-	 * This is subtle.
-	 * Note how we can enable interrupts here, even
-	 * though interrupts can add processes to the run-
-	 * queue. This is because any new processes will
-	 * be added to the front of the queue, so "p" above
-	 * is a safe starting point.
-	 * run-queue deletion and re-ordering is protected by
-	 * the scheduler lock
-	 */
+	if (turbo_sched) {
+		int ii, cslots, gdsmax, weight;
+		struct task_struct *qh;
+		/* Scan the task goodness slots from start_gdslot downwards ( needs "runqueue_lock" ).
+		 * Note that "ii > 0" skips all the exhausted processes kept in slot 0.
+		 */
+		for (ii = start_gdslot, cslots = 0; ii > 0; ii--) {
+			qh = TGDS_HEAD(&gds_slots[ii]);
+			if ((p = qh->gss.__next) != qh) {
+				gdsmax = SLOT_GDS_MAX(ii); /* Max goodness in slot. */
+				if (!cslots) /* Remember iteration start index. */
+					start_gdslot = ii, ++cslots;
+				if (c >= gdsmax)
+					goto task_found;
+				do {
+					if (can_schedule(p)) {
+						if ((weight = goodness(prev, p, this_cpu)) > c) {
+							c = weight, next = p;
+							if (c >= gdsmax)
+								goto task_found;
+						}
+					}
+				} while ((p = p->gss.__next) != qh);
+			}
+			/* Goodness promise has been kept: no lower slot can hold a better task, we've found the President ! */
+			if (c >= SLOT_GDS_BASE(ii))
+				goto task_found;
+		}
+
+		/* Do we need to re-calculate counters? */
+		if ((c <= 0) && ((qh = TGDS_HEAD(&gds_slots[0]))->gss.__next != qh))
+			goto recalculate;
+
+	} else {
+		/*
+		 * This is subtle.
+		 * Note how we can enable interrupts here, even
+		 * though interrupts can add processes to the run-
+		 * queue. This is because any new processes will
+		 * be added to the front of the queue, so "p" below
+		 * is a safe starting point.
+		 * run-queue deletion and re-ordering is protected by
+		 * the scheduler lock
+		 */
 /*
  * Note! there may appear new tasks on the run-queue during this, as
  * interrupts are enabled. However, they will be put on front of the
  * list, so our list starting at "p" is essentially fixed.
  */
-	while (p != &init_task) {
-		if (can_schedule(p)) {
-			int weight = goodness(prev, p, this_cpu);
-			if (weight > c)
-				c = weight, next = p;
+		p = init_task.next_run;
+
+		while (p != &init_task) {
+			if (can_schedule(p)) {
+				int weight = goodness(prev, p, this_cpu);
+				if (weight > c)
+					c = weight, next = p;
+			}
+			p = p->next_run;
 		}
-		p = p->next_run;
+
+		if (!c)
+			goto recalculate;
 	}
 
-	/* Do we need to re-calculate counters? */
-	if (!c)
-		goto recalculate;
+
+task_found:
+
 	/*
 	 * from this point on nothing can prevent us from
 	 * switching to the next task, save this fact in
 	 * sched_data.
 	 */
@@ -828,17 +1079,37 @@
 	reacquire_kernel_lock(current);
 	return;
 
 recalculate:
 	{
-		struct task_struct *p;
-		spin_unlock_irq(&runqueue_lock);
-		read_lock(&tasklist_lock);
-		for_each_task(p)
-			p->counter = (p->counter >> 1) + p->priority;
-		read_unlock(&tasklist_lock);
-		spin_lock_irq(&runqueue_lock);
+		int switched = gds_try_switch();
+
+		if (turbo_sched) {
+			struct task_struct *p;
+			read_lock(&tasklist_lock);
+			if (!switched) {
+				for_each_task(p) {
+					p->counter = (p->counter >> 1) + p->priority;
+					gds_switch(p);
+				}
+			} else {
+				for_each_task(p) {
+					p->counter = (p->counter >> 1) + p->priority;
+					gds_switch_init(p);
+				}
+			}
+			read_unlock(&tasklist_lock);
+		} else {
+			struct task_struct *p;
+			spin_unlock_irq(&runqueue_lock);
+			read_lock(&tasklist_lock);
+			for_each_task(p)
+				p->counter = (p->counter >> 1) + p->priority;
+			read_unlock(&tasklist_lock);
+			spin_lock_irq(&runqueue_lock);
+		}
+
 		goto repeat_schedule;
 	}
 
 still_running:
 	c = prev_goodness(prev, prev, this_cpu);
@@ -918,10 +1189,85 @@
 	read_unlock(&waitqueue_lock);
 out:
 	return;
 }
 
+
+/*
+ * This is the new code for semaphore wakeup.
+ * As you can see, it releases only the best waiting task, except when
+ * all the processes' counters are exhausted.
+ * In that case I prefer to fall back to the previous implementation and
+ * release all the tasks instead of performing a recharge loop here.
+ * Anyway, such a situation rarely occurs ( the higher the number of
+ * waiting tasks the rarer it is, and with few tasks the cost of a total
+ * release is not so high ).
+ * Note that on SMP systems the goodness calculation is not 100% precise,
+ * because we don't know which CPU will reschedule the task.
+ * Anyway, IMVHO, I prefer this solution to :
+ * 1) a FIFO one, because we try to release the best task ( under UP we're 100% precise )
+ * 2) a "release all" method, because of :
+ *		A) the peak of processes flushed on the scheduler
+ *		B) starvation issues
+ */
+void __sem_wake_up(struct wait_queue **q, unsigned int mode)
+{
+	struct task_struct *p, *best = NULL;
+	struct wait_queue *head, *next;
+	int c = 0, this_cpu = current->processor;	/* c starts at 0: only a waiter with goodness > 0 can become "best" */
+
+	if (!q)
+		goto out;
+	/*
+	 * this is safe to be done before the check because it
+	 * means no dereference, just pointer operations.
+	 */
+	head = WAIT_QUEUE_HEAD(q);
+
+	read_lock(&waitqueue_lock);
+	next = *q;
+	if (!next)
+		goto out_unlock;
+
+	while (next != head) {
+		p = next->task;
+		next = next->next;
+		if (p->state & mode) {
+			/* Search for the best one to run */
+			if (can_schedule(p)) {
+				int weight = goodness(current, p, this_cpu);
+				if (weight > c) {
+					c = weight, best = p;
+				}
+			}
+		}
+	}
+	/* Found it ? */
+	if (best) {
+		wake_up_process(best);
+	}
+	else {
+	/* Old way: no waiter had goodness > 0, so release all the tasks matching mode ( sigh ! ) */
+		head = WAIT_QUEUE_HEAD(q);
+		next = *q;
+
+		while (next != head) {
+			p = next->task;
+			next = next->next;
+			if (p->state & mode) {
+				wake_up_process(p);
+			}
+		}
+	}
+out_unlock:
+	read_unlock(&waitqueue_lock);
+out:
+	return;
+}
+
+
+
 /*
  * Semaphores are implemented using a two-way counter:
  * The "count" variable is decremented for each process
  * that tries to sleep, while the "waking" variable is
  * incremented when the "up()" code goes to wake up waiting
@@ -951,11 +1297,15 @@
  * where we want to avoid any extra jumps and calls.
  */
 void __up(struct semaphore *sem)
 {
 	wake_one_more(sem);
+#ifdef OLD_SEMAPHORE_METHOD
 	wake_up(&sem->wait);
+#else
+	__sem_wake_up(&sem->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE);
+#endif
 }
 
 /*
  * Perform the "down" function.  Return zero for semaphore acquired,
  * return negative for signalled out of the function.
@@ -2061,6 +2411,7 @@
 		pidhash[nr] = NULL;
 
 	init_bh(TIMER_BH, timer_bh);
 	init_bh(TQUEUE_BH, tqueue_bh);
 	init_bh(IMMEDIATE_BH, immediate_bh);
+	gds_init();
 }