summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/mips/Kconfig5
-rw-r--r--arch/mips/kernel/process.c11
-rw-r--r--arch/mips/kernel/scall32-o32.S11
-rw-r--r--arch/mips/kernel/setup.c5
-rw-r--r--arch/mips/kernel/smp-mt.c11
-rw-r--r--arch/mips/kernel/traps.c30
-rw-r--r--include/asm-mips/cpu-features.h2
-rw-r--r--include/asm-mips/fpu.h4
-rw-r--r--include/asm-mips/processor.h16
-rw-r--r--include/asm-mips/system.h32
10 files changed, 125 insertions, 2 deletions
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f9be549645e..87f0b79c6b1 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1464,6 +1464,11 @@ config MIPS_VPE_LOADER
endchoice
+config MIPS_MT_FPAFF
+ bool "Dynamic FPU affinity for FP-intensive threads"
+ depends on MIPS_MT
+ default y
+
config MIPS_VPE_LOADER_TOM
bool "Load VPE program into memory hidden from linux"
depends on MIPS_VPE_LOADER
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 8b393df460a..199a06e873c 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -185,6 +185,17 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);
clear_tsk_thread_flag(p, TIF_USEDFPU);
+#ifdef CONFIG_MIPS_MT_FPAFF
+ /*
+ * FPU affinity support is cleaner if we track the
+ * user-visible CPU affinity from the very beginning.
+ * The generic cpus_allowed mask will already have
+ * been copied from the parent before copy_thread
+ * is invoked.
+ */
+ p->thread.user_cpus_allowed = p->cpus_allowed;
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
if (clone_flags & CLONE_SETTLS)
ti->tp_value = regs->regs[7];
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 4e36b87be1e..a0ac0e5f61a 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -569,8 +569,19 @@ einval: li v0, -EINVAL
sys sys_tkill 2
sys sys_sendfile64 5
sys sys_futex 6
+#ifdef CONFIG_MIPS_MT_FPAFF
+ /*
+ * For FPU affinity scheduling on MIPS MT processors, we need to
+ * intercept sys_sched_xxxaffinity() calls until we get a proper hook
+ * in kernel/sched.c. Considered only temporary we only support these
+ * hooks for the 32-bit kernel - there is no MIPS64 MT processor atm.
+ */
+ sys mipsmt_sys_sched_setaffinity 3
+ sys mipsmt_sys_sched_getaffinity 3
+#else
sys sys_sched_setaffinity 3
sys sys_sched_getaffinity 3 /* 4240 */
+#endif /* CONFIG_MIPS_MT_FPAFF */
sys sys_io_setup 2
sys sys_io_destroy 1
sys sys_io_getevents 5
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index dcbfd27071f..bcf1b10e518 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -529,7 +529,10 @@ void __init setup_arch(char **cmdline_p)
int __init fpu_disable(char *s)
{
- cpu_data[0].options &= ~MIPS_CPU_FPU;
+ int i;
+
+ for (i = 0; i < NR_CPUS; i++)
+ cpu_data[i].options &= ~MIPS_CPU_FPU;
return 1;
}
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index 19b8e4b31b7..57770902b9a 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -150,6 +150,11 @@ void plat_smp_setup(void)
unsigned long val;
int i, num;
+#ifdef CONFIG_MIPS_MT_FPAFF
+ /* If we have an FPU, enroll ourselves in the FPU-full mask */
+ if (cpu_has_fpu)
+ cpu_set(0, mt_fpu_cpumask);
+#endif /* CONFIG_MIPS_MT_FPAFF */
if (!cpu_has_mipsmt)
return;
@@ -312,6 +317,12 @@ void prom_smp_finish(void)
{
write_c0_compare(read_c0_count() + (8* mips_hpt_frequency/HZ));
+#ifdef CONFIG_MIPS_MT_FPAFF
+ /* If we have an FPU, enroll ourselves in the FPU-full mask */
+ if (cpu_has_fpu)
+ cpu_set(smp_processor_id(), mt_fpu_cpumask);
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
local_irq_enable();
}
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 6336fe8008e..e9902d89dc0 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -758,6 +758,36 @@ asmlinkage void do_cpu(struct pt_regs *regs)
&current->thread.fpu.soft);
if (sig)
force_sig(sig, current);
+#ifdef CONFIG_MIPS_MT_FPAFF
+ else {
+ /*
+ * MIPS MT processors may have fewer FPU contexts
+ * than CPU threads. If we've emulated more than
+ * some threshold number of instructions, force
+ * migration to a "CPU" that has FP support.
+ */
+ if(mt_fpemul_threshold > 0
+ && ((current->thread.emulated_fp++
+ > mt_fpemul_threshold))) {
+ /*
+ * If there's no FPU present, or if the
+ * application has already restricted
+ * the allowed set to exclude any CPUs
+ * with FPUs, we'll skip the procedure.
+ */
+ if (cpus_intersects(current->cpus_allowed,
+ mt_fpu_cpumask)) {
+ cpumask_t tmask;
+
+ cpus_and(tmask,
+ current->thread.user_cpus_allowed,
+ mt_fpu_cpumask);
+ set_cpus_allowed(current, tmask);
+ current->thread.mflags |= MF_FPUBOUND;
+ }
+ }
+ }
+#endif /* CONFIG_MIPS_MT_FPAFF */
}
return;
diff --git a/include/asm-mips/cpu-features.h b/include/asm-mips/cpu-features.h
index 3f2b6d9ac45..254e11ed247 100644
--- a/include/asm-mips/cpu-features.h
+++ b/include/asm-mips/cpu-features.h
@@ -40,7 +40,7 @@
#define cpu_has_sb1_cache (cpu_data[0].options & MIPS_CPU_SB1_CACHE)
#endif
#ifndef cpu_has_fpu
-#define cpu_has_fpu (cpu_data[0].options & MIPS_CPU_FPU)
+#define cpu_has_fpu (current_cpu_data.options & MIPS_CPU_FPU)
#endif
#ifndef cpu_has_32fpr
#define cpu_has_32fpr (cpu_data[0].options & MIPS_CPU_32FPR)
diff --git a/include/asm-mips/fpu.h b/include/asm-mips/fpu.h
index 9c828b1f821..b0f50015e25 100644
--- a/include/asm-mips/fpu.h
+++ b/include/asm-mips/fpu.h
@@ -21,6 +21,10 @@
#include <asm/processor.h>
#include <asm/current.h>
+#ifdef CONFIG_MIPS_MT_FPAFF
+#include <asm/mips_mt.h>
+#endif
+
struct sigcontext;
struct sigcontext32;
diff --git a/include/asm-mips/processor.h b/include/asm-mips/processor.h
index 786651340de..0fb75f0762e 100644
--- a/include/asm-mips/processor.h
+++ b/include/asm-mips/processor.h
@@ -134,6 +134,12 @@ struct thread_struct {
/* Saved fpu/fpu emulator stuff. */
union mips_fpu_union fpu;
+#ifdef CONFIG_MIPS_MT_FPAFF
+ /* Emulated instruction count */
+ unsigned long emulated_fp;
+ /* Saved per-thread scheduler affinity mask */
+ cpumask_t user_cpus_allowed;
+#endif /* CONFIG_MIPS_MT_FPAFF */
/* Saved state of the DSP ASE, if available. */
struct mips_dsp_state dsp;
@@ -159,6 +165,12 @@ struct thread_struct {
#define MF_N32 MF_32BIT_ADDR
#define MF_N64 0
+#ifdef CONFIG_MIPS_MT_FPAFF
+#define FPAFF_INIT 0, INIT_CPUMASK,
+#else
+#define FPAFF_INIT
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
#define INIT_THREAD { \
/* \
* saved main processor registers \
@@ -174,6 +186,10 @@ struct thread_struct {
*/ \
INIT_FPU, \
/* \
+ * fpu affinity state (null if not FPAFF) \
+ */ \
+ FPAFF_INIT \
+ /* \
* saved dsp/dsp emulator stuff \
*/ \
INIT_DSP, \
diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h
index 39026690d9e..261f71d16a0 100644
--- a/include/asm-mips/system.h
+++ b/include/asm-mips/system.h
@@ -155,6 +155,37 @@ extern asmlinkage void *resume(void *last, void *next, void *next_ti);
struct task_struct;
+#ifdef CONFIG_MIPS_MT_FPAFF
+
+/*
+ * Handle the scheduler resume end of FPU affinity management. We do this
+ * inline to try to keep the overhead down. If we have been forced to run on
+ * a "CPU" with an FPU because of a previous high level of FP computation,
+ * but did not actually use the FPU during the most recent time-slice (CU1
+ * isn't set), we undo the restriction on cpus_allowed.
+ *
+ * We're not calling set_cpus_allowed() here, because we have no need to
+ * force prompt migration - we're already switching the current CPU to a
+ * different thread.
+ */
+
+#define switch_to(prev,next,last) \
+do { \
+ if (cpu_has_fpu && \
+ (prev->thread.mflags & MF_FPUBOUND) && \
+ (!(KSTK_STATUS(prev) & ST0_CU1))) { \
+ prev->thread.mflags &= ~MF_FPUBOUND; \
+ prev->cpus_allowed = prev->thread.user_cpus_allowed; \
+ } \
+ if (cpu_has_dsp) \
+ __save_dsp(prev); \
+ next->thread.emulated_fp = 0; \
+ (last) = resume(prev, next, next->thread_info); \
+ if (cpu_has_dsp) \
+ __restore_dsp(current); \
+} while(0)
+
+#else
#define switch_to(prev,next,last) \
do { \
if (cpu_has_dsp) \
@@ -163,6 +194,7 @@ do { \
if (cpu_has_dsp) \
__restore_dsp(current); \
} while(0)
+#endif
/*
* On SMP systems, when the scheduler does migration-cost autodetection,