diff options
author | Anton Arapov <anton@redhat.com> | 2012-04-16 10:05:28 +0200 |
---|---|---|
committer | Anton Arapov <anton@redhat.com> | 2012-04-16 10:05:28 +0200 |
commit | b4b6116a13633898cf868f2f103c96a90c4c20f8 (patch) | |
tree | 93d1b7e2cfcdf473d8d4ff3ad141fa864f8491f6 /arch/x86/include | |
parent | edd4be777c953e5faafc80d091d3084b4343f5d3 (diff) | |
download | kernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.tar.gz kernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.tar.xz kernel-uprobes-b4b6116a13633898cf868f2f103c96a90c4c20f8.zip |
fedora kernel: d9aad82f3319f3cfd1aebc01234254ef0c37ad84v3.3.2-1
Signed-off-by: Anton Arapov <anton@redhat.com>
Diffstat (limited to 'arch/x86/include')
326 files changed, 42975 insertions, 0 deletions
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild new file mode 100644 index 00000000000..b57e6a43a37 --- /dev/null +++ b/arch/x86/include/asm/Kbuild @@ -0,0 +1,26 @@ +include include/asm-generic/Kbuild.asm + +header-y += boot.h +header-y += bootparam.h +header-y += debugreg.h +header-y += e820.h +header-y += hw_breakpoint.h +header-y += hyperv.h +header-y += ist.h +header-y += ldt.h +header-y += mce.h +header-y += msr-index.h +header-y += msr.h +header-y += mtrr.h +header-y += posix_types_32.h +header-y += posix_types_64.h +header-y += prctl.h +header-y += processor-flags.h +header-y += ptrace-abi.h +header-y += sigcontext32.h +header-y += ucontext.h +header-y += vm86.h +header-y += vsyscall.h + +genhdr-y += unistd_32.h +genhdr-y += unistd_64.h diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h new file mode 100644 index 00000000000..7a15588e45d --- /dev/null +++ b/arch/x86/include/asm/a.out-core.h @@ -0,0 +1,65 @@ +/* a.out coredump register dumper + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _ASM_X86_A_OUT_CORE_H +#define _ASM_X86_A_OUT_CORE_H + +#ifdef __KERNEL__ +#ifdef CONFIG_X86_32 + +#include <linux/user.h> +#include <linux/elfcore.h> +#include <asm/debugreg.h> + +/* + * fill in the user structure for an a.out core dump + */ +static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) +{ +/* changed the size calculations - should hopefully work better. lbt */ + dump->magic = CMAGIC; + dump->start_code = 0; + dump->start_stack = regs->sp & ~(PAGE_SIZE - 1); + dump->u_tsize = ((unsigned long)current->mm->end_code) >> PAGE_SHIFT; + dump->u_dsize = ((unsigned long)(current->mm->brk + (PAGE_SIZE - 1))) + >> PAGE_SHIFT; + dump->u_dsize -= dump->u_tsize; + dump->u_ssize = 0; + aout_dump_debugregs(dump); + + if (dump->start_stack < TASK_SIZE) + dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) + >> PAGE_SHIFT; + + dump->regs.bx = regs->bx; + dump->regs.cx = regs->cx; + dump->regs.dx = regs->dx; + dump->regs.si = regs->si; + dump->regs.di = regs->di; + dump->regs.bp = regs->bp; + dump->regs.ax = regs->ax; + dump->regs.ds = (u16)regs->ds; + dump->regs.es = (u16)regs->es; + dump->regs.fs = (u16)regs->fs; + dump->regs.gs = get_user_gs(regs); + dump->regs.orig_ax = regs->orig_ax; + dump->regs.ip = regs->ip; + dump->regs.cs = (u16)regs->cs; + dump->regs.flags = regs->flags; + dump->regs.sp = regs->sp; + dump->regs.ss = (u16)regs->ss; + + dump->u_fpvalid = dump_fpu(regs, &dump->i387); +} + +#endif /* CONFIG_X86_32 */ +#endif /* __KERNEL__ */ +#endif /* _ASM_X86_A_OUT_CORE_H */ diff --git a/arch/x86/include/asm/a.out.h b/arch/x86/include/asm/a.out.h new file mode 100644 index 00000000000..4684f97a5bb --- /dev/null +++ b/arch/x86/include/asm/a.out.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_A_OUT_H +#define _ASM_X86_A_OUT_H + +struct exec +{ + unsigned int a_info; /* Use macros N_MAGIC, etc for access */ + unsigned a_text; /* length of text, in bytes */ + unsigned a_data; /* length of data, in bytes */ + unsigned a_bss; /* length of uninitialized data area for file, in bytes */ + unsigned a_syms; /* length of symbol table data in file, in bytes */ + unsigned a_entry; /* start address */ + unsigned a_trsize; /* length of relocation info for text, in bytes */ + unsigned a_drsize; /* length of relocation info for data, in bytes */ +}; + +#define N_TRSIZE(a) ((a).a_trsize) +#define N_DRSIZE(a) ((a).a_drsize) +#define N_SYMSIZE(a) ((a).a_syms) + +#endif /* _ASM_X86_A_OUT_H */ diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h new file mode 100644 index 00000000000..610001d385d --- /dev/null +++ b/arch/x86/include/asm/acpi.h @@ -0,0 +1,193 @@ +#ifndef _ASM_X86_ACPI_H +#define _ASM_X86_ACPI_H + +/* + * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> + * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <acpi/pdc_intel.h> + +#include <asm/numa.h> +#include <asm/processor.h> +#include <asm/mmu.h> +#include <asm/mpspec.h> +#include <asm/trampoline.h> + +#define COMPILER_DEPENDENT_INT64 long long +#define COMPILER_DEPENDENT_UINT64 unsigned long long + +/* + * Calling conventions: + * + * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads) + * ACPI_EXTERNAL_XFACE - External ACPI interfaces + * ACPI_INTERNAL_XFACE - Internal ACPI interfaces + * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces + */ +#define ACPI_SYSTEM_XFACE +#define ACPI_EXTERNAL_XFACE +#define ACPI_INTERNAL_XFACE +#define ACPI_INTERNAL_VAR_XFACE + +/* Asm macros */ + +#define ACPI_ASM_MACROS +#define BREAKPOINT3 +#define ACPI_DISABLE_IRQS() local_irq_disable() +#define ACPI_ENABLE_IRQS() local_irq_enable() +#define ACPI_FLUSH_CPU_CACHE() wbinvd() + +int __acpi_acquire_global_lock(unsigned int *lock); +int __acpi_release_global_lock(unsigned int *lock); + +#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \ + ((Acq) = __acpi_acquire_global_lock(&facs->global_lock)) + +#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \ + ((Acq) = __acpi_release_global_lock(&facs->global_lock)) + +/* + * Math helper asm macros + */ +#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \ + asm("divl %2;" \ + : "=a"(q32), "=d"(r32) \ + : "r"(d32), \ + "0"(n_lo), "1"(n_hi)) + + +#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \ + asm("shrl $1,%2 ;" \ + "rcrl $1,%3;" \ + : "=r"(n_hi), "=r"(n_lo) \ + : "0"(n_hi), "1"(n_lo)) + +#ifdef CONFIG_ACPI +extern int acpi_lapic; +extern int acpi_ioapic; +extern int acpi_noirq; +extern int acpi_strict; +extern int acpi_disabled; +extern int acpi_pci_disabled; +extern int acpi_skip_timer_override; +extern int acpi_use_timer_override; +extern int acpi_fix_pin2_polarity; + +extern u8 acpi_sci_flags; +extern int acpi_sci_override_gsi; +void acpi_pic_sci_set_trigger(unsigned int, u16); + +extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi, + int trigger, int polarity); + +static inline void disable_acpi(void) +{ + acpi_disabled = 1; + acpi_pci_disabled = 1; + acpi_noirq = 1; +} + +extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); + +static inline void acpi_noirq_set(void) { acpi_noirq = 1; } +static inline void acpi_disable_pci(void) +{ + acpi_pci_disabled = 1; + acpi_noirq_set(); +} + +/* Low-level suspend routine. */ +extern int acpi_suspend_lowlevel(void); + +extern const unsigned char acpi_wakeup_code[]; +#define acpi_wakeup_address (__pa(TRAMPOLINE_SYM(acpi_wakeup_code))) + +/* early initialization routine */ +extern void acpi_reserve_wakeup_memory(void); + +/* + * Check if the CPU can handle C2 and deeper + */ +static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) +{ + /* + * Early models (<=5) of AMD Opterons are not supposed to go into + * C2 state. + * + * Steppings 0x0A and later are good + */ + if (boot_cpu_data.x86 == 0x0F && + boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86_model <= 0x05 && + boot_cpu_data.x86_mask < 0x0A) + return 1; + else if (amd_e400_c1e_detected) + return 1; + else + return max_cstate; +} + +static inline bool arch_has_acpi_pdc(void) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + return (c->x86_vendor == X86_VENDOR_INTEL || + c->x86_vendor == X86_VENDOR_CENTAUR); +} + +static inline void arch_acpi_set_pdc_bits(u32 *buf) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + + buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; + + if (cpu_has(c, X86_FEATURE_EST)) + buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; + + if (cpu_has(c, X86_FEATURE_ACPI)) + buf[2] |= ACPI_PDC_T_FFH; + + /* + * If mwait/monitor is unsupported, C2/C3_FFH will be disabled + */ + if (!cpu_has(c, X86_FEATURE_MWAIT)) + buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); +} + +#else /* !CONFIG_ACPI */ + +#define acpi_lapic 0 +#define acpi_ioapic 0 +static inline void acpi_noirq_set(void) { } +static inline void acpi_disable_pci(void) { } +static inline void disable_acpi(void) { } + +#endif /* !CONFIG_ACPI */ + +#define ARCH_HAS_POWER_INIT 1 + +#ifdef CONFIG_ACPI_NUMA +extern int acpi_numa; +extern int x86_acpi_numa_init(void); +#endif /* CONFIG_ACPI_NUMA */ + +#define acpi_unlazy_tlb(x) leave_mm(x) + +#endif /* _ASM_X86_ACPI_H */ diff --git a/arch/x86/include/asm/aes.h b/arch/x86/include/asm/aes.h new file mode 100644 index 00000000000..80545a1cbe3 --- /dev/null +++ b/arch/x86/include/asm/aes.h @@ -0,0 +1,11 @@ +#ifndef ASM_X86_AES_H +#define ASM_X86_AES_H + +#include <linux/crypto.h> +#include <crypto/aes.h> + +void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, + const u8 *src); +void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, + const u8 *src); +#endif diff --git a/arch/x86/include/asm/agp.h b/arch/x86/include/asm/agp.h new file mode 100644 index 00000000000..eec2a70d437 --- /dev/null +++ b/arch/x86/include/asm/agp.h @@ -0,0 +1,31 @@ +#ifndef _ASM_X86_AGP_H +#define _ASM_X86_AGP_H + +#include <asm/pgtable.h> +#include <asm/cacheflush.h> + +/* + * Functions to keep the agpgart mappings coherent with the MMU. The + * GART gives the CPU a physical alias of pages in memory. The alias + * region is mapped uncacheable. Make sure there are no conflicting + * mappings with different cachability attributes for the same + * page. This avoids data corruption on some CPUs. + */ + +#define map_page_into_agp(page) set_pages_uc(page, 1) +#define unmap_page_from_agp(page) set_pages_wb(page, 1) + +/* + * Could use CLFLUSH here if the cpu supports it. But then it would + * need to be called for each cacheline of the whole page so it may + * not be worth it. Would need a page for it. + */ +#define flush_agp_cache() wbinvd() + +/* GATT allocation. Returns/accepts GATT kernel virtual address. */ +#define alloc_gatt_pages(order) \ + ((char *)__get_free_pages(GFP_KERNEL, (order))) +#define free_gatt_pages(table, order) \ + free_pages((unsigned long)(table), (order)) + +#endif /* _ASM_X86_AGP_H */ diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h new file mode 100644 index 00000000000..952bd0100c5 --- /dev/null +++ b/arch/x86/include/asm/alternative-asm.h @@ -0,0 +1,26 @@ +#ifdef __ASSEMBLY__ + +#include <asm/asm.h> + +#ifdef CONFIG_SMP + .macro LOCK_PREFIX +672: lock + .section .smp_locks,"a" + .balign 4 + .long 672b - . + .previous + .endm +#else + .macro LOCK_PREFIX + .endm +#endif + +.macro altinstruction_entry orig alt feature orig_len alt_len + .long \orig - . + .long \alt - . + .word \feature + .byte \orig_len + .byte \alt_len +.endm + +#endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h new file mode 100644 index 00000000000..37ad100a221 --- /dev/null +++ b/arch/x86/include/asm/alternative.h @@ -0,0 +1,189 @@ +#ifndef _ASM_X86_ALTERNATIVE_H +#define _ASM_X86_ALTERNATIVE_H + +#include <linux/types.h> +#include <linux/stddef.h> +#include <linux/stringify.h> +#include <asm/asm.h> + +/* + * Alternative inline assembly for SMP. + * + * The LOCK_PREFIX macro defined here replaces the LOCK and + * LOCK_PREFIX macros used everywhere in the source tree. + * + * SMP alternatives use the same data structures as the other + * alternatives and the X86_FEATURE_UP flag to indicate the case of a + * UP system running a SMP kernel. The existing apply_alternatives() + * works fine for patching a SMP kernel for UP. + * + * The SMP alternative tables can be kept after boot and contain both + * UP and SMP versions of the instructions to allow switching back to + * SMP at runtime, when hotplugging in a new CPU, which is especially + * useful in virtualized environments. + * + * The very common lock prefix is handled as special case in a + * separate table which is a pure address list without replacement ptr + * and size information. That keeps the table sizes small. + */ + +#ifdef CONFIG_SMP +#define LOCK_PREFIX_HERE \ + ".section .smp_locks,\"a\"\n" \ + ".balign 4\n" \ + ".long 671f - .\n" /* offset */ \ + ".previous\n" \ + "671:" + +#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " + +#else /* ! CONFIG_SMP */ +#define LOCK_PREFIX_HERE "" +#define LOCK_PREFIX "" +#endif + +struct alt_instr { + s32 instr_offset; /* original instruction */ + s32 repl_offset; /* offset to replacement instruction */ + u16 cpuid; /* cpuid bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of new instruction, <= instrlen */ +}; + +extern void alternative_instructions(void); +extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); + +struct module; + +#ifdef CONFIG_SMP +extern void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end); +extern void alternatives_smp_module_del(struct module *mod); +extern void alternatives_smp_switch(int smp); +extern int alternatives_text_reserved(void *start, void *end); +extern bool skip_smp_alternatives; +#else +static inline void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end) {} +static inline void alternatives_smp_module_del(struct module *mod) {} +static inline void alternatives_smp_switch(int smp) {} +static inline int alternatives_text_reserved(void *start, void *end) +{ + return 0; +} +#endif /* CONFIG_SMP */ + +/* alternative assembly primitive: */ +#define ALTERNATIVE(oldinstr, newinstr, feature) \ + \ + "661:\n\t" oldinstr "\n662:\n" \ + ".section .altinstructions,\"a\"\n" \ + " .long 661b - .\n" /* label */ \ + " .long 663f - .\n" /* new instruction */ \ + " .word " __stringify(feature) "\n" /* feature bit */ \ + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .discard,\"aw\",@progbits\n" \ + " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \ + ".previous\n" \ + ".section .altinstr_replacement, \"ax\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" + +/* + * This must be included *after* the definition of ALTERNATIVE due to + * <asm/arch_hweight.h> + */ +#include <asm/cpufeature.h> + +/* + * Alternative instructions for different CPU types or capabilities. + * + * This allows to use optimized instructions even on generic binary + * kernels. + * + * length of oldinstr must be longer or equal the length of newinstr + * It can be padded with nops as needed. + * + * For non barrier like inlines please define new variants + * without volatile and memory clobber. + */ +#define alternative(oldinstr, newinstr, feature) \ + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") + +/* + * Alternative inline assembly with input. + * + * Pecularities: + * No memory clobber here. + * Argument numbers start with 1. + * Best is to use constraints that are fixed size (like (%1) ... "r") + * If you use variable sized constraints like "m" or "g" in the + * replacement make sure to pad to the worst case length. + * Leaving an unused argument 0 to keep API compatibility. + */ +#define alternative_input(oldinstr, newinstr, feature, input...) \ + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + : : "i" (0), ## input) + +/* Like alternative_input, but with a single output argument */ +#define alternative_io(oldinstr, newinstr, feature, output, input...) \ + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + : output : "i" (0), ## input) + +/* Like alternative_io, but for replacing a direct call with another one. */ +#define alternative_call(oldfunc, newfunc, feature, output, input...) \ + asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \ + : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) + +/* + * use this macro(s) if you need more than one output parameter + * in alternative_io + */ +#define ASM_OUTPUT2(a...) a + +struct paravirt_patch_site; +#ifdef CONFIG_PARAVIRT +void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end); +#else +static inline void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end) +{} +#define __parainstructions NULL +#define __parainstructions_end NULL +#endif + +extern void *text_poke_early(void *addr, const void *opcode, size_t len); + +/* + * Clear and restore the kernel write-protection flag on the local CPU. + * Allows the kernel to edit read-only pages. + * Side-effect: any interrupt handler running between save and restore will have + * the ability to write to read-only pages. + * + * Warning: + * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and + * no thread can be preempted in the instructions being modified (no iret to an + * invalid instruction possible) or if the instructions are changed from a + * consistent state to another consistent state atomically. + * More care must be taken when modifying code in the SMP case because of + * Intel's errata. text_poke_smp() takes care that errata, but still + * doesn't support NMI/MCE handler code modifying. + * On the local CPU you need to be protected again NMI or MCE handlers seeing an + * inconsistent instruction while you patch. + */ +struct text_poke_param { + void *addr; + const void *opcode; + size_t len; +}; + +extern void *text_poke(void *addr, const void *opcode, size_t len); +extern void *text_poke_smp(void *addr, const void *opcode, size_t len); +extern void text_poke_smp_batch(struct text_poke_param *params, int n); + +#endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h new file mode 100644 index 00000000000..49ad773f4b9 --- /dev/null +++ b/arch/x86/include/asm/amd_nb.h @@ -0,0 +1,72 @@ +#ifndef _ASM_X86_AMD_NB_H +#define _ASM_X86_AMD_NB_H + +#include <linux/ioport.h> +#include <linux/pci.h> + +struct amd_nb_bus_dev_range { + u8 bus; + u8 dev_base; + u8 dev_limit; +}; + +extern const struct pci_device_id amd_nb_misc_ids[]; +extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; + +extern bool early_is_amd_nb(u32 value); +extern struct resource *amd_get_mmconfig_range(struct resource *res); +extern int amd_cache_northbridges(void); +extern void amd_flush_garts(void); +extern int amd_numa_init(void); +extern int amd_get_subcaches(int); +extern int amd_set_subcaches(int, int); + +struct amd_l3_cache { + unsigned indices; + u8 subcaches[4]; +}; + +struct amd_northbridge { + struct pci_dev *misc; + struct pci_dev *link; + struct amd_l3_cache l3_cache; +}; + +struct amd_northbridge_info { + u16 num; + u64 flags; + struct amd_northbridge *nb; +}; +extern struct amd_northbridge_info amd_northbridges; + +#define AMD_NB_GART BIT(0) +#define AMD_NB_L3_INDEX_DISABLE BIT(1) +#define AMD_NB_L3_PARTITIONING BIT(2) + +#ifdef CONFIG_AMD_NB + +static inline u16 amd_nb_num(void) +{ + return amd_northbridges.num; +} + +static inline bool amd_nb_has_feature(unsigned feature) +{ + return ((amd_northbridges.flags & feature) == feature); +} + +static inline struct amd_northbridge *node_to_amd_nb(int node) +{ + return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; +} + +#else + +#define amd_nb_num(x) 0 +#define amd_nb_has_feature(x) false +#define node_to_amd_nb(x) NULL + +#endif + + +#endif /* _ASM_X86_AMD_NB_H */ diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h new file mode 100644 index 00000000000..0acbac299e4 --- /dev/null +++ b/arch/x86/include/asm/apb_timer.h @@ -0,0 +1,49 @@ +/* + * apb_timer.h: Driver for Langwell APB timer based on Synopsis DesignWare + * + * (C) Copyright 2009 Intel Corporation + * Author: Jacob Pan (jacob.jun.pan@intel.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + * + * Note: + */ + +#ifndef ASM_X86_APBT_H +#define ASM_X86_APBT_H +#include <linux/sfi.h> + +#ifdef CONFIG_APB_TIMER + +/* default memory mapped register base */ +#define LNW_SCU_ADDR 0xFF100000 +#define LNW_EXT_TIMER_OFFSET 0x1B800 +#define APBT_DEFAULT_BASE (LNW_SCU_ADDR+LNW_EXT_TIMER_OFFSET) +#define LNW_EXT_TIMER_PGOFFSET 0x800 + +/* APBT clock speed range from PCLK to fabric base, 25-100MHz */ +#define APBT_MAX_FREQ 50000000 +#define APBT_MIN_FREQ 1000000 +#define APBT_MMAP_SIZE 1024 + +#define APBT_DEV_USED 1 + +extern void apbt_time_init(void); +extern unsigned long apbt_quick_calibrate(void); +extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu); +extern void apbt_setup_secondary_clock(void); + +extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint); +extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr); +extern int sfi_mtimer_num; + +#else /* CONFIG_APB_TIMER */ + +static inline unsigned long apbt_quick_calibrate(void) {return 0; } +static inline void apbt_time_init(void) { } + +#endif +#endif /* ASM_X86_APBT_H */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h new file mode 100644 index 00000000000..3ab9bdd87e7 --- /dev/null +++ b/arch/x86/include/asm/apic.h @@ -0,0 +1,631 @@ +#ifndef _ASM_X86_APIC_H +#define _ASM_X86_APIC_H + +#include <linux/cpumask.h> +#include <linux/pm.h> + +#include <asm/alternative.h> +#include <asm/cpufeature.h> +#include <asm/processor.h> +#include <asm/apicdef.h> +#include <linux/atomic.h> +#include <asm/fixmap.h> +#include <asm/mpspec.h> +#include <asm/system.h> +#include <asm/msr.h> + +#define ARCH_APICTIMER_STOPS_ON_C3 1 + +/* + * Debugging macros + */ +#define APIC_QUIET 0 +#define APIC_VERBOSE 1 +#define APIC_DEBUG 2 + +/* + * Define the default level of output to be very little + * This can be turned up by using apic=verbose for more + * information and apic=debug for _lots_ of information. + * apic_verbosity is defined in apic.c + */ +#define apic_printk(v, s, a...) do { \ + if ((v) <= apic_verbosity) \ + printk(s, ##a); \ + } while (0) + + +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) +extern void generic_apic_probe(void); +#else +static inline void generic_apic_probe(void) +{ +} +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + +extern unsigned int apic_verbosity; +extern int local_apic_timer_c2_ok; + +extern int disable_apic; +extern unsigned int lapic_timer_frequency; + +#ifdef CONFIG_SMP +extern void __inquire_remote_apic(int apicid); +#else /* CONFIG_SMP */ +static inline void __inquire_remote_apic(int apicid) +{ +} +#endif /* CONFIG_SMP */ + +static inline void default_inquire_remote_apic(int apicid) +{ + if (apic_verbosity >= APIC_DEBUG) + __inquire_remote_apic(apicid); +} + +/* + * With 82489DX we can't rely on apic feature bit + * retrieved via cpuid but still have to deal with + * such an apic chip so we assume that SMP configuration + * is found from MP table (64bit case uses ACPI mostly + * which set smp presence flag as well so we are safe + * to use this helper too). + */ +static inline bool apic_from_smp_config(void) +{ + return smp_found_config && !disable_apic; +} + +/* + * Basic functions accessing APICs. + */ +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#endif + +#ifdef CONFIG_X86_64 +extern int is_vsmp_box(void); +#else +static inline int is_vsmp_box(void) +{ + return 0; +} +#endif +extern void xapic_wait_icr_idle(void); +extern u32 safe_xapic_wait_icr_idle(void); +extern void xapic_icr_write(u32, u32); +extern int setup_profiling_timer(unsigned int); + +static inline void native_apic_mem_write(u32 reg, u32 v) +{ + volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); + + alternative_io("movl %0, %1", "xchgl %0, %1", X86_FEATURE_11AP, + ASM_OUTPUT2("=r" (v), "=m" (*addr)), + ASM_OUTPUT2("0" (v), "m" (*addr))); +} + +static inline u32 native_apic_mem_read(u32 reg) +{ + return *((volatile u32 *)(APIC_BASE + reg)); +} + +extern void native_apic_wait_icr_idle(void); +extern u32 native_safe_apic_wait_icr_idle(void); +extern void native_apic_icr_write(u32 low, u32 id); +extern u64 native_apic_icr_read(void); + +extern int x2apic_mode; + +#ifdef CONFIG_X86_X2APIC +/* + * Make previous memory operations globally visible before + * sending the IPI through x2apic wrmsr. We need a serializing instruction or + * mfence for this. + */ +static inline void x2apic_wrmsr_fence(void) +{ + asm volatile("mfence" : : : "memory"); +} + +static inline void native_apic_msr_write(u32 reg, u32 v) +{ + if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || + reg == APIC_LVR) + return; + + wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0); +} + +static inline u32 native_apic_msr_read(u32 reg) +{ + u64 msr; + + if (reg == APIC_DFR) + return -1; + + rdmsrl(APIC_BASE_MSR + (reg >> 4), msr); + return (u32)msr; +} + +static inline void native_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return; +} + +static inline u32 native_safe_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return 0; +} + +static inline void native_x2apic_icr_write(u32 low, u32 id) +{ + wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); +} + +static inline u64 native_x2apic_icr_read(void) +{ + unsigned long val; + + rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + return val; +} + +extern int x2apic_phys; +extern int x2apic_preenabled; +extern void check_x2apic(void); +extern void enable_x2apic(void); +extern void x2apic_icr_write(u32 low, u32 id); +static inline int x2apic_enabled(void) +{ + u64 msr; + + if (!cpu_has_x2apic) + return 0; + + rdmsrl(MSR_IA32_APICBASE, msr); + if (msr & X2APIC_ENABLE) + return 1; + return 0; +} + +#define x2apic_supported() (cpu_has_x2apic) +static inline void x2apic_force_phys(void) +{ + x2apic_phys = 1; +} +#else +static inline void disable_x2apic(void) +{ +} +static inline void check_x2apic(void) +{ +} +static inline void enable_x2apic(void) +{ +} +static inline int x2apic_enabled(void) +{ + return 0; +} +static inline void x2apic_force_phys(void) +{ +} + +#define nox2apic 0 +#define x2apic_preenabled 0 +#define x2apic_supported() 0 +#endif + +extern void enable_IR_x2apic(void); + +extern int get_physical_broadcast(void); + +extern int lapic_get_maxlvt(void); +extern void clear_local_APIC(void); +extern void connect_bsp_APIC(void); +extern void disconnect_bsp_APIC(int virt_wire_setup); +extern void disable_local_APIC(void); +extern void lapic_shutdown(void); +extern int verify_local_APIC(void); +extern void sync_Arb_IDs(void); +extern void init_bsp_APIC(void); +extern void setup_local_APIC(void); +extern void end_local_APIC_setup(void); +extern void bsp_end_local_APIC_setup(void); +extern void init_apic_mappings(void); +void register_lapic_address(unsigned long address); +extern void setup_boot_APIC_clock(void); +extern void setup_secondary_APIC_clock(void); +extern int APIC_init_uniprocessor(void); +extern int apic_force_enable(unsigned long addr); + +/* + * On 32bit this is mach-xxx local + */ +#ifdef CONFIG_X86_64 +extern int apic_is_clustered_box(void); +#else +static inline int apic_is_clustered_box(void) +{ + return 0; +} +#endif + +extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); + +#else /* !CONFIG_X86_LOCAL_APIC */ +static inline void lapic_shutdown(void) { } +#define local_apic_timer_c2_ok 1 +static inline void init_apic_mappings(void) { } +static inline void disable_local_APIC(void) { } +# define setup_boot_APIC_clock x86_init_noop +# define setup_secondary_APIC_clock x86_init_noop +#endif /* !CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_X86_64 +#define SET_APIC_ID(x) (apic->set_apic_id(x)) +#else + +#endif + +/* + * Copyright 2004 James Cleverdon, IBM. + * Subject to the GNU Public License, v.2 + * + * Generic APIC sub-arch data struct. + * + * Hacked for x86-64 by James Cleverdon from i386 architecture code by + * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and + * James Cleverdon. + */ +struct apic { + char *name; + + int (*probe)(void); + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); + int (*apic_id_registered)(void); + + u32 irq_delivery_mode; + u32 irq_dest_mode; + + const struct cpumask *(*target_cpus)(void); + + int disable_esr; + + int dest_logical; + unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); + unsigned long (*check_apicid_present)(int apicid); + + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + void (*init_apic_ldr)(void); + + void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); + + void (*setup_apic_routing)(void); + int (*multi_timer_check)(int apic, int irq); + int (*cpu_present_to_apicid)(int mps_cpu); + void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); + void (*setup_portio_remap)(void); + int (*check_phys_apicid_present)(int phys_apicid); + void (*enable_apic_mode)(void); + int (*phys_pkg_id)(int cpuid_apic, int index_msb); + + /* + * When one of the next two hooks returns 1 the apic + * is switched to this. Essentially they are additional + * probe functions: + */ + int (*mps_oem_check)(struct mpc_table *mpc, char *oem, char *productid); + + unsigned int (*get_apic_id)(unsigned long x); + unsigned long (*set_apic_id)(unsigned int id); + unsigned long apic_id_mask; + + unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); + unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, + const struct cpumask *andmask); + + /* ipi */ + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *mask, + int vector); + void (*send_IPI_allbutself)(int vector); + void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); + + /* wakeup_secondary_cpu */ + int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); + + int trampoline_phys_low; + int trampoline_phys_high; + + void (*wait_for_init_deassert)(atomic_t *deassert); + void (*smp_callin_clear_local_apic)(void); + void (*inquire_remote_apic)(int apicid); + + /* apic ops */ + u32 (*read)(u32 reg); + void (*write)(u32 reg, u32 v); + u64 (*icr_read)(void); + void (*icr_write)(u32 low, u32 high); + void (*wait_icr_idle)(void); + u32 (*safe_wait_icr_idle)(void); + +#ifdef CONFIG_X86_32 + /* + * Called very early during boot from get_smp_config(). It should + * return the logical apicid. x86_[bios]_cpu_to_apicid is + * initialized before this function is called. + * + * If logical apicid can't be determined that early, the function + * may return BAD_APICID. Logical apicid will be configured after + * init_apic_ldr() while bringing up CPUs. Note that NUMA affinity + * won't be applied properly during early boot in this case. + */ + int (*x86_32_early_logical_apicid)(int cpu); + + /* + * Optional method called from setup_local_APIC() after logical + * apicid is guaranteed to be known to initialize apicid -> node + * mapping if NUMA initialization hasn't done so already. Don't + * add new users. + */ + int (*x86_32_numa_cpu_node)(int cpu); +#endif +}; + +/* + * Pointer to the local APIC driver in use on this system (there's + * always just one such driver in use - the kernel decides via an + * early probing process which one it picks - and then sticks to it): + */ +extern struct apic *apic; + +/* + * APIC drivers are probed based on how they are listed in the .apicdrivers + * section. So the order is important and enforced by the ordering + * of different apic driver files in the Makefile. + * + * For the files having two apic drivers, we use apic_drivers() + * to enforce the order with in them. + */ +#define apic_driver(sym) \ + static struct apic *__apicdrivers_##sym __used \ + __aligned(sizeof(struct apic *)) \ + __section(.apicdrivers) = { &sym } + +#define apic_drivers(sym1, sym2) \ + static struct apic *__apicdrivers_##sym1##sym2[2] __used \ + __aligned(sizeof(struct apic *)) \ + __section(.apicdrivers) = { &sym1, &sym2 } + +extern struct apic *__apicdrivers[], *__apicdrivers_end[]; + +/* + * APIC functionality to boot other CPUs - only used on SMP: + */ +#ifdef CONFIG_SMP +extern atomic_t init_deasserted; +extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + +static inline u32 apic_read(u32 reg) +{ + return apic->read(reg); +} + +static inline void apic_write(u32 reg, u32 val) +{ + apic->write(reg, val); +} + +static inline u64 apic_icr_read(void) +{ + return apic->icr_read(); +} + +static inline void apic_icr_write(u32 low, u32 high) +{ + apic->icr_write(low, high); +} + +static inline void apic_wait_icr_idle(void) +{ + apic->wait_icr_idle(); +} + +static inline u32 safe_apic_wait_icr_idle(void) +{ + return apic->safe_wait_icr_idle(); +} + +#else /* CONFIG_X86_LOCAL_APIC */ + +static inline u32 apic_read(u32 reg) { return 0; } +static inline void apic_write(u32 reg, u32 val) { } +static inline u64 apic_icr_read(void) { return 0; } +static inline void apic_icr_write(u32 low, u32 high) { } +static inline void apic_wait_icr_idle(void) { } +static inline u32 safe_apic_wait_icr_idle(void) { return 0; } + +#endif /* CONFIG_X86_LOCAL_APIC */ + +static inline void ack_APIC_irq(void) +{ + /* + * ack_APIC_irq() actually gets compiled as a single instruction + * ... yummie. + */ + + /* Docs say use 0 for future compatibility */ + apic_write(APIC_EOI, 0); +} + +static inline unsigned default_get_apic_id(unsigned long x) +{ + unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + + if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID)) + return (x >> 24) & 0xFF; + else + return (x >> 24) & 0x0F; +} + +/* + * Warm reset vector default position: + */ +#define DEFAULT_TRAMPOLINE_PHYS_LOW 0x467 +#define DEFAULT_TRAMPOLINE_PHYS_HIGH 0x469 + +#ifdef CONFIG_X86_64 +extern int default_acpi_madt_oem_check(char *, char *); + +extern void apic_send_IPI_self(int vector); + +DECLARE_PER_CPU(int, x2apic_extra_bits); + +extern int default_cpu_present_to_apicid(int mps_cpu); +extern int default_check_phys_apicid_present(int phys_apicid); +#endif + +static inline void default_wait_for_init_deassert(atomic_t *deassert) +{ + while (!atomic_read(deassert)) + cpu_relax(); + return; +} + +extern void generic_bigsmp_probe(void); + + +#ifdef CONFIG_X86_LOCAL_APIC + +#include <asm/smp.h> + +#define APIC_DFR_VALUE (APIC_DFR_FLAT) + +static inline const struct cpumask *default_target_cpus(void) +{ +#ifdef CONFIG_SMP + return cpu_online_mask; +#else + return cpumask_of(0); +#endif +} + +DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); + + +static inline unsigned int read_apic_id(void) +{ + unsigned int reg; + + reg = apic_read(APIC_ID); + + return apic->get_apic_id(reg); +} + +extern void default_setup_apic_routing(void); + +extern struct apic apic_noop; + +#ifdef CONFIG_X86_32 + +static inline int noop_x86_32_early_logical_apicid(int cpu) +{ + return BAD_APICID; +} + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +extern void default_init_apic_ldr(void); + +static inline int default_apic_id_registered(void) +{ + return physid_isset(read_apic_id(), phys_cpu_present_map); +} + +static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +#endif + +static inline unsigned int +default_cpu_mask_to_apicid(const struct cpumask *cpumask) +{ + return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; +} + +static inline unsigned int +default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + unsigned long mask1 = cpumask_bits(cpumask)[0]; + unsigned long mask2 = cpumask_bits(andmask)[0]; + unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; + + return (unsigned int)(mask1 & mask2 & mask3); +} + +static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) +{ + return physid_isset(apicid, *map); +} + +static inline unsigned long default_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) +{ + *retmap = *phys_map; +} + +static inline int __default_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static inline int +__default_check_phys_apicid_present(int phys_apicid) +{ + return physid_isset(phys_apicid, phys_cpu_present_map); +} + +#ifdef CONFIG_X86_32 +static inline int default_cpu_present_to_apicid(int mps_cpu) +{ + return __default_cpu_present_to_apicid(mps_cpu); +} + +static inline int +default_check_phys_apicid_present(int phys_apicid) +{ + return __default_check_phys_apicid_present(phys_apicid); +} +#else +extern int default_cpu_present_to_apicid(int mps_cpu); +extern int default_check_phys_apicid_present(int phys_apicid); +#endif + +#endif /* CONFIG_X86_LOCAL_APIC */ + +#endif /* _ASM_X86_APIC_H */ diff --git a/arch/x86/include/asm/apic_flat_64.h b/arch/x86/include/asm/apic_flat_64.h new file mode 100644 index 00000000000..a2d31279644 --- /dev/null +++ b/arch/x86/include/asm/apic_flat_64.h @@ -0,0 +1,7 @@ +#ifndef _ASM_X86_APIC_FLAT_64_H +#define _ASM_X86_APIC_FLAT_64_H + +extern void flat_init_apic_ldr(void); + +#endif + diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h new file mode 100644 index 00000000000..134bba00df0 --- /dev/null +++ b/arch/x86/include/asm/apicdef.h @@ -0,0 +1,445 @@ +#ifndef _ASM_X86_APICDEF_H +#define _ASM_X86_APICDEF_H + +/* + * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) + * + * Alan Cox <Alan.Cox@linux.org>, 1995. + * Ingo Molnar <mingo@redhat.com>, 1999, 2000 + */ + +#define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000 +#define APIC_DEFAULT_PHYS_BASE 0xfee00000 + +/* + * This is the IO-APIC register space as specified + * by Intel docs: + */ +#define IO_APIC_SLOT_SIZE 1024 + +#define APIC_ID 0x20 + +#define APIC_LVR 0x30 +#define APIC_LVR_MASK 0xFF00FF +#define APIC_LVR_DIRECTED_EOI (1 << 24) +#define GET_APIC_VERSION(x) ((x) & 0xFFu) +#define GET_APIC_MAXLVT(x) (((x) >> 16) & 0xFFu) +#ifdef CONFIG_X86_32 +# define APIC_INTEGRATED(x) ((x) & 0xF0u) +#else +# define APIC_INTEGRATED(x) (1) +#endif +#define APIC_XAPIC(x) ((x) >= 0x14) +#define APIC_EXT_SPACE(x) ((x) & 0x80000000) +#define APIC_TASKPRI 0x80 +#define APIC_TPRI_MASK 0xFFu +#define APIC_ARBPRI 0x90 +#define APIC_ARBPRI_MASK 0xFFu +#define APIC_PROCPRI 0xA0 +#define APIC_EOI 0xB0 +#define APIC_EIO_ACK 0x0 +#define APIC_RRR 0xC0 +#define APIC_LDR 0xD0 +#define APIC_LDR_MASK (0xFFu << 24) +#define GET_APIC_LOGICAL_ID(x) (((x) >> 24) & 0xFFu) +#define SET_APIC_LOGICAL_ID(x) (((x) << 24)) +#define APIC_ALL_CPUS 0xFFu +#define APIC_DFR 0xE0 +#define APIC_DFR_CLUSTER 0x0FFFFFFFul +#define APIC_DFR_FLAT 0xFFFFFFFFul +#define APIC_SPIV 0xF0 +#define APIC_SPIV_DIRECTED_EOI (1 << 12) +#define APIC_SPIV_FOCUS_DISABLED (1 << 9) +#define APIC_SPIV_APIC_ENABLED (1 << 8) +#define APIC_ISR 0x100 +#define APIC_ISR_NR 0x8 /* Number of 32 bit ISR registers. */ +#define APIC_TMR 0x180 +#define APIC_IRR 0x200 +#define APIC_ESR 0x280 +#define APIC_ESR_SEND_CS 0x00001 +#define APIC_ESR_RECV_CS 0x00002 +#define APIC_ESR_SEND_ACC 0x00004 +#define APIC_ESR_RECV_ACC 0x00008 +#define APIC_ESR_SENDILL 0x00020 +#define APIC_ESR_RECVILL 0x00040 +#define APIC_ESR_ILLREGA 0x00080 +#define APIC_LVTCMCI 0x2f0 +#define APIC_ICR 0x300 +#define APIC_DEST_SELF 0x40000 +#define APIC_DEST_ALLINC 0x80000 +#define APIC_DEST_ALLBUT 0xC0000 +#define APIC_ICR_RR_MASK 0x30000 +#define APIC_ICR_RR_INVALID 0x00000 +#define APIC_ICR_RR_INPROG 0x10000 +#define APIC_ICR_RR_VALID 0x20000 +#define APIC_INT_LEVELTRIG 0x08000 +#define APIC_INT_ASSERT 0x04000 +#define APIC_ICR_BUSY 0x01000 +#define APIC_DEST_LOGICAL 0x00800 +#define APIC_DEST_PHYSICAL 0x00000 +#define APIC_DM_FIXED 0x00000 +#define APIC_DM_FIXED_MASK 0x00700 +#define APIC_DM_LOWEST 0x00100 +#define APIC_DM_SMI 0x00200 +#define APIC_DM_REMRD 0x00300 +#define APIC_DM_NMI 0x00400 +#define APIC_DM_INIT 0x00500 +#define APIC_DM_STARTUP 0x00600 +#define APIC_DM_EXTINT 0x00700 +#define APIC_VECTOR_MASK 0x000FF +#define APIC_ICR2 0x310 +#define GET_APIC_DEST_FIELD(x) (((x) >> 24) & 0xFF) +#define SET_APIC_DEST_FIELD(x) ((x) << 24) +#define APIC_LVTT 0x320 +#define APIC_LVTTHMR 0x330 +#define APIC_LVTPC 0x340 +#define APIC_LVT0 0x350 +#define APIC_LVT_TIMER_BASE_MASK (0x3 << 18) +#define GET_APIC_TIMER_BASE(x) (((x) >> 18) & 0x3) +#define SET_APIC_TIMER_BASE(x) (((x) << 18)) +#define APIC_TIMER_BASE_CLKIN 0x0 +#define APIC_TIMER_BASE_TMBASE 0x1 +#define APIC_TIMER_BASE_DIV 0x2 +#define APIC_LVT_TIMER_ONESHOT (0 << 17) +#define APIC_LVT_TIMER_PERIODIC (1 << 17) +#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) +#define APIC_LVT_MASKED (1 << 16) +#define APIC_LVT_LEVEL_TRIGGER (1 << 15) +#define APIC_LVT_REMOTE_IRR (1 << 14) +#define APIC_INPUT_POLARITY (1 << 13) +#define APIC_SEND_PENDING (1 << 12) +#define APIC_MODE_MASK 0x700 +#define GET_APIC_DELIVERY_MODE(x) (((x) >> 8) & 0x7) +#define SET_APIC_DELIVERY_MODE(x, y) (((x) & ~0x700) | ((y) << 8)) +#define APIC_MODE_FIXED 0x0 +#define APIC_MODE_NMI 0x4 +#define APIC_MODE_EXTINT 0x7 +#define APIC_LVT1 0x360 +#define APIC_LVTERR 0x370 +#define APIC_TMICT 0x380 +#define APIC_TMCCT 0x390 +#define APIC_TDCR 0x3E0 +#define APIC_SELF_IPI 0x3F0 +#define APIC_TDR_DIV_TMBASE (1 << 2) +#define APIC_TDR_DIV_1 0xB +#define APIC_TDR_DIV_2 0x0 +#define APIC_TDR_DIV_4 0x1 +#define APIC_TDR_DIV_8 0x2 +#define APIC_TDR_DIV_16 0x3 +#define APIC_TDR_DIV_32 0x8 +#define APIC_TDR_DIV_64 0x9 +#define APIC_TDR_DIV_128 0xA +#define APIC_EFEAT 0x400 +#define APIC_ECTRL 0x410 +#define APIC_EILVTn(n) (0x500 + 0x10 * n) +#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ +#define APIC_EILVT_NR_AMD_10H 4 +#define APIC_EILVT_NR_MAX APIC_EILVT_NR_AMD_10H +#define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF) +#define APIC_EILVT_MSG_FIX 0x0 +#define APIC_EILVT_MSG_SMI 0x2 +#define APIC_EILVT_MSG_NMI 0x4 +#define APIC_EILVT_MSG_EXT 0x7 +#define APIC_EILVT_MASKED (1 << 16) + +#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) +#define APIC_BASE_MSR 0x800 +#define XAPIC_ENABLE (1UL << 11) +#define X2APIC_ENABLE (1UL << 10) + +#ifdef CONFIG_X86_32 +# define MAX_IO_APICS 64 +# define MAX_LOCAL_APIC 256 +#else +# define MAX_IO_APICS 128 +# define MAX_LOCAL_APIC 32768 +#endif + +/* + * All x86-64 systems are xAPIC compatible. + * In the following, "apicid" is a physical APIC ID. + */ +#define XAPIC_DEST_CPUS_SHIFT 4 +#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) +#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) +#define APIC_CLUSTER(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) +#define APIC_CLUSTERID(apicid) (APIC_CLUSTER(apicid) >> XAPIC_DEST_CPUS_SHIFT) +#define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK) +#define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT) + +/* + * the local APIC register structure, memory mapped. Not terribly well + * tested, but we might eventually use this one in the future - the + * problem why we cannot use it right now is the P5 APIC, it has an + * errata which cannot take 8-bit reads and writes, only 32-bit ones ... + */ +#define u32 unsigned int + +struct local_apic { + +/*000*/ struct { u32 __reserved[4]; } __reserved_01; + +/*010*/ struct { u32 __reserved[4]; } __reserved_02; + +/*020*/ struct { /* APIC ID Register */ + u32 __reserved_1 : 24, + phys_apic_id : 4, + __reserved_2 : 4; + u32 __reserved[3]; + } id; + +/*030*/ const + struct { /* APIC Version Register */ + u32 version : 8, + __reserved_1 : 8, + max_lvt : 8, + __reserved_2 : 8; + u32 __reserved[3]; + } version; + +/*040*/ struct { u32 __reserved[4]; } __reserved_03; + +/*050*/ struct { u32 __reserved[4]; } __reserved_04; + +/*060*/ struct { u32 __reserved[4]; } __reserved_05; + +/*070*/ struct { u32 __reserved[4]; } __reserved_06; + +/*080*/ struct { /* Task Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } tpr; + +/*090*/ const + struct { /* Arbitration Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } apr; + +/*0A0*/ const + struct { /* Processor Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } ppr; + +/*0B0*/ struct { /* End Of Interrupt Register */ + u32 eoi; + u32 __reserved[3]; + } eoi; + +/*0C0*/ struct { u32 __reserved[4]; } __reserved_07; + +/*0D0*/ struct { /* Logical Destination Register */ + u32 __reserved_1 : 24, + logical_dest : 8; + u32 __reserved_2[3]; + } ldr; + +/*0E0*/ struct { /* Destination Format Register */ + u32 __reserved_1 : 28, + model : 4; + u32 __reserved_2[3]; + } dfr; + +/*0F0*/ struct { /* Spurious Interrupt Vector Register */ + u32 spurious_vector : 8, + apic_enabled : 1, + focus_cpu : 1, + __reserved_2 : 22; + u32 __reserved_3[3]; + } svr; + +/*100*/ struct { /* In Service Register */ +/*170*/ u32 bitfield; + u32 __reserved[3]; + } isr [8]; + +/*180*/ struct { /* Trigger Mode Register */ +/*1F0*/ u32 bitfield; + u32 __reserved[3]; + } tmr [8]; + +/*200*/ struct { /* Interrupt Request Register */ +/*270*/ u32 bitfield; + u32 __reserved[3]; + } irr [8]; + +/*280*/ union { /* Error Status Register */ + struct { + u32 send_cs_error : 1, + receive_cs_error : 1, + send_accept_error : 1, + receive_accept_error : 1, + __reserved_1 : 1, + send_illegal_vector : 1, + receive_illegal_vector : 1, + illegal_register_address : 1, + __reserved_2 : 24; + u32 __reserved_3[3]; + } error_bits; + struct { + u32 errors; + u32 __reserved_3[3]; + } all_errors; + } esr; + +/*290*/ struct { u32 __reserved[4]; } __reserved_08; + +/*2A0*/ struct { u32 __reserved[4]; } __reserved_09; + +/*2B0*/ struct { u32 __reserved[4]; } __reserved_10; + +/*2C0*/ struct { u32 __reserved[4]; } __reserved_11; + +/*2D0*/ struct { u32 __reserved[4]; } __reserved_12; + +/*2E0*/ struct { u32 __reserved[4]; } __reserved_13; + +/*2F0*/ struct { u32 __reserved[4]; } __reserved_14; + +/*300*/ struct { /* Interrupt Command Register 1 */ + u32 vector : 8, + delivery_mode : 3, + destination_mode : 1, + delivery_status : 1, + __reserved_1 : 1, + level : 1, + trigger : 1, + __reserved_2 : 2, + shorthand : 2, + __reserved_3 : 12; + u32 __reserved_4[3]; + } icr1; + +/*310*/ struct { /* Interrupt Command Register 2 */ + union { + u32 __reserved_1 : 24, + phys_dest : 4, + __reserved_2 : 4; + u32 __reserved_3 : 24, + logical_dest : 8; + } dest; + u32 __reserved_4[3]; + } icr2; + +/*320*/ struct { /* LVT - Timer */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + timer_mode : 1, + __reserved_3 : 14; + u32 __reserved_4[3]; + } lvt_timer; + +/*330*/ struct { /* LVT - Thermal Sensor */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_thermal; + +/*340*/ struct { /* LVT - Performance Counter */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_pc; + +/*350*/ struct { /* LVT - LINT0 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint0; + +/*360*/ struct { /* LVT - LINT1 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint1; + +/*370*/ struct { /* LVT - Error */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_error; + +/*380*/ struct { /* Timer Initial Count Register */ + u32 initial_count; + u32 __reserved_2[3]; + } timer_icr; + +/*390*/ const + struct { /* Timer Current Count Register */ + u32 curr_count; + u32 __reserved_2[3]; + } timer_ccr; + +/*3A0*/ struct { u32 __reserved[4]; } __reserved_16; + +/*3B0*/ struct { u32 __reserved[4]; } __reserved_17; + +/*3C0*/ struct { u32 __reserved[4]; } __reserved_18; + +/*3D0*/ struct { u32 __reserved[4]; } __reserved_19; + +/*3E0*/ struct { /* Timer Divide Configuration Register */ + u32 divisor : 4, + __reserved_1 : 28; + u32 __reserved_2[3]; + } timer_dcr; + +/*3F0*/ struct { u32 __reserved[4]; } __reserved_20; + +} __attribute__ ((packed)); + +#undef u32 + +#ifdef CONFIG_X86_32 + #define BAD_APICID 0xFFu +#else + #define BAD_APICID 0xFFFFu +#endif + +enum ioapic_irq_destination_types { + dest_Fixed = 0, + dest_LowestPrio = 1, + dest_SMI = 2, + dest__reserved_1 = 3, + dest_NMI = 4, + dest_INIT = 5, + dest__reserved_2 = 6, + dest_ExtINT = 7 +}; + +#endif /* _ASM_X86_APICDEF_H */ diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h new file mode 100644 index 00000000000..20370c6db74 --- /dev/null +++ b/arch/x86/include/asm/apm.h @@ -0,0 +1,73 @@ +/* + * Machine specific APM BIOS functions for generic. + * Split out from apm.c by Osamu Tomita <tomita@cinet.co.jp> + */ + +#ifndef _ASM_X86_MACH_DEFAULT_APM_H +#define _ASM_X86_MACH_DEFAULT_APM_H + +#ifdef APM_ZERO_SEGS +# define APM_DO_ZERO_SEGS \ + "pushl %%ds\n\t" \ + "pushl %%es\n\t" \ + "xorl %%edx, %%edx\n\t" \ + "mov %%dx, %%ds\n\t" \ + "mov %%dx, %%es\n\t" \ + "mov %%dx, %%fs\n\t" \ + "mov %%dx, %%gs\n\t" +# define APM_DO_POP_SEGS \ + "popl %%es\n\t" \ + "popl %%ds\n\t" +#else +# define APM_DO_ZERO_SEGS +# define APM_DO_POP_SEGS +#endif + +static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, + u32 *eax, u32 *ebx, u32 *ecx, + u32 *edx, u32 *esi) +{ + /* + * N.B. We do NOT need a cld after the BIOS call + * because we always save and restore the flags. + */ + __asm__ __volatile__(APM_DO_ZERO_SEGS + "pushl %%edi\n\t" + "pushl %%ebp\n\t" + "lcall *%%cs:apm_bios_entry\n\t" + "setc %%al\n\t" + "popl %%ebp\n\t" + "popl %%edi\n\t" + APM_DO_POP_SEGS + : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx), + "=S" (*esi) + : "a" (func), "b" (ebx_in), "c" (ecx_in) + : "memory", "cc"); +} + +static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, + u32 ecx_in, u32 *eax) +{ + int cx, dx, si; + u8 error; + + /* + * N.B. We do NOT need a cld after the BIOS call + * because we always save and restore the flags. + */ + __asm__ __volatile__(APM_DO_ZERO_SEGS + "pushl %%edi\n\t" + "pushl %%ebp\n\t" + "lcall *%%cs:apm_bios_entry\n\t" + "setc %%bl\n\t" + "popl %%ebp\n\t" + "popl %%edi\n\t" + APM_DO_POP_SEGS + : "=a" (*eax), "=b" (error), "=c" (cx), "=d" (dx), + "=S" (si) + : "a" (func), "b" (ebx_in), "c" (ecx_in) + : "memory", "cc"); + return error; +} + +#endif /* _ASM_X86_MACH_DEFAULT_APM_H */ diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h new file mode 100644 index 00000000000..9686c3d9ff7 --- /dev/null +++ b/arch/x86/include/asm/arch_hweight.h @@ -0,0 +1,61 @@ +#ifndef _ASM_X86_HWEIGHT_H +#define _ASM_X86_HWEIGHT_H + +#ifdef CONFIG_64BIT +/* popcnt %edi, %eax -- redundant REX prefix for alignment */ +#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" +/* popcnt %rdi, %rax */ +#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7" +#define REG_IN "D" +#define REG_OUT "a" +#else +/* popcnt %eax, %eax */ +#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0" +#define REG_IN "a" +#define REG_OUT "a" +#endif + +/* + * __sw_hweightXX are called from within the alternatives below + * and callee-clobbered registers need to be taken care of. See + * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective + * compiler switches. + */ +static inline unsigned int __arch_hweight32(unsigned int w) +{ + unsigned int res = 0; + + asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT) + : "="REG_OUT (res) + : REG_IN (w)); + + return res; +} + +static inline unsigned int __arch_hweight16(unsigned int w) +{ + return __arch_hweight32(w & 0xffff); +} + +static inline unsigned int __arch_hweight8(unsigned int w) +{ + return __arch_hweight32(w & 0xff); +} + +static inline unsigned long __arch_hweight64(__u64 w) +{ + unsigned long res = 0; + +#ifdef CONFIG_X86_32 + return __arch_hweight32((u32)w) + + __arch_hweight32((u32)(w >> 32)); +#else + asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) + : "="REG_OUT (res) + : REG_IN (w)); +#endif /* CONFIG_X86_32 */ + + return res; +} + +#endif diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h new file mode 100644 index 00000000000..0d9ec770f2f --- /dev/null +++ b/arch/x86/include/asm/archrandom.h @@ -0,0 +1,75 @@ +/* + * This file is part of the Linux kernel. + * + * Copyright (c) 2011, Intel Corporation + * Authors: Fenghua Yu <fenghua.yu@intel.com>, + * H. Peter Anvin <hpa@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#ifndef ASM_X86_ARCHRANDOM_H +#define ASM_X86_ARCHRANDOM_H + +#include <asm/processor.h> +#include <asm/cpufeature.h> +#include <asm/alternative.h> +#include <asm/nops.h> + +#define RDRAND_RETRY_LOOPS 10 + +#define RDRAND_INT ".byte 0x0f,0xc7,0xf0" +#ifdef CONFIG_X86_64 +# define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0" +#else +# define RDRAND_LONG RDRAND_INT +#endif + +#ifdef CONFIG_ARCH_RANDOM + +#define GET_RANDOM(name, type, rdrand, nop) \ +static inline int name(type *v) \ +{ \ + int ok; \ + alternative_io("movl $0, %0\n\t" \ + nop, \ + "\n1: " rdrand "\n\t" \ + "jc 2f\n\t" \ + "decl %0\n\t" \ + "jnz 1b\n\t" \ + "2:", \ + X86_FEATURE_RDRAND, \ + ASM_OUTPUT2("=r" (ok), "=a" (*v)), \ + "0" (RDRAND_RETRY_LOOPS)); \ + return ok; \ +} + +#ifdef CONFIG_X86_64 + +GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5); +GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4); + +#else + +GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3); +GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); + +#endif /* CONFIG_X86_64 */ + +#endif /* CONFIG_ARCH_RANDOM */ + +extern void x86_init_rdrand(struct cpuinfo_x86 *c); + +#endif /* ASM_X86_ARCHRANDOM_H */ diff --git a/arch/x86/include/asm/asm-offsets.h b/arch/x86/include/asm/asm-offsets.h new file mode 100644 index 00000000000..d370ee36a18 --- /dev/null +++ b/arch/x86/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include <generated/asm-offsets.h> diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h new file mode 100644 index 00000000000..9412d6558c8 --- /dev/null +++ b/arch/x86/include/asm/asm.h @@ -0,0 +1,58 @@ +#ifndef _ASM_X86_ASM_H +#define _ASM_X86_ASM_H + +#ifdef __ASSEMBLY__ +# define __ASM_FORM(x) x +# define __ASM_FORM_COMMA(x) x, +# define __ASM_EX_SEC .section __ex_table, "a" +#else +# define __ASM_FORM(x) " " #x " " +# define __ASM_FORM_COMMA(x) " " #x "," +# define __ASM_EX_SEC " .section __ex_table,\"a\"\n" +#endif + +#ifdef CONFIG_X86_32 +# define __ASM_SEL(a,b) __ASM_FORM(a) +#else +# define __ASM_SEL(a,b) __ASM_FORM(b) +#endif + +#define __ASM_SIZE(inst, ...) __ASM_SEL(inst##l##__VA_ARGS__, \ + inst##q##__VA_ARGS__) +#define __ASM_REG(reg) __ASM_SEL(e##reg, r##reg) + +#define _ASM_PTR __ASM_SEL(.long, .quad) +#define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8) + +#define _ASM_MOV __ASM_SIZE(mov) +#define _ASM_INC __ASM_SIZE(inc) +#define _ASM_DEC __ASM_SIZE(dec) +#define _ASM_ADD __ASM_SIZE(add) +#define _ASM_SUB __ASM_SIZE(sub) +#define _ASM_XADD __ASM_SIZE(xadd) + +#define _ASM_AX __ASM_REG(ax) +#define _ASM_BX __ASM_REG(bx) +#define _ASM_CX __ASM_REG(cx) +#define _ASM_DX __ASM_REG(dx) +#define _ASM_SP __ASM_REG(sp) +#define _ASM_BP __ASM_REG(bp) +#define _ASM_SI __ASM_REG(si) +#define _ASM_DI __ASM_REG(di) + +/* Exception table entry */ +#ifdef __ASSEMBLY__ +# define _ASM_EXTABLE(from,to) \ + __ASM_EX_SEC ; \ + _ASM_ALIGN ; \ + _ASM_PTR from , to ; \ + .previous +#else +# define _ASM_EXTABLE(from,to) \ + __ASM_EX_SEC \ + _ASM_ALIGN "\n" \ + _ASM_PTR #from "," #to "\n" \ + " .previous\n" +#endif + +#endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h new file mode 100644 index 00000000000..58cb6d4085f --- /dev/null +++ b/arch/x86/include/asm/atomic.h @@ -0,0 +1,317 @@ +#ifndef _ASM_X86_ATOMIC_H +#define _ASM_X86_ATOMIC_H + +#include <linux/compiler.h> +#include <linux/types.h> +#include <asm/processor.h> +#include <asm/alternative.h> +#include <asm/cmpxchg.h> + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + */ + +#define ATOMIC_INIT(i) { (i) } + +/** + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. + */ +static inline int atomic_read(const atomic_t *v) +{ + return (*(volatile int *)&(v)->counter); +} + +/** + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +static inline void atomic_set(atomic_t *v, int i) +{ + v->counter = i; +} + +/** + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. + */ +static inline void atomic_add(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "addl %1,%0" + : "+m" (v->counter) + : "ir" (i)); +} + +/** + * atomic_sub - subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. + */ +static inline void atomic_sub(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "subl %1,%0" + : "+m" (v->counter) + : "ir" (i)); +} + +/** + * atomic_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline int atomic_sub_and_test(int i, atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "subl %2,%0; sete %1" + : "+m" (v->counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +} + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. + */ +static inline void atomic_inc(atomic_t *v) +{ + asm volatile(LOCK_PREFIX "incl %0" + : "+m" (v->counter)); +} + +/** + * atomic_dec - decrement atomic variable + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1. + */ +static inline void atomic_dec(atomic_t *v) +{ + asm volatile(LOCK_PREFIX "decl %0" + : "+m" (v->counter)); +} + +/** + * atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline int atomic_dec_and_test(atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "decl %0; sete %1" + : "+m" (v->counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline int atomic_inc_and_test(atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "incl %0; sete %1" + : "+m" (v->counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * atomic_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline int atomic_add_negative(int i, atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "addl %2,%0; sets %1" + : "+m" (v->counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +} + +/** + * atomic_add_return - add integer and return + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns @i + @v + */ +static inline int atomic_add_return(int i, atomic_t *v) +{ +#ifdef CONFIG_M386 + int __i; + unsigned long flags; + if (unlikely(boot_cpu_data.x86 <= 3)) + goto no_xadd; +#endif + /* Modern 486+ processor */ + return i + xadd(&v->counter, i); + +#ifdef CONFIG_M386 +no_xadd: /* Legacy 386 processor */ + raw_local_irq_save(flags); + __i = atomic_read(v); + atomic_set(v, i + __i); + raw_local_irq_restore(flags); + return i + __i; +#endif +} + +/** + * atomic_sub_return - subtract integer and return + * @v: pointer of type atomic_t + * @i: integer value to subtract + * + * Atomically subtracts @i from @v and returns @v - @i + */ +static inline int atomic_sub_return(int i, atomic_t *v) +{ + return atomic_add_return(-i, v); +} + +#define atomic_inc_return(v) (atomic_add_return(1, v)) +#define atomic_dec_return(v) (atomic_sub_return(1, v)) + +static inline int atomic_cmpxchg(atomic_t *v, int old, int new) +{ + return cmpxchg(&v->counter, old, new); +} + +static inline int atomic_xchg(atomic_t *v, int new) +{ + return xchg(&v->counter, new); +} + +/** + * __atomic_add_unless - add unless the number is already a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as @v was not already @u. + * Returns the old value of @v. + */ +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int c, old; + c = atomic_read(v); + for (;;) { + if (unlikely(c == (u))) + break; + old = atomic_cmpxchg((v), c, c + (a)); + if (likely(old == c)) + break; + c = old; + } + return c; +} + + +/* + * atomic_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic_t + * + * The function returns the old value of *v minus 1, even if + * the atomic variable, v, was not decremented. + */ +static inline int atomic_dec_if_positive(atomic_t *v) +{ + int c, old, dec; + c = atomic_read(v); + for (;;) { + dec = c - 1; + if (unlikely(dec < 0)) + break; + old = atomic_cmpxchg((v), c, dec); + if (likely(old == c)) + break; + c = old; + } + return dec; +} + +/** + * atomic_inc_short - increment of a short integer + * @v: pointer to type int + * + * Atomically adds 1 to @v + * Returns the new value of @u + */ +static inline short int atomic_inc_short(short int *v) +{ + asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v)); + return *v; +} + +#ifdef CONFIG_X86_64 +/** + * atomic_or_long - OR of two long integers + * @v1: pointer to type unsigned long + * @v2: pointer to type unsigned long + * + * Atomically ORs @v1 and @v2 + * Returns the result of the OR + */ +static inline void atomic_or_long(unsigned long *v1, unsigned long v2) +{ + asm(LOCK_PREFIX "orq %1, %0" : "+m" (*v1) : "r" (v2)); +} +#endif + +/* These are x86-specific, used by some header files */ +#define atomic_clear_mask(mask, addr) \ + asm volatile(LOCK_PREFIX "andl %0,%1" \ + : : "r" (~(mask)), "m" (*(addr)) : "memory") + +#define atomic_set_mask(mask, addr) \ + asm volatile(LOCK_PREFIX "orl %0,%1" \ + : : "r" ((unsigned)(mask)), "m" (*(addr)) \ + : "memory") + +/* Atomic operations are already serializing on x86 */ +#define smp_mb__before_atomic_dec() barrier() +#define smp_mb__after_atomic_dec() barrier() +#define smp_mb__before_atomic_inc() barrier() +#define smp_mb__after_atomic_inc() barrier() + +#ifdef CONFIG_X86_32 +# include "atomic64_32.h" +#else +# include "atomic64_64.h" +#endif + +#endif /* _ASM_X86_ATOMIC_H */ diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h new file mode 100644 index 00000000000..fa13f0ec287 --- /dev/null +++ b/arch/x86/include/asm/atomic64_32.h @@ -0,0 +1,304 @@ +#ifndef _ASM_X86_ATOMIC64_32_H +#define _ASM_X86_ATOMIC64_32_H + +#include <linux/compiler.h> +#include <linux/types.h> +#include <asm/processor.h> +//#include <asm/cmpxchg.h> + +/* An 64bit atomic type */ + +typedef struct { + u64 __aligned(8) counter; +} atomic64_t; + +#define ATOMIC64_INIT(val) { (val) } + +#ifdef CONFIG_X86_CMPXCHG64 +#define ATOMIC64_ALTERNATIVE_(f, g) "call atomic64_" #g "_cx8" +#else +#define ATOMIC64_ALTERNATIVE_(f, g) ALTERNATIVE("call atomic64_" #f "_386", "call atomic64_" #g "_cx8", X86_FEATURE_CX8) +#endif + +#define ATOMIC64_ALTERNATIVE(f) ATOMIC64_ALTERNATIVE_(f, f) + +/** + * atomic64_cmpxchg - cmpxchg atomic64 variable + * @p: pointer to type atomic64_t + * @o: expected value + * @n: new value + * + * Atomically sets @v to @n if it was equal to @o and returns + * the old value. + */ + +static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) +{ + return cmpxchg64(&v->counter, o, n); +} + +/** + * atomic64_xchg - xchg atomic64 variable + * @v: pointer to type atomic64_t + * @n: value to assign + * + * Atomically xchgs the value of @v to @n and returns + * the old value. + */ +static inline long long atomic64_xchg(atomic64_t *v, long long n) +{ + long long o; + unsigned high = (unsigned)(n >> 32); + unsigned low = (unsigned)n; + asm volatile(ATOMIC64_ALTERNATIVE(xchg) + : "=A" (o), "+b" (low), "+c" (high) + : "S" (v) + : "memory" + ); + return o; +} + +/** + * atomic64_set - set atomic64 variable + * @v: pointer to type atomic64_t + * @n: value to assign + * + * Atomically sets the value of @v to @n. + */ +static inline void atomic64_set(atomic64_t *v, long long i) +{ + unsigned high = (unsigned)(i >> 32); + unsigned low = (unsigned)i; + asm volatile(ATOMIC64_ALTERNATIVE(set) + : "+b" (low), "+c" (high) + : "S" (v) + : "eax", "edx", "memory" + ); +} + +/** + * atomic64_read - read atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically reads the value of @v and returns it. + */ +static inline long long atomic64_read(const atomic64_t *v) +{ + long long r; + asm volatile(ATOMIC64_ALTERNATIVE(read) + : "=A" (r), "+c" (v) + : : "memory" + ); + return r; + } + +/** + * atomic64_add_return - add and return + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v and returns @i + *@v + */ +static inline long long atomic64_add_return(long long i, atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE(add_return) + : "+A" (i), "+c" (v) + : : "memory" + ); + return i; +} + +/* + * Other variants with different arithmetic operators: + */ +static inline long long atomic64_sub_return(long long i, atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE(sub_return) + : "+A" (i), "+c" (v) + : : "memory" + ); + return i; +} + +static inline long long atomic64_inc_return(atomic64_t *v) +{ + long long a; + asm volatile(ATOMIC64_ALTERNATIVE(inc_return) + : "=A" (a) + : "S" (v) + : "memory", "ecx" + ); + return a; +} + +static inline long long atomic64_dec_return(atomic64_t *v) +{ + long long a; + asm volatile(ATOMIC64_ALTERNATIVE(dec_return) + : "=A" (a) + : "S" (v) + : "memory", "ecx" + ); + return a; +} + +/** + * atomic64_add - add integer to atomic64 variable + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v. + */ +static inline long long atomic64_add(long long i, atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE_(add, add_return) + : "+A" (i), "+c" (v) + : : "memory" + ); + return i; +} + +/** + * atomic64_sub - subtract the atomic64 variable + * @i: integer value to subtract + * @v: pointer to type atomic64_t + * + * Atomically subtracts @i from @v. + */ +static inline long long atomic64_sub(long long i, atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE_(sub, sub_return) + : "+A" (i), "+c" (v) + : : "memory" + ); + return i; +} + +/** + * atomic64_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer to type atomic64_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline int atomic64_sub_and_test(long long i, atomic64_t *v) +{ + return atomic64_sub_return(i, v) == 0; +} + +/** + * atomic64_inc - increment atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically increments @v by 1. + */ +static inline void atomic64_inc(atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE_(inc, inc_return) + : : "S" (v) + : "memory", "eax", "ecx", "edx" + ); +} + +/** + * atomic64_dec - decrement atomic64 variable + * @ptr: pointer to type atomic64_t + * + * Atomically decrements @ptr by 1. + */ +static inline void atomic64_dec(atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE_(dec, dec_return) + : : "S" (v) + : "memory", "eax", "ecx", "edx" + ); +} + +/** + * atomic64_dec_and_test - decrement and test + * @v: pointer to type atomic64_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline int atomic64_dec_and_test(atomic64_t *v) +{ + return atomic64_dec_return(v) == 0; +} + +/** + * atomic64_inc_and_test - increment and test + * @v: pointer to type atomic64_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline int atomic64_inc_and_test(atomic64_t *v) +{ + return atomic64_inc_return(v) == 0; +} + +/** + * atomic64_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline int atomic64_add_negative(long long i, atomic64_t *v) +{ + return atomic64_add_return(i, v) < 0; +} + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns the old value of @v. + */ +static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) +{ + unsigned low = (unsigned)u; + unsigned high = (unsigned)(u >> 32); + asm volatile(ATOMIC64_ALTERNATIVE(add_unless) "\n\t" + : "+A" (a), "+c" (v), "+S" (low), "+D" (high) + : : "memory"); + return (int)a; +} + + +static inline int atomic64_inc_not_zero(atomic64_t *v) +{ + int r; + asm volatile(ATOMIC64_ALTERNATIVE(inc_not_zero) + : "=a" (r) + : "S" (v) + : "ecx", "edx", "memory" + ); + return r; +} + +static inline long long atomic64_dec_if_positive(atomic64_t *v) +{ + long long r; + asm volatile(ATOMIC64_ALTERNATIVE(dec_if_positive) + : "=A" (r) + : "S" (v) + : "ecx", "memory" + ); + return r; +} + +#undef ATOMIC64_ALTERNATIVE +#undef ATOMIC64_ALTERNATIVE_ + +#endif /* _ASM_X86_ATOMIC64_32_H */ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h new file mode 100644 index 00000000000..0e1cbfc8ee0 --- /dev/null +++ b/arch/x86/include/asm/atomic64_64.h @@ -0,0 +1,243 @@ +#ifndef _ASM_X86_ATOMIC64_64_H +#define _ASM_X86_ATOMIC64_64_H + +#include <linux/types.h> +#include <asm/alternative.h> +#include <asm/cmpxchg.h> + +/* The 64-bit atomic type */ + +#define ATOMIC64_INIT(i) { (i) } + +/** + * atomic64_read - read atomic64 variable + * @v: pointer of type atomic64_t + * + * Atomically reads the value of @v. + * Doesn't imply a read memory barrier. + */ +static inline long atomic64_read(const atomic64_t *v) +{ + return (*(volatile long *)&(v)->counter); +} + +/** + * atomic64_set - set atomic64 variable + * @v: pointer to type atomic64_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +static inline void atomic64_set(atomic64_t *v, long i) +{ + v->counter = i; +} + +/** + * atomic64_add - add integer to atomic64 variable + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v. + */ +static inline void atomic64_add(long i, atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "addq %1,%0" + : "=m" (v->counter) + : "er" (i), "m" (v->counter)); +} + +/** + * atomic64_sub - subtract the atomic64 variable + * @i: integer value to subtract + * @v: pointer to type atomic64_t + * + * Atomically subtracts @i from @v. + */ +static inline void atomic64_sub(long i, atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "subq %1,%0" + : "=m" (v->counter) + : "er" (i), "m" (v->counter)); +} + +/** + * atomic64_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer to type atomic64_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline int atomic64_sub_and_test(long i, atomic64_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "subq %2,%0; sete %1" + : "=m" (v->counter), "=qm" (c) + : "er" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * atomic64_inc - increment atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically increments @v by 1. + */ +static inline void atomic64_inc(atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "incq %0" + : "=m" (v->counter) + : "m" (v->counter)); +} + +/** + * atomic64_dec - decrement atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically decrements @v by 1. + */ +static inline void atomic64_dec(atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "decq %0" + : "=m" (v->counter) + : "m" (v->counter)); +} + +/** + * atomic64_dec_and_test - decrement and test + * @v: pointer to type atomic64_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline int atomic64_dec_and_test(atomic64_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "decq %0; sete %1" + : "=m" (v->counter), "=qm" (c) + : "m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic64_inc_and_test - increment and test + * @v: pointer to type atomic64_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline int atomic64_inc_and_test(atomic64_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "incq %0; sete %1" + : "=m" (v->counter), "=qm" (c) + : "m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic64_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline int atomic64_add_negative(long i, atomic64_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "addq %2,%0; sets %1" + : "=m" (v->counter), "=qm" (c) + : "er" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * atomic64_add_return - add and return + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v and returns @i + @v + */ +static inline long atomic64_add_return(long i, atomic64_t *v) +{ + return i + xadd(&v->counter, i); +} + +static inline long atomic64_sub_return(long i, atomic64_t *v) +{ + return atomic64_add_return(-i, v); +} + +#define atomic64_inc_return(v) (atomic64_add_return(1, (v))) +#define atomic64_dec_return(v) (atomic64_sub_return(1, (v))) + +static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new) +{ + return cmpxchg(&v->counter, old, new); +} + +static inline long atomic64_xchg(atomic64_t *v, long new) +{ + return xchg(&v->counter, new); +} + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns the old value of @v. + */ +static inline int atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long c, old; + c = atomic64_read(v); + for (;;) { + if (unlikely(c == (u))) + break; + old = atomic64_cmpxchg((v), c, c + (a)); + if (likely(old == c)) + break; + c = old; + } + return c != (u); +} + +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + +/* + * atomic64_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic_t + * + * The function returns the old value of *v minus 1, even if + * the atomic variable, v, was not decremented. + */ +static inline long atomic64_dec_if_positive(atomic64_t *v) +{ + long c, old, dec; + c = atomic64_read(v); + for (;;) { + dec = c - 1; + if (unlikely(dec < 0)) + break; + old = atomic64_cmpxchg((v), c, dec); + if (likely(old == c)) + break; + c = old; + } + return dec; +} + +#endif /* _ASM_X86_ATOMIC64_64_H */ diff --git a/arch/x86/include/asm/auxvec.h b/arch/x86/include/asm/auxvec.h new file mode 100644 index 00000000000..1316b4c3542 --- /dev/null +++ b/arch/x86/include/asm/auxvec.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_AUXVEC_H +#define _ASM_X86_AUXVEC_H +/* + * Architecture-neutral AT_ values in 0-17, leave some room + * for more of them, start the x86-specific ones at 32. + */ +#ifdef __i386__ +#define AT_SYSINFO 32 +#endif +#define AT_SYSINFO_EHDR 33 + +#endif /* _ASM_X86_AUXVEC_H */ diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h new file mode 100644 index 00000000000..aa6a3170ab5 --- /dev/null +++ b/arch/x86/include/asm/bios_ebda.h @@ -0,0 +1,60 @@ +#ifndef _ASM_X86_BIOS_EBDA_H +#define _ASM_X86_BIOS_EBDA_H + +#include <asm/io.h> + +/* + * Returns physical address of EBDA. Returns 0 if there is no EBDA. + */ +static inline unsigned int get_bios_ebda(void) +{ + /* + * There is a real-mode segmented pointer pointing to the + * 4K EBDA area at 0x40E. + */ + unsigned int address = *(unsigned short *)phys_to_virt(0x40E); + address <<= 4; + return address; /* 0 means none */ +} + +/* + * Return the sanitized length of the EBDA in bytes, if it exists. + */ +static inline unsigned int get_bios_ebda_length(void) +{ + unsigned int address; + unsigned int length; + + address = get_bios_ebda(); + if (!address) + return 0; + + /* EBDA length is byte 0 of the EBDA (stored in KiB) */ + length = *(unsigned char *)phys_to_virt(address); + length <<= 10; + + /* Trim the length if it extends beyond 640KiB */ + length = min_t(unsigned int, (640 * 1024) - address, length); + return length; +} + +void reserve_ebda_region(void); + +#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION +/* + * This is obviously not a great place for this, but we want to be + * able to scatter it around anywhere in the kernel. + */ +void check_for_bios_corruption(void); +void start_periodic_check_for_corruption(void); +#else +static inline void check_for_bios_corruption(void) +{ +} + +static inline void start_periodic_check_for_corruption(void) +{ +} +#endif + +#endif /* _ASM_X86_BIOS_EBDA_H */ diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h new file mode 100644 index 00000000000..b97596e2b68 --- /dev/null +++ b/arch/x86/include/asm/bitops.h @@ -0,0 +1,514 @@ +#ifndef _ASM_X86_BITOPS_H +#define _ASM_X86_BITOPS_H + +/* + * Copyright 1992, Linus Torvalds. + * + * Note: inlines with more than a single statement should be marked + * __always_inline to avoid problems with older gcc's inlining heuristics. + */ + +#ifndef _LINUX_BITOPS_H +#error only <linux/bitops.h> can be included directly +#endif + +#include <linux/compiler.h> +#include <asm/alternative.h> + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ + +#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) +/* Technically wrong, but this avoids compilation errors on some gcc + versions. */ +#define BITOP_ADDR(x) "=m" (*(volatile long *) (x)) +#else +#define BITOP_ADDR(x) "+m" (*(volatile long *) (x)) +#endif + +#define ADDR BITOP_ADDR(addr) + +/* + * We do the locked ops that don't return the old value as + * a mask operation on a byte. + */ +#define IS_IMMEDIATE(nr) (__builtin_constant_p(nr)) +#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3)) +#define CONST_MASK(nr) (1 << ((nr) & 7)) + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * + * Note: there are no guarantees that this function will not be reordered + * on non x86 architectures, so if you are writing portable code, + * make sure not to rely on its reordering guarantees. + * + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static __always_inline void +set_bit(unsigned int nr, volatile unsigned long *addr) +{ + if (IS_IMMEDIATE(nr)) { + asm volatile(LOCK_PREFIX "orb %1,%0" + : CONST_MASK_ADDR(nr, addr) + : "iq" ((u8)CONST_MASK(nr)) + : "memory"); + } else { + asm volatile(LOCK_PREFIX "bts %1,%0" + : BITOP_ADDR(addr) : "Ir" (nr) : "memory"); + } +} + +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __set_bit(int nr, volatile unsigned long *addr) +{ + asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static __always_inline void +clear_bit(int nr, volatile unsigned long *addr) +{ + if (IS_IMMEDIATE(nr)) { + asm volatile(LOCK_PREFIX "andb %1,%0" + : CONST_MASK_ADDR(nr, addr) + : "iq" ((u8)~CONST_MASK(nr))); + } else { + asm volatile(LOCK_PREFIX "btr %1,%0" + : BITOP_ADDR(addr) + : "Ir" (nr)); + } +} + +/* + * clear_bit_unlock - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and implies release semantics before the memory + * operation. It can be used for an unlock. + */ +static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr) +{ + barrier(); + clear_bit(nr, addr); +} + +static inline void __clear_bit(int nr, volatile unsigned long *addr) +{ + asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); +} + +/* + * __clear_bit_unlock - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * __clear_bit() is non-atomic and implies release semantics before the memory + * operation. It can be used for an unlock if no other CPUs can concurrently + * modify other bits in the word. + * + * No memory barrier is required here, because x86 cannot reorder stores past + * older loads. Same principle as spin_unlock. + */ +static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr) +{ + barrier(); + __clear_bit(nr, addr); +} + +#define smp_mb__before_clear_bit() barrier() +#define smp_mb__after_clear_bit() barrier() + +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to change + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __change_bit(int nr, volatile unsigned long *addr) +{ + asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); +} + +/** + * change_bit - Toggle a bit in memory + * @nr: Bit to change + * @addr: Address to start counting from + * + * change_bit() is atomic and may not be reordered. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void change_bit(int nr, volatile unsigned long *addr) +{ + if (IS_IMMEDIATE(nr)) { + asm volatile(LOCK_PREFIX "xorb %1,%0" + : CONST_MASK_ADDR(nr, addr) + : "iq" ((u8)CONST_MASK(nr))); + } else { + asm volatile(LOCK_PREFIX "btc %1,%0" + : BITOP_ADDR(addr) + : "Ir" (nr)); + } +} + +/** + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_set_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile(LOCK_PREFIX "bts %2,%1\n\t" + "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); + + return oldbit; +} + +/** + * test_and_set_bit_lock - Set a bit and return its old value for lock + * @nr: Bit to set + * @addr: Address to count from + * + * This is the same as test_and_set_bit on x86. + */ +static __always_inline int +test_and_set_bit_lock(int nr, volatile unsigned long *addr) +{ + return test_and_set_bit(nr, addr); +} + +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm("bts %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR + : "Ir" (nr)); + return oldbit; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile(LOCK_PREFIX "btr %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); + + return oldbit; +} + +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile("btr %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR + : "Ir" (nr)); + return oldbit; +} + +/* WARNING: non atomic and it can be reordered! */ +static inline int __test_and_change_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile("btc %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR + : "Ir" (nr) : "memory"); + + return oldbit; +} + +/** + * test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_change_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile(LOCK_PREFIX "btc %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); + + return oldbit; +} + +static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) +{ + return ((1UL << (nr % BITS_PER_LONG)) & + (addr[nr / BITS_PER_LONG])) != 0; +} + +static inline int variable_test_bit(int nr, volatile const unsigned long *addr) +{ + int oldbit; + + asm volatile("bt %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit) + : "m" (*(unsigned long *)addr), "Ir" (nr)); + + return oldbit; +} + +#if 0 /* Fool kernel-doc since it doesn't do macros yet */ +/** + * test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static int test_bit(int nr, const volatile unsigned long *addr); +#endif + +#define test_bit(nr, addr) \ + (__builtin_constant_p((nr)) \ + ? constant_test_bit((nr), (addr)) \ + : variable_test_bit((nr), (addr))) + +/** + * __ffs - find first set bit in word + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static inline unsigned long __ffs(unsigned long word) +{ + asm("bsf %1,%0" + : "=r" (word) + : "rm" (word)); + return word; +} + +/** + * ffz - find first zero bit in word + * @word: The word to search + * + * Undefined if no zero exists, so code should check against ~0UL first. + */ +static inline unsigned long ffz(unsigned long word) +{ + asm("bsf %1,%0" + : "=r" (word) + : "r" (~word)); + return word; +} + +/* + * __fls: find last set bit in word + * @word: The word to search + * + * Undefined if no set bit exists, so code should check against 0 first. + */ +static inline unsigned long __fls(unsigned long word) +{ + asm("bsr %1,%0" + : "=r" (word) + : "rm" (word)); + return word; +} + +#undef ADDR + +#ifdef __KERNEL__ +/** + * ffs - find first set bit in word + * @x: the word to search + * + * This is defined the same way as the libc and compiler builtin ffs + * routines, therefore differs in spirit from the other bitops. + * + * ffs(value) returns 0 if value is 0 or the position of the first + * set bit if value is nonzero. The first (least significant) bit + * is at position 1. + */ +static inline int ffs(int x) +{ + int r; + +#ifdef CONFIG_X86_64 + /* + * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the + * dest reg is undefined if x==0, but their CPU architect says its + * value is written to set it to the same as before, except that the + * top 32 bits will be cleared. + * + * We cannot do this on 32 bits because at the very least some + * 486 CPUs did not behave this way. + */ + long tmp = -1; + asm("bsfl %1,%0" + : "=r" (r) + : "rm" (x), "0" (tmp)); +#elif defined(CONFIG_X86_CMOV) + asm("bsfl %1,%0\n\t" + "cmovzl %2,%0" + : "=&r" (r) : "rm" (x), "r" (-1)); +#else + asm("bsfl %1,%0\n\t" + "jnz 1f\n\t" + "movl $-1,%0\n" + "1:" : "=r" (r) : "rm" (x)); +#endif + return r + 1; +} + +/** + * fls - find last set bit in word + * @x: the word to search + * + * This is defined in a similar way as the libc and compiler builtin + * ffs, but returns the position of the most significant set bit. + * + * fls(value) returns 0 if value is 0 or the position of the last + * set bit if value is nonzero. The last (most significant) bit is + * at position 32. + */ +static inline int fls(int x) +{ + int r; + +#ifdef CONFIG_X86_64 + /* + * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the + * dest reg is undefined if x==0, but their CPU architect says its + * value is written to set it to the same as before, except that the + * top 32 bits will be cleared. + * + * We cannot do this on 32 bits because at the very least some + * 486 CPUs did not behave this way. + */ + long tmp = -1; + asm("bsrl %1,%0" + : "=r" (r) + : "rm" (x), "0" (tmp)); +#elif defined(CONFIG_X86_CMOV) + asm("bsrl %1,%0\n\t" + "cmovzl %2,%0" + : "=&r" (r) : "rm" (x), "rm" (-1)); +#else + asm("bsrl %1,%0\n\t" + "jnz 1f\n\t" + "movl $-1,%0\n" + "1:" : "=r" (r) : "rm" (x)); +#endif + return r + 1; +} + +/** + * fls64 - find last set bit in a 64-bit word + * @x: the word to search + * + * This is defined in a similar way as the libc and compiler builtin + * ffsll, but returns the position of the most significant set bit. + * + * fls64(value) returns 0 if value is 0 or the position of the last + * set bit if value is nonzero. The last (most significant) bit is + * at position 64. + */ +#ifdef CONFIG_X86_64 +static __always_inline int fls64(__u64 x) +{ + long bitpos = -1; + /* + * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the + * dest reg is undefined if x==0, but their CPU architect says its + * value is written to set it to the same as before. + */ + asm("bsrq %1,%0" + : "+r" (bitpos) + : "rm" (x)); + return bitpos + 1; +} +#else +#include <asm-generic/bitops/fls64.h> +#endif + +#include <asm-generic/bitops/find.h> + +#include <asm-generic/bitops/sched.h> + +#define ARCH_HAS_FAST_MULTIPLIER 1 + +#include <asm/arch_hweight.h> + +#include <asm-generic/bitops/const_hweight.h> + +#include <asm-generic/bitops/le.h> + +#include <asm-generic/bitops/ext2-atomic-setbit.h> + +#endif /* __KERNEL__ */ +#endif /* _ASM_X86_BITOPS_H */ diff --git a/arch/x86/include/asm/bitsperlong.h b/arch/x86/include/asm/bitsperlong.h new file mode 100644 index 00000000000..b0ae1c4dc79 --- /dev/null +++ b/arch/x86/include/asm/bitsperlong.h @@ -0,0 +1,13 @@ +#ifndef __ASM_X86_BITSPERLONG_H +#define __ASM_X86_BITSPERLONG_H + +#ifdef __x86_64__ +# define __BITS_PER_LONG 64 +#else +# define __BITS_PER_LONG 32 +#endif + +#include <asm-generic/bitsperlong.h> + +#endif /* __ASM_X86_BITSPERLONG_H */ + diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h new file mode 100644 index 00000000000..5e1a2eef3e7 --- /dev/null +++ b/arch/x86/include/asm/boot.h @@ -0,0 +1,47 @@ +#ifndef _ASM_X86_BOOT_H +#define _ASM_X86_BOOT_H + +/* Internal svga startup constants */ +#define NORMAL_VGA 0xffff /* 80x25 mode */ +#define EXTENDED_VGA 0xfffe /* 80x50 mode */ +#define ASK_VGA 0xfffd /* ask for it at bootup */ + +#ifdef __KERNEL__ + +#include <asm/pgtable_types.h> + +/* Physical address where kernel should be loaded. */ +#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ + + (CONFIG_PHYSICAL_ALIGN - 1)) \ + & ~(CONFIG_PHYSICAL_ALIGN - 1)) + +/* Minimum kernel alignment, as a power of two */ +#ifdef CONFIG_X86_64 +#define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT +#else +#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_ORDER) +#endif +#define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) + +#if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ + (CONFIG_PHYSICAL_ALIGN < MIN_KERNEL_ALIGN) +#error "Invalid value for CONFIG_PHYSICAL_ALIGN" +#endif + +#ifdef CONFIG_KERNEL_BZIP2 +#define BOOT_HEAP_SIZE 0x400000 +#else /* !CONFIG_KERNEL_BZIP2 */ + +#define BOOT_HEAP_SIZE 0x8000 + +#endif /* !CONFIG_KERNEL_BZIP2 */ + +#ifdef CONFIG_X86_64 +#define BOOT_STACK_SIZE 0x4000 +#else +#define BOOT_STACK_SIZE 0x1000 +#endif + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_BOOT_H */ diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h new file mode 100644 index 00000000000..2f90c51cc49 --- /dev/null +++ b/arch/x86/include/asm/bootparam.h @@ -0,0 +1,136 @@ +#ifndef _ASM_X86_BOOTPARAM_H +#define _ASM_X86_BOOTPARAM_H + +#include <linux/types.h> +#include <linux/screen_info.h> +#include <linux/apm_bios.h> +#include <linux/edd.h> +#include <asm/e820.h> +#include <asm/ist.h> +#include <video/edid.h> + +/* setup data types */ +#define SETUP_NONE 0 +#define SETUP_E820_EXT 1 +#define SETUP_DTB 2 + +/* extensible setup data list node */ +struct setup_data { + __u64 next; + __u32 type; + __u32 len; + __u8 data[0]; +}; + +struct setup_header { + __u8 setup_sects; + __u16 root_flags; + __u32 syssize; + __u16 ram_size; +#define RAMDISK_IMAGE_START_MASK 0x07FF +#define RAMDISK_PROMPT_FLAG 0x8000 +#define RAMDISK_LOAD_FLAG 0x4000 + __u16 vid_mode; + __u16 root_dev; + __u16 boot_flag; + __u16 jump; + __u32 header; + __u16 version; + __u32 realmode_swtch; + __u16 start_sys; + __u16 kernel_version; + __u8 type_of_loader; + __u8 loadflags; +#define LOADED_HIGH (1<<0) +#define QUIET_FLAG (1<<5) +#define KEEP_SEGMENTS (1<<6) +#define CAN_USE_HEAP (1<<7) + __u16 setup_move_size; + __u32 code32_start; + __u32 ramdisk_image; + __u32 ramdisk_size; + __u32 bootsect_kludge; + __u16 heap_end_ptr; + __u8 ext_loader_ver; + __u8 ext_loader_type; + __u32 cmd_line_ptr; + __u32 initrd_addr_max; + __u32 kernel_alignment; + __u8 relocatable_kernel; + __u8 _pad2[3]; + __u32 cmdline_size; + __u32 hardware_subarch; + __u64 hardware_subarch_data; + __u32 payload_offset; + __u32 payload_length; + __u64 setup_data; + __u64 pref_address; + __u32 init_size; +} __attribute__((packed)); + +struct sys_desc_table { + __u16 length; + __u8 table[14]; +}; + +/* Gleaned from OFW's set-parameters in cpu/x86/pc/linux.fth */ +struct olpc_ofw_header { + __u32 ofw_magic; /* OFW signature */ + __u32 ofw_version; + __u32 cif_handler; /* callback into OFW */ + __u32 irq_desc_table; +} __attribute__((packed)); + +struct efi_info { + __u32 efi_loader_signature; + __u32 efi_systab; + __u32 efi_memdesc_size; + __u32 efi_memdesc_version; + __u32 efi_memmap; + __u32 efi_memmap_size; + __u32 efi_systab_hi; + __u32 efi_memmap_hi; +}; + +/* The so-called "zeropage" */ +struct boot_params { + struct screen_info screen_info; /* 0x000 */ + struct apm_bios_info apm_bios_info; /* 0x040 */ + __u8 _pad2[4]; /* 0x054 */ + __u64 tboot_addr; /* 0x058 */ + struct ist_info ist_info; /* 0x060 */ + __u8 _pad3[16]; /* 0x070 */ + __u8 hd0_info[16]; /* obsolete! */ /* 0x080 */ + __u8 hd1_info[16]; /* obsolete! */ /* 0x090 */ + struct sys_desc_table sys_desc_table; /* 0x0a0 */ + struct olpc_ofw_header olpc_ofw_header; /* 0x0b0 */ + __u8 _pad4[128]; /* 0x0c0 */ + struct edid_info edid_info; /* 0x140 */ + struct efi_info efi_info; /* 0x1c0 */ + __u32 alt_mem_k; /* 0x1e0 */ + __u32 scratch; /* Scratch field! */ /* 0x1e4 */ + __u8 e820_entries; /* 0x1e8 */ + __u8 eddbuf_entries; /* 0x1e9 */ + __u8 edd_mbr_sig_buf_entries; /* 0x1ea */ + __u8 _pad6[6]; /* 0x1eb */ + struct setup_header hdr; /* setup header */ /* 0x1f1 */ + __u8 _pad7[0x290-0x1f1-sizeof(struct setup_header)]; + __u32 edd_mbr_sig_buffer[EDD_MBR_SIG_MAX]; /* 0x290 */ + struct e820entry e820_map[E820MAX]; /* 0x2d0 */ + __u8 _pad8[48]; /* 0xcd0 */ + struct edd_info eddbuf[EDDMAXNR]; /* 0xd00 */ + __u8 _pad9[276]; /* 0xeec */ +} __attribute__((packed)); + +enum { + X86_SUBARCH_PC = 0, + X86_SUBARCH_LGUEST, + X86_SUBARCH_XEN, + X86_SUBARCH_MRST, + X86_SUBARCH_CE4100, + X86_NR_SUBARCHS, +}; + + + +#endif /* _ASM_X86_BOOTPARAM_H */ diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h new file mode 100644 index 00000000000..f654d1bb17f --- /dev/null +++ b/arch/x86/include/asm/bug.h @@ -0,0 +1,39 @@ +#ifndef _ASM_X86_BUG_H +#define _ASM_X86_BUG_H + +#ifdef CONFIG_BUG +#define HAVE_ARCH_BUG + +#ifdef CONFIG_DEBUG_BUGVERBOSE + +#ifdef CONFIG_X86_32 +# define __BUG_C0 "2:\t.long 1b, %c0\n" +#else +# define __BUG_C0 "2:\t.long 1b - 2b, %c0 - 2b\n" +#endif + +#define BUG() \ +do { \ + asm volatile("1:\tud2\n" \ + ".pushsection __bug_table,\"a\"\n" \ + __BUG_C0 \ + "\t.word %c1, 0\n" \ + "\t.org 2b+%c2\n" \ + ".popsection" \ + : : "i" (__FILE__), "i" (__LINE__), \ + "i" (sizeof(struct bug_entry))); \ + unreachable(); \ +} while (0) + +#else +#define BUG() \ +do { \ + asm volatile("ud2"); \ + unreachable(); \ +} while (0) +#endif + +#endif /* !CONFIG_BUG */ + +#include <asm-generic/bug.h> +#endif /* _ASM_X86_BUG_H */ diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h new file mode 100644 index 00000000000..08abf639075 --- /dev/null +++ b/arch/x86/include/asm/bugs.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_BUGS_H +#define _ASM_X86_BUGS_H + +extern void check_bugs(void); + +#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32) +int ppro_with_ram_bug(void); +#else +static inline int ppro_with_ram_bug(void) { return 0; } +#endif + +#endif /* _ASM_X86_BUGS_H */ diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h new file mode 100644 index 00000000000..b13a7a88f3e --- /dev/null +++ b/arch/x86/include/asm/byteorder.h @@ -0,0 +1,6 @@ +#ifndef _ASM_X86_BYTEORDER_H +#define _ASM_X86_BYTEORDER_H + +#include <linux/byteorder/little_endian.h> + +#endif /* _ASM_X86_BYTEORDER_H */ diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h new file mode 100644 index 00000000000..48f99f15452 --- /dev/null +++ b/arch/x86/include/asm/cache.h @@ -0,0 +1,23 @@ +#ifndef _ASM_X86_CACHE_H +#define _ASM_X86_CACHE_H + +#include <linux/linkage.h> + +/* L1 cache line size */ +#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#define __read_mostly __attribute__((__section__(".data..read_mostly"))) + +#define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT +#define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT) + +#ifdef CONFIG_X86_VSMP +#ifdef CONFIG_SMP +#define __cacheline_aligned_in_smp \ + __attribute__((__aligned__(INTERNODE_CACHE_BYTES))) \ + __page_aligned_data +#endif +#endif + +#endif /* _ASM_X86_CACHE_H */ diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h new file mode 100644 index 00000000000..4e12668711e --- /dev/null +++ b/arch/x86/include/asm/cacheflush.h @@ -0,0 +1,165 @@ +#ifndef _ASM_X86_CACHEFLUSH_H +#define _ASM_X86_CACHEFLUSH_H + +/* Caches aren't brain-dead on the intel. */ +#include <asm-generic/cacheflush.h> + +#ifdef CONFIG_X86_PAT +/* + * X86 PAT uses page flags WC and Uncached together to keep track of + * memory type of pages that have backing page struct. X86 PAT supports 3 + * different memory types, _PAGE_CACHE_WB, _PAGE_CACHE_WC and + * _PAGE_CACHE_UC_MINUS and fourth state where page's memory type has not + * been changed from its default (value of -1 used to denote this). + * Note we do not support _PAGE_CACHE_UC here. + */ + +#define _PGMT_DEFAULT 0 +#define _PGMT_WC (1UL << PG_arch_1) +#define _PGMT_UC_MINUS (1UL << PG_uncached) +#define _PGMT_WB (1UL << PG_uncached | 1UL << PG_arch_1) +#define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1) +#define _PGMT_CLEAR_MASK (~_PGMT_MASK) + +static inline unsigned long get_page_memtype(struct page *pg) +{ + unsigned long pg_flags = pg->flags & _PGMT_MASK; + + if (pg_flags == _PGMT_DEFAULT) + return -1; + else if (pg_flags == _PGMT_WC) + return _PAGE_CACHE_WC; + else if (pg_flags == _PGMT_UC_MINUS) + return _PAGE_CACHE_UC_MINUS; + else + return _PAGE_CACHE_WB; +} + +static inline void set_page_memtype(struct page *pg, unsigned long memtype) +{ + unsigned long memtype_flags = _PGMT_DEFAULT; + unsigned long old_flags; + unsigned long new_flags; + + switch (memtype) { + case _PAGE_CACHE_WC: + memtype_flags = _PGMT_WC; + break; + case _PAGE_CACHE_UC_MINUS: + memtype_flags = _PGMT_UC_MINUS; + break; + case _PAGE_CACHE_WB: + memtype_flags = _PGMT_WB; + break; + } + + do { + old_flags = pg->flags; + new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags; + } while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags); +} +#else +static inline unsigned long get_page_memtype(struct page *pg) { return -1; } +static inline void set_page_memtype(struct page *pg, unsigned long memtype) { } +#endif + +/* + * The set_memory_* API can be used to change various attributes of a virtual + * address range. The attributes include: + * Cachability : UnCached, WriteCombining, WriteBack + * Executability : eXeutable, NoteXecutable + * Read/Write : ReadOnly, ReadWrite + * Presence : NotPresent + * + * Within a category, the attributes are mutually exclusive. + * + * The implementation of this API will take care of various aspects that + * are associated with changing such attributes, such as: + * - Flushing TLBs + * - Flushing CPU caches + * - Making sure aliases of the memory behind the mapping don't violate + * coherency rules as defined by the CPU in the system. + * + * What this API does not do: + * - Provide exclusion between various callers - including callers that + * operation on other mappings of the same physical page + * - Restore default attributes when a page is freed + * - Guarantee that mappings other than the requested one are + * in any state, other than that these do not violate rules for + * the CPU you have. Do not depend on any effects on other mappings, + * CPUs other than the one you have may have more relaxed rules. + * The caller is required to take care of these. + */ + +int _set_memory_uc(unsigned long addr, int numpages); +int _set_memory_wc(unsigned long addr, int numpages); +int _set_memory_wb(unsigned long addr, int numpages); +int set_memory_uc(unsigned long addr, int numpages); +int set_memory_wc(unsigned long addr, int numpages); +int set_memory_wb(unsigned long addr, int numpages); +int set_memory_x(unsigned long addr, int numpages); +int set_memory_nx(unsigned long addr, int numpages); +int set_memory_ro(unsigned long addr, int numpages); +int set_memory_rw(unsigned long addr, int numpages); +int set_memory_np(unsigned long addr, int numpages); +int set_memory_4k(unsigned long addr, int numpages); + +int set_memory_array_uc(unsigned long *addr, int addrinarray); +int set_memory_array_wc(unsigned long *addr, int addrinarray); +int set_memory_array_wb(unsigned long *addr, int addrinarray); + +int set_pages_array_uc(struct page **pages, int addrinarray); +int set_pages_array_wc(struct page **pages, int addrinarray); +int set_pages_array_wb(struct page **pages, int addrinarray); + +/* + * For legacy compatibility with the old APIs, a few functions + * are provided that work on a "struct page". + * These functions operate ONLY on the 1:1 kernel mapping of the + * memory that the struct page represents, and internally just + * call the set_memory_* function. See the description of the + * set_memory_* function for more details on conventions. + * + * These APIs should be considered *deprecated* and are likely going to + * be removed in the future. + * The reason for this is the implicit operation on the 1:1 mapping only, + * making this not a generally useful API. + * + * Specifically, many users of the old APIs had a virtual address, + * called virt_to_page() or vmalloc_to_page() on that address to + * get a struct page* that the old API required. + * To convert these cases, use set_memory_*() on the original + * virtual address, do not use these functions. + */ + +int set_pages_uc(struct page *page, int numpages); +int set_pages_wb(struct page *page, int numpages); +int set_pages_x(struct page *page, int numpages); +int set_pages_nx(struct page *page, int numpages); +int set_pages_ro(struct page *page, int numpages); +int set_pages_rw(struct page *page, int numpages); + + +void clflush_cache_range(void *addr, unsigned int size); + +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void); +extern const int rodata_test_data; +extern int kernel_set_to_readonly; +void set_kernel_text_rw(void); +void set_kernel_text_ro(void); +#else +static inline void set_kernel_text_rw(void) { } +static inline void set_kernel_text_ro(void) { } +#endif + +#ifdef CONFIG_DEBUG_RODATA_TEST +int rodata_test(void); +#else +static inline int rodata_test(void) +{ + return 0; +} +#endif + +#endif /* _ASM_X86_CACHEFLUSH_H */ diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h new file mode 100644 index 00000000000..0d467b33883 --- /dev/null +++ b/arch/x86/include/asm/calgary.h @@ -0,0 +1,70 @@ +/* + * Derived from include/asm-powerpc/iommu.h + * + * Copyright IBM Corporation, 2006-2007 + * + * Author: Jon Mason <jdmason@us.ibm.com> + * Author: Muli Ben-Yehuda <muli@il.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_X86_CALGARY_H +#define _ASM_X86_CALGARY_H + +#include <linux/spinlock.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/timer.h> +#include <asm/types.h> + +struct iommu_table { + struct cal_chipset_ops *chip_ops; /* chipset specific funcs */ + unsigned long it_base; /* mapped address of tce table */ + unsigned long it_hint; /* Hint for next alloc */ + unsigned long *it_map; /* A simple allocation bitmap for now */ + void __iomem *bbar; /* Bridge BAR */ + u64 tar_val; /* Table Address Register */ + struct timer_list watchdog_timer; + spinlock_t it_lock; /* Protects it_map */ + unsigned int it_size; /* Size of iommu table in entries */ + unsigned char it_busno; /* Bus number this table belongs to */ +}; + +struct cal_chipset_ops { + void (*handle_quirks)(struct iommu_table *tbl, struct pci_dev *dev); + void (*tce_cache_blast)(struct iommu_table *tbl); + void (*dump_error_regs)(struct iommu_table *tbl); +}; + +#define TCE_TABLE_SIZE_UNSPECIFIED ~0 +#define TCE_TABLE_SIZE_64K 0 +#define TCE_TABLE_SIZE_128K 1 +#define TCE_TABLE_SIZE_256K 2 +#define TCE_TABLE_SIZE_512K 3 +#define TCE_TABLE_SIZE_1M 4 +#define TCE_TABLE_SIZE_2M 5 +#define TCE_TABLE_SIZE_4M 6 +#define TCE_TABLE_SIZE_8M 7 + +extern int use_calgary; + +#ifdef CONFIG_CALGARY_IOMMU +extern int detect_calgary(void); +#else +static inline int detect_calgary(void) { return -ENODEV; } +#endif + +#endif /* _ASM_X86_CALGARY_H */ diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h new file mode 100644 index 00000000000..a9e3a740f69 --- /dev/null +++ b/arch/x86/include/asm/calling.h @@ -0,0 +1,196 @@ +/* + + x86 function call convention, 64-bit: + ------------------------------------- + arguments | callee-saved | extra caller-saved | return + [callee-clobbered] | | [callee-clobbered] | + --------------------------------------------------------------------------- + rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11 | rax, rdx [**] + + ( rsp is obviously invariant across normal function calls. (gcc can 'merge' + functions when it sees tail-call optimization possibilities) rflags is + clobbered. Leftover arguments are passed over the stack frame.) + + [*] In the frame-pointers case rbp is fixed to the stack frame. + + [**] for struct return values wider than 64 bits the return convention is a + bit more complex: up to 128 bits width we return small structures + straight in rax, rdx. For structures larger than that (3 words or + larger) the caller puts a pointer to an on-stack return struct + [allocated in the caller's stack frame] into the first argument - i.e. + into rdi. All other arguments shift up by one in this case. + Fortunately this case is rare in the kernel. + +For 32-bit we have the following conventions - kernel is built with +-mregparm=3 and -freg-struct-return: + + x86 function calling convention, 32-bit: + ---------------------------------------- + arguments | callee-saved | extra caller-saved | return + [callee-clobbered] | | [callee-clobbered] | + ------------------------------------------------------------------------- + eax edx ecx | ebx edi esi ebp [*] | <none> | eax, edx [**] + + ( here too esp is obviously invariant across normal function calls. eflags + is clobbered. Leftover arguments are passed over the stack frame. ) + + [*] In the frame-pointers case ebp is fixed to the stack frame. + + [**] We build with -freg-struct-return, which on 32-bit means similar + semantics as on 64-bit: edx can be used for a second return value + (i.e. covering integer and structure sizes up to 64 bits) - after that + it gets more complex and more expensive: 3-word or larger struct returns + get done in the caller's frame and the pointer to the return struct goes + into regparm0, i.e. eax - the other arguments shift up and the + function's register parameters degenerate to regparm=2 in essence. + +*/ + +#include "dwarf2.h" + +/* + * 64-bit system call stack frame layout defines and helpers, for + * assembly code (note that the seemingly unnecessary parentheses + * are to prevent cpp from inserting spaces in expressions that get + * passed to macros): + */ + +#define R15 (0) +#define R14 (8) +#define R13 (16) +#define R12 (24) +#define RBP (32) +#define RBX (40) + +/* arguments: interrupts/non tracing syscalls only save up to here: */ +#define R11 (48) +#define R10 (56) +#define R9 (64) +#define R8 (72) +#define RAX (80) +#define RCX (88) +#define RDX (96) +#define RSI (104) +#define RDI (112) +#define ORIG_RAX (120) /* + error_code */ +/* end of arguments */ + +/* cpu exception frame or undefined in case of fast syscall: */ +#define RIP (128) +#define CS (136) +#define EFLAGS (144) +#define RSP (152) +#define SS (160) + +#define ARGOFFSET R11 +#define SWFRAME ORIG_RAX + + .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1 + subq $9*8+\addskip, %rsp + CFI_ADJUST_CFA_OFFSET 9*8+\addskip + movq_cfi rdi, 8*8 + movq_cfi rsi, 7*8 + movq_cfi rdx, 6*8 + + .if \save_rcx + movq_cfi rcx, 5*8 + .endif + + movq_cfi rax, 4*8 + + .if \save_r891011 + movq_cfi r8, 3*8 + movq_cfi r9, 2*8 + movq_cfi r10, 1*8 + movq_cfi r11, 0*8 + .endif + + .endm + +#define ARG_SKIP (9*8) + + .macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \ + rstor_r8910=1, rstor_rdx=1 + .if \rstor_r11 + movq_cfi_restore 0*8, r11 + .endif + + .if \rstor_r8910 + movq_cfi_restore 1*8, r10 + movq_cfi_restore 2*8, r9 + movq_cfi_restore 3*8, r8 + .endif + + .if \rstor_rax + movq_cfi_restore 4*8, rax + .endif + + .if \rstor_rcx + movq_cfi_restore 5*8, rcx + .endif + + .if \rstor_rdx + movq_cfi_restore 6*8, rdx + .endif + + movq_cfi_restore 7*8, rsi + movq_cfi_restore 8*8, rdi + + .if ARG_SKIP+\addskip > 0 + addq $ARG_SKIP+\addskip, %rsp + CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip) + .endif + .endm + + .macro LOAD_ARGS offset, skiprax=0 + movq \offset(%rsp), %r11 + movq \offset+8(%rsp), %r10 + movq \offset+16(%rsp), %r9 + movq \offset+24(%rsp), %r8 + movq \offset+40(%rsp), %rcx + movq \offset+48(%rsp), %rdx + movq \offset+56(%rsp), %rsi + movq \offset+64(%rsp), %rdi + .if \skiprax + .else + movq \offset+72(%rsp), %rax + .endif + .endm + +#define REST_SKIP (6*8) + + .macro SAVE_REST + subq $REST_SKIP, %rsp + CFI_ADJUST_CFA_OFFSET REST_SKIP + movq_cfi rbx, 5*8 + movq_cfi rbp, 4*8 + movq_cfi r12, 3*8 + movq_cfi r13, 2*8 + movq_cfi r14, 1*8 + movq_cfi r15, 0*8 + .endm + + .macro RESTORE_REST + movq_cfi_restore 0*8, r15 + movq_cfi_restore 1*8, r14 + movq_cfi_restore 2*8, r13 + movq_cfi_restore 3*8, r12 + movq_cfi_restore 4*8, rbp + movq_cfi_restore 5*8, rbx + addq $REST_SKIP, %rsp + CFI_ADJUST_CFA_OFFSET -(REST_SKIP) + .endm + + .macro SAVE_ALL + SAVE_ARGS + SAVE_REST + .endm + + .macro RESTORE_ALL addskip=0 + RESTORE_REST + RESTORE_ARGS 1, \addskip + .endm + + .macro icebp + .byte 0xf1 + .endm diff --git a/arch/x86/include/asm/ce4100.h b/arch/x86/include/asm/ce4100.h new file mode 100644 index 00000000000..e656ad8c0a2 --- /dev/null +++ b/arch/x86/include/asm/ce4100.h @@ -0,0 +1,6 @@ +#ifndef _ASM_CE4100_H_ +#define _ASM_CE4100_H_ + +int ce4100_pci_init(void); + +#endif diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h new file mode 100644 index 00000000000..848850fd7d6 --- /dev/null +++ b/arch/x86/include/asm/checksum.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "checksum_32.h" +#else +# include "checksum_64.h" +#endif diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h new file mode 100644 index 00000000000..46fc474fd81 --- /dev/null +++ b/arch/x86/include/asm/checksum_32.h @@ -0,0 +1,190 @@ +#ifndef _ASM_X86_CHECKSUM_32_H +#define _ASM_X86_CHECKSUM_32_H + +#include <linux/in6.h> + +#include <asm/uaccess.h> + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +asmlinkage __wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * the same as csum_partial, but copies from src while it + * checksums, and handles user-space pointer exceptions correctly, when needed. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ + +asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst, + int len, __wsum sum, + int *src_err_ptr, int *dst_err_ptr); + +/* + * Note: when you get a NULL pointer exception here this means someone + * passed in an incorrect kernel address to one of these functions. + * + * If you use these functions directly please don't forget the + * access_ok(). + */ +static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, + int len, __wsum sum) +{ + return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL); +} + +static inline __wsum csum_partial_copy_from_user(const void __user *src, + void *dst, + int len, __wsum sum, + int *err_ptr) +{ + might_sleep(); + return csum_partial_copy_generic((__force void *)src, dst, + len, sum, err_ptr, NULL); +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + * + * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by + * Arnt Gulbrandsen. + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned int sum; + + asm volatile("movl (%1), %0 ;\n" + "subl $4, %2 ;\n" + "jbe 2f ;\n" + "addl 4(%1), %0 ;\n" + "adcl 8(%1), %0 ;\n" + "adcl 12(%1), %0;\n" + "1: adcl 16(%1), %0 ;\n" + "lea 4(%1), %1 ;\n" + "decl %2 ;\n" + "jne 1b ;\n" + "adcl $0, %0 ;\n" + "movl %0, %2 ;\n" + "shrl $16, %0 ;\n" + "addw %w2, %w0 ;\n" + "adcl $0, %0 ;\n" + "notl %0 ;\n" + "2: ;\n" + /* Since the input registers which are loaded with iph and ihl + are modified, we must also specify them as outputs, or gcc + will assume they contain their original values. */ + : "=r" (sum), "=r" (iph), "=r" (ihl) + : "1" (iph), "2" (ihl) + : "memory"); + return (__force __sum16)sum; +} + +/* + * Fold a partial checksum + */ + +static inline __sum16 csum_fold(__wsum sum) +{ + asm("addl %1, %0 ;\n" + "adcl $0xffff, %0 ;\n" + : "=r" (sum) + : "r" ((__force u32)sum << 16), + "0" ((__force u32)sum & 0xffff0000)); + return (__force __sum16)(~(__force u32)sum >> 16); +} + +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + asm("addl %1, %0 ;\n" + "adcl %2, %0 ;\n" + "adcl %3, %0 ;\n" + "adcl $0, %0 ;\n" + : "=r" (sum) + : "g" (daddr), "g"(saddr), + "g" ((len + proto) << 8), "0" (sum)); + return sum; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ + +static inline __sum16 ip_compute_csum(const void *buff, int len) +{ + return csum_fold(csum_partial(buff, len, 0)); +} + +#define _HAVE_ARCH_IPV6_CSUM +static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, unsigned short proto, + __wsum sum) +{ + asm("addl 0(%1), %0 ;\n" + "adcl 4(%1), %0 ;\n" + "adcl 8(%1), %0 ;\n" + "adcl 12(%1), %0 ;\n" + "adcl 0(%2), %0 ;\n" + "adcl 4(%2), %0 ;\n" + "adcl 8(%2), %0 ;\n" + "adcl 12(%2), %0 ;\n" + "adcl %3, %0 ;\n" + "adcl %4, %0 ;\n" + "adcl $0, %0 ;\n" + : "=&r" (sum) + : "r" (saddr), "r" (daddr), + "r" (htonl(len)), "r" (htonl(proto)), "0" (sum) + : "memory"); + + return csum_fold(sum); +} + +/* + * Copy and checksum to user + */ +#define HAVE_CSUM_COPY_USER +static inline __wsum csum_and_copy_to_user(const void *src, + void __user *dst, + int len, __wsum sum, + int *err_ptr) +{ + might_sleep(); + if (access_ok(VERIFY_WRITE, dst, len)) + return csum_partial_copy_generic(src, (__force void *)dst, + len, sum, NULL, err_ptr); + + if (len) + *err_ptr = -EFAULT; + + return (__force __wsum)-1; /* invalid checksum */ +} + +#endif /* _ASM_X86_CHECKSUM_32_H */ diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h new file mode 100644 index 00000000000..9bfdc41629e --- /dev/null +++ b/arch/x86/include/asm/checksum_64.h @@ -0,0 +1,191 @@ +#ifndef _ASM_X86_CHECKSUM_64_H +#define _ASM_X86_CHECKSUM_64_H + +/* + * Checksums for x86-64 + * Copyright 2002 by Andi Kleen, SuSE Labs + * with some code from asm-x86/checksum.h + */ + +#include <linux/compiler.h> +#include <asm/uaccess.h> +#include <asm/byteorder.h> + +/** + * csum_fold - Fold and invert a 32bit checksum. + * sum: 32bit unfolded sum + * + * Fold a 32bit running checksum to 16bit and invert it. This is usually + * the last step before putting a checksum into a packet. + * Make sure not to mix with 64bit checksums. + */ +static inline __sum16 csum_fold(__wsum sum) +{ + asm(" addl %1,%0\n" + " adcl $0xffff,%0" + : "=r" (sum) + : "r" ((__force u32)sum << 16), + "0" ((__force u32)sum & 0xffff0000)); + return (__force __sum16)(~(__force u32)sum >> 16); +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + * + * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by + * Arnt Gulbrandsen. + */ + +/** + * ip_fast_csum - Compute the IPv4 header checksum efficiently. + * iph: ipv4 header + * ihl: length of header / 4 + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned int sum; + + asm(" movl (%1), %0\n" + " subl $4, %2\n" + " jbe 2f\n" + " addl 4(%1), %0\n" + " adcl 8(%1), %0\n" + " adcl 12(%1), %0\n" + "1: adcl 16(%1), %0\n" + " lea 4(%1), %1\n" + " decl %2\n" + " jne 1b\n" + " adcl $0, %0\n" + " movl %0, %2\n" + " shrl $16, %0\n" + " addw %w2, %w0\n" + " adcl $0, %0\n" + " notl %0\n" + "2:" + /* Since the input registers which are loaded with iph and ihl + are modified, we must also specify them as outputs, or gcc + will assume they contain their original values. */ + : "=r" (sum), "=r" (iph), "=r" (ihl) + : "1" (iph), "2" (ihl) + : "memory"); + return (__force __sum16)sum; +} + +/** + * csum_tcpup_nofold - Compute an IPv4 pseudo header checksum. + * @saddr: source address + * @daddr: destination address + * @len: length of packet + * @proto: ip protocol of packet + * @sum: initial sum to be added in (32bit unfolded) + * + * Returns the pseudo header checksum the input data. Result is + * 32bit unfolded. + */ +static inline __wsum +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) +{ + asm(" addl %1, %0\n" + " adcl %2, %0\n" + " adcl %3, %0\n" + " adcl $0, %0\n" + : "=r" (sum) + : "g" (daddr), "g" (saddr), + "g" ((len + proto)<<8), "0" (sum)); + return sum; +} + + +/** + * csum_tcpup_magic - Compute an IPv4 pseudo header checksum. + * @saddr: source address + * @daddr: destination address + * @len: length of packet + * @proto: ip protocol of packet + * @sum: initial sum to be added in (32bit unfolded) + * + * Returns the 16bit pseudo header checksum the input data already + * complemented and ready to be filled in. + */ +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + +/** + * csum_partial - Compute an internet checksum. + * @buff: buffer to be checksummed + * @len: length of buffer. + * @sum: initial sum to be added in (32bit unfolded) + * + * Returns the 32bit unfolded internet checksum of the buffer. + * Before filling it in it needs to be csum_fold()'ed. + * buff should be aligned to a 64bit boundary if possible. + */ +extern __wsum csum_partial(const void *buff, int len, __wsum sum); + +#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1 +#define HAVE_CSUM_COPY_USER 1 + + +/* Do not call this directly. Use the wrappers below */ +extern __wsum csum_partial_copy_generic(const void *src, const void *dst, + int len, __wsum sum, + int *src_err_ptr, int *dst_err_ptr); + + +extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum isum, int *errp); +extern __wsum csum_partial_copy_to_user(const void *src, void __user *dst, + int len, __wsum isum, int *errp); +extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, + int len, __wsum sum); + +/* Old names. To be removed. */ +#define csum_and_copy_to_user csum_partial_copy_to_user +#define csum_and_copy_from_user csum_partial_copy_from_user + +/** + * ip_compute_csum - Compute an 16bit IP checksum. + * @buff: buffer address. + * @len: length of buffer. + * + * Returns the 16bit folded/inverted checksum of the passed buffer. + * Ready to fill in. + */ +extern __sum16 ip_compute_csum(const void *buff, int len); + +/** + * csum_ipv6_magic - Compute checksum of an IPv6 pseudo header. + * @saddr: source address + * @daddr: destination address + * @len: length of packet + * @proto: protocol of packet + * @sum: initial sum (32bit unfolded) to be added in + * + * Computes an IPv6 pseudo header checksum. This sum is added the checksum + * into UDP/TCP packets and contains some link layer information. + * Returns the unfolded 32bit checksum. + */ + +struct in6_addr; + +#define _HAVE_ARCH_IPV6_CSUM 1 +extern __sum16 +csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, + __u32 len, unsigned short proto, __wsum sum); + +static inline unsigned add32_with_carry(unsigned a, unsigned b) +{ + asm("addl %2,%0\n\t" + "adcl $0,%0" + : "=r" (a) + : "0" (a), "r" (b)); + return a; +} + +#endif /* _ASM_X86_CHECKSUM_64_H */ diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h new file mode 100644 index 00000000000..0bdbbb3b9ce --- /dev/null +++ b/arch/x86/include/asm/clocksource.h @@ -0,0 +1,18 @@ +/* x86-specific clocksource additions */ + +#ifndef _ASM_X86_CLOCKSOURCE_H +#define _ASM_X86_CLOCKSOURCE_H + +#ifdef CONFIG_X86_64 + +#define VCLOCK_NONE 0 /* No vDSO clock available. */ +#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ +#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ + +struct arch_clocksource_data { + int vclock_mode; +}; + +#endif /* CONFIG_X86_64 */ + +#endif /* _ASM_X86_CLOCKSOURCE_H */ diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h new file mode 100644 index 00000000000..99480e55973 --- /dev/null +++ b/arch/x86/include/asm/cmpxchg.h @@ -0,0 +1,233 @@ +#ifndef ASM_X86_CMPXCHG_H +#define ASM_X86_CMPXCHG_H + +#include <linux/compiler.h> +#include <asm/alternative.h> /* Provides LOCK_PREFIX */ + +/* + * Non-existant functions to indicate usage errors at link time + * (or compile-time if the compiler implements __compiletime_error(). + */ +extern void __xchg_wrong_size(void) + __compiletime_error("Bad argument size for xchg"); +extern void __cmpxchg_wrong_size(void) + __compiletime_error("Bad argument size for cmpxchg"); +extern void __xadd_wrong_size(void) + __compiletime_error("Bad argument size for xadd"); +extern void __add_wrong_size(void) + __compiletime_error("Bad argument size for add"); + +/* + * Constants for operation sizes. On 32-bit, the 64-bit size it set to + * -1 because sizeof will never return -1, thereby making those switch + * case statements guaranteeed dead code which the compiler will + * eliminate, and allowing the "missing symbol in the default case" to + * indicate a usage error. + */ +#define __X86_CASE_B 1 +#define __X86_CASE_W 2 +#define __X86_CASE_L 4 +#ifdef CONFIG_64BIT +#define __X86_CASE_Q 8 +#else +#define __X86_CASE_Q -1 /* sizeof will never return -1 */ +#endif + +/* + * An exchange-type operation, which takes a value and a pointer, and + * returns a the old value. + */ +#define __xchg_op(ptr, arg, op, lock) \ + ({ \ + __typeof__ (*(ptr)) __ret = (arg); \ + switch (sizeof(*(ptr))) { \ + case __X86_CASE_B: \ + asm volatile (lock #op "b %b0, %1\n" \ + : "+q" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + case __X86_CASE_W: \ + asm volatile (lock #op "w %w0, %1\n" \ + : "+r" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + case __X86_CASE_L: \ + asm volatile (lock #op "l %0, %1\n" \ + : "+r" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + case __X86_CASE_Q: \ + asm volatile (lock #op "q %q0, %1\n" \ + : "+r" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + default: \ + __ ## op ## _wrong_size(); \ + } \ + __ret; \ + }) + +/* + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. + * Since this is generally used to protect other memory information, we + * use "asm volatile" and "memory" clobbers to prevent gcc from moving + * information around. + */ +#define xchg(ptr, v) __xchg_op((ptr), (v), xchg, "") + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + */ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case __X86_CASE_B: \ + { \ + volatile u8 *__ptr = (volatile u8 *)(ptr); \ + asm volatile(lock "cmpxchgb %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "q" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_W: \ + { \ + volatile u16 *__ptr = (volatile u16 *)(ptr); \ + asm volatile(lock "cmpxchgw %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_L: \ + { \ + volatile u32 *__ptr = (volatile u32 *)(ptr); \ + asm volatile(lock "cmpxchgl %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_Q: \ + { \ + volatile u64 *__ptr = (volatile u64 *)(ptr); \ + asm volatile(lock "cmpxchgq %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) + +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) + +#define __sync_cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") + +#define __cmpxchg_local(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "") + +#ifdef CONFIG_X86_32 +# include "cmpxchg_32.h" +#else +# include "cmpxchg_64.h" +#endif + +#ifdef __HAVE_ARCH_CMPXCHG +#define cmpxchg(ptr, old, new) \ + __cmpxchg(ptr, old, new, sizeof(*(ptr))) + +#define sync_cmpxchg(ptr, old, new) \ + __sync_cmpxchg(ptr, old, new, sizeof(*(ptr))) + +#define cmpxchg_local(ptr, old, new) \ + __cmpxchg_local(ptr, old, new, sizeof(*(ptr))) +#endif + +/* + * xadd() adds "inc" to "*ptr" and atomically returns the previous + * value of "*ptr". + * + * xadd() is locked when multiple CPUs are online + * xadd_sync() is always locked + * xadd_local() is never locked + */ +#define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock) +#define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) +#define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ") +#define xadd_local(ptr, inc) __xadd((ptr), (inc), "") + +#define __add(ptr, inc, lock) \ + ({ \ + __typeof__ (*(ptr)) __ret = (inc); \ + switch (sizeof(*(ptr))) { \ + case __X86_CASE_B: \ + asm volatile (lock "addb %b1, %0\n" \ + : "+m" (*(ptr)) : "qi" (inc) \ + : "memory", "cc"); \ + break; \ + case __X86_CASE_W: \ + asm volatile (lock "addw %w1, %0\n" \ + : "+m" (*(ptr)) : "ri" (inc) \ + : "memory", "cc"); \ + break; \ + case __X86_CASE_L: \ + asm volatile (lock "addl %1, %0\n" \ + : "+m" (*(ptr)) : "ri" (inc) \ + : "memory", "cc"); \ + break; \ + case __X86_CASE_Q: \ + asm volatile (lock "addq %1, %0\n" \ + : "+m" (*(ptr)) : "ri" (inc) \ + : "memory", "cc"); \ + break; \ + default: \ + __add_wrong_size(); \ + } \ + __ret; \ + }) + +/* + * add_*() adds "inc" to "*ptr" + * + * __add() takes a lock prefix + * add_smp() is locked when multiple CPUs are online + * add_sync() is always locked + */ +#define add_smp(ptr, inc) __add((ptr), (inc), LOCK_PREFIX) +#define add_sync(ptr, inc) __add((ptr), (inc), "lock; ") + +#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \ +({ \ + bool __ret; \ + __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \ + __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \ + BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ + BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ + VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \ + VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \ + asm volatile(pfx "cmpxchg%c4b %2; sete %0" \ + : "=a" (__ret), "+d" (__old2), \ + "+m" (*(p1)), "+m" (*(p2)) \ + : "i" (2 * sizeof(long)), "a" (__old1), \ + "b" (__new1), "c" (__new2)); \ + __ret; \ +}) + +#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \ + __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2) + +#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ + __cmpxchg_double(, p1, p2, o1, o2, n1, n2) + +#endif /* ASM_X86_CMPXCHG_H */ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h new file mode 100644 index 00000000000..53f4b219336 --- /dev/null +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -0,0 +1,171 @@ +#ifndef _ASM_X86_CMPXCHG_32_H +#define _ASM_X86_CMPXCHG_32_H + +/* + * Note: if you use set64_bit(), __cmpxchg64(), or their variants, you + * you need to test for the feature in boot_cpu_data. + */ + +/* + * CMPXCHG8B only writes to the target if we had the previous + * value in registers, otherwise it acts as a read and gives us the + * "new previous" value. That is why there is a loop. Preloading + * EDX:EAX is a performance optimization: in the common case it means + * we need only one locked operation. + * + * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very + * least an FPU save and/or %cr0.ts manipulation. + * + * cmpxchg8b must be used with the lock prefix here to allow the + * instruction to be executed atomically. We need to have the reader + * side to see the coherent 64bit value. + */ +static inline void set_64bit(volatile u64 *ptr, u64 value) +{ + u32 low = value; + u32 high = value >> 32; + u64 prev = *ptr; + + asm volatile("\n1:\t" + LOCK_PREFIX "cmpxchg8b %0\n\t" + "jnz 1b" + : "=m" (*ptr), "+A" (prev) + : "b" (low), "c" (high) + : "memory"); +} + +#ifdef CONFIG_X86_CMPXCHG +#define __HAVE_ARCH_CMPXCHG 1 +#endif + +#ifdef CONFIG_X86_CMPXCHG64 +#define cmpxchg64(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \ + (unsigned long long)(n))) +#define cmpxchg64_local(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \ + (unsigned long long)(n))) +#endif + +static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new) +{ + u64 prev; + asm volatile(LOCK_PREFIX "cmpxchg8b %1" + : "=A" (prev), + "+m" (*ptr) + : "b" ((u32)new), + "c" ((u32)(new >> 32)), + "0" (old) + : "memory"); + return prev; +} + +static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) +{ + u64 prev; + asm volatile("cmpxchg8b %1" + : "=A" (prev), + "+m" (*ptr) + : "b" ((u32)new), + "c" ((u32)(new >> 32)), + "0" (old) + : "memory"); + return prev; +} + +#ifndef CONFIG_X86_CMPXCHG +/* + * Building a kernel capable running on 80386. It may be necessary to + * simulate the cmpxchg on the 80386 CPU. For that purpose we define + * a function for each of the sizes we support. + */ + +extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8); +extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16); +extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32); + +static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + switch (size) { + case 1: + return cmpxchg_386_u8(ptr, old, new); + case 2: + return cmpxchg_386_u16(ptr, old, new); + case 4: + return cmpxchg_386_u32(ptr, old, new); + } + return old; +} + +#define cmpxchg(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + if (likely(boot_cpu_data.x86 > 3)) \ + __ret = (__typeof__(*(ptr)))__cmpxchg((ptr), \ + (unsigned long)(o), (unsigned long)(n), \ + sizeof(*(ptr))); \ + else \ + __ret = (__typeof__(*(ptr)))cmpxchg_386((ptr), \ + (unsigned long)(o), (unsigned long)(n), \ + sizeof(*(ptr))); \ + __ret; \ +}) +#define cmpxchg_local(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + if (likely(boot_cpu_data.x86 > 3)) \ + __ret = (__typeof__(*(ptr)))__cmpxchg_local((ptr), \ + (unsigned long)(o), (unsigned long)(n), \ + sizeof(*(ptr))); \ + else \ + __ret = (__typeof__(*(ptr)))cmpxchg_386((ptr), \ + (unsigned long)(o), (unsigned long)(n), \ + sizeof(*(ptr))); \ + __ret; \ +}) +#endif + +#ifndef CONFIG_X86_CMPXCHG64 +/* + * Building a kernel capable running on 80386 and 80486. It may be necessary + * to simulate the cmpxchg8b on the 80386 and 80486 CPU. + */ + +#define cmpxchg64(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (o); \ + __typeof__(*(ptr)) __new = (n); \ + alternative_io(LOCK_PREFIX_HERE \ + "call cmpxchg8b_emu", \ + "lock; cmpxchg8b (%%esi)" , \ + X86_FEATURE_CX8, \ + "=A" (__ret), \ + "S" ((ptr)), "0" (__old), \ + "b" ((unsigned int)__new), \ + "c" ((unsigned int)(__new>>32)) \ + : "memory"); \ + __ret; }) + + +#define cmpxchg64_local(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (o); \ + __typeof__(*(ptr)) __new = (n); \ + alternative_io("call cmpxchg8b_emu", \ + "cmpxchg8b (%%esi)" , \ + X86_FEATURE_CX8, \ + "=A" (__ret), \ + "S" ((ptr)), "0" (__old), \ + "b" ((unsigned int)__new), \ + "c" ((unsigned int)(__new>>32)) \ + : "memory"); \ + __ret; }) + +#endif + +#define system_has_cmpxchg_double() cpu_has_cx8 + +#endif /* _ASM_X86_CMPXCHG_32_H */ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h new file mode 100644 index 00000000000..614be87f1a9 --- /dev/null +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -0,0 +1,25 @@ +#ifndef _ASM_X86_CMPXCHG_64_H +#define _ASM_X86_CMPXCHG_64_H + +static inline void set_64bit(volatile u64 *ptr, u64 val) +{ + *ptr = val; +} + +#define __HAVE_ARCH_CMPXCHG 1 + +#define cmpxchg64(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg((ptr), (o), (n)); \ +}) + +#define cmpxchg64_local(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg_local((ptr), (o), (n)); \ +}) + +#define system_has_cmpxchg_double() cpu_has_cx16 + +#endif /* _ASM_X86_CMPXCHG_64_H */ diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h new file mode 100644 index 00000000000..30d737ef2a4 --- /dev/null +++ b/arch/x86/include/asm/compat.h @@ -0,0 +1,220 @@ +#ifndef _ASM_X86_COMPAT_H +#define _ASM_X86_COMPAT_H + +/* + * Architecture specific compatibility types + */ +#include <linux/types.h> +#include <linux/sched.h> +#include <asm/user32.h> + +#define COMPAT_USER_HZ 100 +#define COMPAT_UTS_MACHINE "i686\0\0" + +typedef u32 compat_size_t; +typedef s32 compat_ssize_t; +typedef s32 compat_time_t; +typedef s32 compat_clock_t; +typedef s32 compat_pid_t; +typedef u16 __compat_uid_t; +typedef u16 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; +typedef u16 compat_mode_t; +typedef u32 compat_ino_t; +typedef u16 compat_dev_t; +typedef s32 compat_off_t; +typedef s64 compat_loff_t; +typedef u16 compat_nlink_t; +typedef u16 compat_ipc_pid_t; +typedef s32 compat_daddr_t; +typedef u32 compat_caddr_t; +typedef __kernel_fsid_t compat_fsid_t; +typedef s32 compat_timer_t; +typedef s32 compat_key_t; + +typedef s32 compat_int_t; +typedef s32 compat_long_t; +typedef s64 __attribute__((aligned(4))) compat_s64; +typedef u32 compat_uint_t; +typedef u32 compat_ulong_t; +typedef u64 __attribute__((aligned(4))) compat_u64; + +struct compat_timespec { + compat_time_t tv_sec; + s32 tv_nsec; +}; + +struct compat_timeval { + compat_time_t tv_sec; + s32 tv_usec; +}; + +struct compat_stat { + compat_dev_t st_dev; + u16 __pad1; + compat_ino_t st_ino; + compat_mode_t st_mode; + compat_nlink_t st_nlink; + __compat_uid_t st_uid; + __compat_gid_t st_gid; + compat_dev_t st_rdev; + u16 __pad2; + u32 st_size; + u32 st_blksize; + u32 st_blocks; + u32 st_atime; + u32 st_atime_nsec; + u32 st_mtime; + u32 st_mtime_nsec; + u32 st_ctime; + u32 st_ctime_nsec; + u32 __unused4; + u32 __unused5; +}; + +struct compat_flock { + short l_type; + short l_whence; + compat_off_t l_start; + compat_off_t l_len; + compat_pid_t l_pid; +}; + +#define F_GETLK64 12 /* using 'struct flock64' */ +#define F_SETLK64 13 +#define F_SETLKW64 14 + +/* + * IA32 uses 4 byte alignment for 64 bit quantities, + * so we need to pack this structure. + */ +struct compat_flock64 { + short l_type; + short l_whence; + compat_loff_t l_start; + compat_loff_t l_len; + compat_pid_t l_pid; +} __attribute__((packed)); + +struct compat_statfs { + int f_type; + int f_bsize; + int f_blocks; + int f_bfree; + int f_bavail; + int f_files; + int f_ffree; + compat_fsid_t f_fsid; + int f_namelen; /* SunOS ignores this field. */ + int f_frsize; + int f_flags; + int f_spare[4]; +}; + +#define COMPAT_RLIM_OLD_INFINITY 0x7fffffff +#define COMPAT_RLIM_INFINITY 0xffffffff + +typedef u32 compat_old_sigset_t; /* at least 32 bits */ + +#define _COMPAT_NSIG 64 +#define _COMPAT_NSIG_BPW 32 + +typedef u32 compat_sigset_word; + +#define COMPAT_OFF_T_MAX 0x7fffffff +#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL + +struct compat_ipc64_perm { + compat_key_t key; + __compat_uid32_t uid; + __compat_gid32_t gid; + __compat_uid32_t cuid; + __compat_gid32_t cgid; + unsigned short mode; + unsigned short __pad1; + unsigned short seq; + unsigned short __pad2; + compat_ulong_t unused1; + compat_ulong_t unused2; +}; + +struct compat_semid64_ds { + struct compat_ipc64_perm sem_perm; + compat_time_t sem_otime; + compat_ulong_t __unused1; + compat_time_t sem_ctime; + compat_ulong_t __unused2; + compat_ulong_t sem_nsems; + compat_ulong_t __unused3; + compat_ulong_t __unused4; +}; + +struct compat_msqid64_ds { + struct compat_ipc64_perm msg_perm; + compat_time_t msg_stime; + compat_ulong_t __unused1; + compat_time_t msg_rtime; + compat_ulong_t __unused2; + compat_time_t msg_ctime; + compat_ulong_t __unused3; + compat_ulong_t msg_cbytes; + compat_ulong_t msg_qnum; + compat_ulong_t msg_qbytes; + compat_pid_t msg_lspid; + compat_pid_t msg_lrpid; + compat_ulong_t __unused4; + compat_ulong_t __unused5; +}; + +struct compat_shmid64_ds { + struct compat_ipc64_perm shm_perm; + compat_size_t shm_segsz; + compat_time_t shm_atime; + compat_ulong_t __unused1; + compat_time_t shm_dtime; + compat_ulong_t __unused2; + compat_time_t shm_ctime; + compat_ulong_t __unused3; + compat_pid_t shm_cpid; + compat_pid_t shm_lpid; + compat_ulong_t shm_nattch; + compat_ulong_t __unused4; + compat_ulong_t __unused5; +}; + +/* + * The type of struct elf_prstatus.pr_reg in compatible core dumps. + */ +typedef struct user_regs_struct32 compat_elf_gregset_t; + +/* + * A pointer passed in from user mode. This should not + * be used for syscall parameters, just declare them + * as pointers because the syscall entry code will have + * appropriately converted them already. + */ +typedef u32 compat_uptr_t; + +static inline void __user *compat_ptr(compat_uptr_t uptr) +{ + return (void __user *)(unsigned long)uptr; +} + +static inline compat_uptr_t ptr_to_compat(void __user *uptr) +{ + return (u32)(unsigned long)uptr; +} + +static inline void __user *arch_compat_alloc_user_space(long len) +{ + struct pt_regs *regs = task_pt_regs(current); + return (void __user *)regs->sp - len; +} + +static inline int is_compat_task(void) +{ + return current_thread_info()->status & TS_COMPAT; +} + +#endif /* _ASM_X86_COMPAT_H */ diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h new file mode 100644 index 00000000000..4564c8e28a3 --- /dev/null +++ b/arch/x86/include/asm/cpu.h @@ -0,0 +1,37 @@ +#ifndef _ASM_X86_CPU_H +#define _ASM_X86_CPU_H + +#include <linux/device.h> +#include <linux/cpu.h> +#include <linux/topology.h> +#include <linux/nodemask.h> +#include <linux/percpu.h> + +#ifdef CONFIG_SMP + +extern void prefill_possible_map(void); + +#else /* CONFIG_SMP */ + +static inline void prefill_possible_map(void) {} + +#define cpu_physical_id(cpu) boot_cpu_physical_apicid +#define safe_smp_processor_id() 0 +#define stack_smp_processor_id() 0 + +#endif /* CONFIG_SMP */ + +struct x86_cpu { + struct cpu cpu; +}; + +#ifdef CONFIG_HOTPLUG_CPU +extern int arch_register_cpu(int num); +extern void arch_unregister_cpu(int); +#endif + +DECLARE_PER_CPU(int, cpu_state); + +int mwait_usable(const struct cpuinfo_x86 *); + +#endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h new file mode 100644 index 00000000000..8d67d428b0f --- /dev/null +++ b/arch/x86/include/asm/cpufeature.h @@ -0,0 +1,396 @@ +/* + * Defines x86 CPU feature bits + */ +#ifndef _ASM_X86_CPUFEATURE_H +#define _ASM_X86_CPUFEATURE_H + +#include <asm/required-features.h> + +#define NCAPINTS 10 /* N 32-bit words worth of info */ + +/* + * Note: If the comment begins with a quoted string, that string is used + * in /proc/cpuinfo instead of the macro name. If the string is "", + * this feature bit is not displayed in /proc/cpuinfo at all. + */ + +/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ +#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */ +#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */ +#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */ +#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */ +#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ +#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers */ +#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ +#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Exception */ +#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ +#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */ +#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */ +#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */ +#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */ +#define X86_FEATURE_CMOV (0*32+15) /* CMOV instructions */ + /* (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ +#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ +#define X86_FEATURE_PN (0*32+18) /* Processor serial number */ +#define X86_FEATURE_CLFLSH (0*32+19) /* "clflush" CLFLUSH instruction */ +#define X86_FEATURE_DS (0*32+21) /* "dts" Debug Store */ +#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ +#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_XMM (0*32+25) /* "sse" */ +#define X86_FEATURE_XMM2 (0*32+26) /* "sse2" */ +#define X86_FEATURE_SELFSNOOP (0*32+27) /* "ss" CPU self snoop */ +#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_ACC (0*32+29) /* "tm" Automatic clock control */ +#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */ +#define X86_FEATURE_PBE (0*32+31) /* Pending Break Enable */ + +/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ +/* Don't duplicate feature flags which are redundant with Intel! */ +#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */ +#define X86_FEATURE_MP (1*32+19) /* MP Capable. */ +#define X86_FEATURE_NX (1*32+20) /* Execute Disable */ +#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT (1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_GBPAGES (1*32+26) /* "pdpe1gb" GB pages */ +#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */ +#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ +#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ +#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */ + +/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ +#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */ +#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */ +#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */ + +/* Other features, Linux-defined mapping, word 3 */ +/* This range is used for feature bits which conflict or are synthesized */ +#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ +/* cpu types for specific tunings: */ +#define X86_FEATURE_K8 (3*32+ 4) /* "" Opteron, Athlon64 */ +#define X86_FEATURE_K7 (3*32+ 5) /* "" Athlon */ +#define X86_FEATURE_P3 (3*32+ 6) /* "" P3 */ +#define X86_FEATURE_P4 (3*32+ 7) /* "" P4 */ +#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */ +#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ +#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */ +#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */ +#define X86_FEATURE_SYSENTER32 (3*32+15) /* "" sysenter in ia32 userspace */ +#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well */ +#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* "" Mfence synchronizes RDTSC */ +#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */ +#define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */ +#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ + /* 21 available, was AMD_C1E */ +#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ +#define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ +#define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */ +#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */ + +/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ +#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ +#define X86_FEATURE_PCLMULQDQ (4*32+ 1) /* PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 (4*32+ 2) /* 64-bit Debug Store */ +#define X86_FEATURE_MWAIT (4*32+ 3) /* "monitor" Monitor/Mwait support */ +#define X86_FEATURE_DSCPL (4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ +#define X86_FEATURE_VMX (4*32+ 5) /* Hardware virtualization */ +#define X86_FEATURE_SMX (4*32+ 6) /* Safer mode */ +#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */ +#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ +#define X86_FEATURE_CID (4*32+10) /* Context ID */ +#define X86_FEATURE_FMA (4*32+12) /* Fused multiply-add */ +#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ +#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ +#define X86_FEATURE_PDCM (4*32+15) /* Performance Capabilities */ +#define X86_FEATURE_PCID (4*32+17) /* Process Context Identifiers */ +#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */ +#define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */ +#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ +#define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */ +#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER (4*32+24) /* Tsc deadline timer */ +#define X86_FEATURE_AES (4*32+25) /* AES instructions */ +#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ +#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ +#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ +#define X86_FEATURE_F16C (4*32+29) /* 16-bit fp conversions */ +#define X86_FEATURE_RDRAND (4*32+30) /* The RDRAND instruction */ +#define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */ + +/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ +#define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */ +#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* "rng_en" RNG enabled */ +#define X86_FEATURE_XCRYPT (5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ +#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* "ace_en" on-CPU crypto enabled */ +#define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */ +#define X86_FEATURE_PHE (5*32+10) /* PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN (5*32+11) /* PHE enabled */ +#define X86_FEATURE_PMM (5*32+12) /* PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN (5*32+13) /* PMM enabled */ + +/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ +#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ +#define X86_FEATURE_SVM (6*32+ 2) /* Secure virtual machine */ +#define X86_FEATURE_EXTAPIC (6*32+ 3) /* Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY (6*32+ 4) /* CR8 in 32-bit mode */ +#define X86_FEATURE_ABM (6*32+ 5) /* Advanced bit manipulation */ +#define X86_FEATURE_SSE4A (6*32+ 6) /* SSE-4A */ +#define X86_FEATURE_MISALIGNSSE (6*32+ 7) /* Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */ +#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */ +#define X86_FEATURE_XOP (6*32+11) /* extended AVX instructions */ +#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ +#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ +#define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */ +#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */ +#define X86_FEATURE_TCE (6*32+17) /* translation cache extension */ +#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ +#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ +#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */ + +/* + * Auxiliary flags: Linux defined - For features scattered in various + * CPUID levels like 0x6, 0xA etc, word 7 + */ +#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ +#define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */ +#define X86_FEATURE_EPB (7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */ +#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ +#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ +#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */ + +/* Virtualization flags: Linux defined, word 8 */ +#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ +#define X86_FEATURE_VNMI (8*32+ 1) /* Intel Virtual NMI */ +#define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */ +#define X86_FEATURE_EPT (8*32+ 3) /* Intel Extended Page Table */ +#define X86_FEATURE_VPID (8*32+ 4) /* Intel Virtual Processor ID */ +#define X86_FEATURE_NPT (8*32+ 5) /* AMD Nested Page Table support */ +#define X86_FEATURE_LBRV (8*32+ 6) /* AMD LBR Virtualization support */ +#define X86_FEATURE_SVML (8*32+ 7) /* "svm_lock" AMD SVM locking MSR */ +#define X86_FEATURE_NRIPS (8*32+ 8) /* "nrip_save" AMD SVM next_rip save */ +#define X86_FEATURE_TSCRATEMSR (8*32+ 9) /* "tsc_scale" AMD TSC scaling support */ +#define X86_FEATURE_VMCBCLEAN (8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */ +#define X86_FEATURE_FLUSHBYASID (8*32+11) /* AMD flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (8*32+12) /* AMD Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (8*32+13) /* AMD filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (8*32+14) /* AMD pause filter threshold */ + + +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ +#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ +#define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */ +#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ +#define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 (9*32+ 8) /* 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ + +#if defined(__KERNEL__) && !defined(__ASSEMBLY__) + +#include <asm/asm.h> +#include <linux/bitops.h> + +extern const char * const x86_cap_flags[NCAPINTS*32]; +extern const char * const x86_power_flags[32]; + +#define test_cpu_cap(c, bit) \ + test_bit(bit, (unsigned long *)((c)->x86_capability)) + +#define REQUIRED_MASK_BIT_SET(bit) \ + ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \ + (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \ + (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \ + (((bit)>>5)==3 && (1UL<<((bit)&31) & REQUIRED_MASK3)) || \ + (((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4)) || \ + (((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5)) || \ + (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \ + (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) || \ + (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \ + (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) ) + +#define cpu_has(c, bit) \ + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ + test_cpu_cap(c, bit)) + +#define this_cpu_has(bit) \ + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ + x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability)) + +#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) + +#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) +#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability)) +#define setup_clear_cpu_cap(bit) do { \ + clear_cpu_cap(&boot_cpu_data, bit); \ + set_bit(bit, (unsigned long *)cpu_caps_cleared); \ +} while (0) +#define setup_force_cpu_cap(bit) do { \ + set_cpu_cap(&boot_cpu_data, bit); \ + set_bit(bit, (unsigned long *)cpu_caps_set); \ +} while (0) + +#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) +#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME) +#define cpu_has_de boot_cpu_has(X86_FEATURE_DE) +#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) +#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) +#define cpu_has_pae boot_cpu_has(X86_FEATURE_PAE) +#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE) +#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) +#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP) +#define cpu_has_mtrr boot_cpu_has(X86_FEATURE_MTRR) +#define cpu_has_mmx boot_cpu_has(X86_FEATURE_MMX) +#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) +#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) +#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2) +#define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3) +#define cpu_has_ssse3 boot_cpu_has(X86_FEATURE_SSSE3) +#define cpu_has_aes boot_cpu_has(X86_FEATURE_AES) +#define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) +#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) +#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP) +#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) +#define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR) +#define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR) +#define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR) +#define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE) +#define cpu_has_xstore_enabled boot_cpu_has(X86_FEATURE_XSTORE_EN) +#define cpu_has_xcrypt boot_cpu_has(X86_FEATURE_XCRYPT) +#define cpu_has_xcrypt_enabled boot_cpu_has(X86_FEATURE_XCRYPT_EN) +#define cpu_has_ace2 boot_cpu_has(X86_FEATURE_ACE2) +#define cpu_has_ace2_enabled boot_cpu_has(X86_FEATURE_ACE2_EN) +#define cpu_has_phe boot_cpu_has(X86_FEATURE_PHE) +#define cpu_has_phe_enabled boot_cpu_has(X86_FEATURE_PHE_EN) +#define cpu_has_pmm boot_cpu_has(X86_FEATURE_PMM) +#define cpu_has_pmm_enabled boot_cpu_has(X86_FEATURE_PMM_EN) +#define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) +#define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) +#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) +#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS) +#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) +#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) +#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) +#define cpu_has_xmm4_1 boot_cpu_has(X86_FEATURE_XMM4_1) +#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) +#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) +#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) +#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) +#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) +#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) +#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) +#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) +#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) + +#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) +# define cpu_has_invlpg 1 +#else +# define cpu_has_invlpg (boot_cpu_data.x86 > 3) +#endif + +#ifdef CONFIG_X86_64 + +#undef cpu_has_vme +#define cpu_has_vme 0 + +#undef cpu_has_pae +#define cpu_has_pae ___BUG___ + +#undef cpu_has_mp +#define cpu_has_mp 1 + +#undef cpu_has_k6_mtrr +#define cpu_has_k6_mtrr 0 + +#undef cpu_has_cyrix_arr +#define cpu_has_cyrix_arr 0 + +#undef cpu_has_centaur_mcr +#define cpu_has_centaur_mcr 0 + +#endif /* CONFIG_X86_64 */ + +#if __GNUC__ >= 4 +/* + * Static testing of CPU features. Used the same as boot_cpu_has(). + * These are only valid after alternatives have run, but will statically + * patch the target code for additional performance. + * + */ +static __always_inline __pure bool __static_cpu_has(u16 bit) +{ +#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5 + asm goto("1: jmp %l[t_no]\n" + "2:\n" + ".section .altinstructions,\"a\"\n" + " .long 1b - .\n" + " .long 0\n" /* no replacement */ + " .word %P0\n" /* feature bit */ + " .byte 2b - 1b\n" /* source len */ + " .byte 0\n" /* replacement len */ + ".previous\n" + /* skipping size check since replacement size = 0 */ + : : "i" (bit) : : t_no); + return true; + t_no: + return false; +#else + u8 flag; + /* Open-coded due to __stringify() in ALTERNATIVE() */ + asm volatile("1: movb $0,%0\n" + "2:\n" + ".section .altinstructions,\"a\"\n" + " .long 1b - .\n" + " .long 3f - .\n" + " .word %P1\n" /* feature bit */ + " .byte 2b - 1b\n" /* source len */ + " .byte 4f - 3f\n" /* replacement len */ + ".previous\n" + ".section .discard,\"aw\",@progbits\n" + " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ + ".previous\n" + ".section .altinstr_replacement,\"ax\"\n" + "3: movb $1,%0\n" + "4:\n" + ".previous\n" + : "=qm" (flag) : "i" (bit)); + return flag; +#endif +} + +#define static_cpu_has(bit) \ +( \ + __builtin_constant_p(boot_cpu_has(bit)) ? \ + boot_cpu_has(bit) : \ + __builtin_constant_p(bit) ? \ + __static_cpu_has(bit) : \ + boot_cpu_has(bit) \ +) +#else +/* + * gcc 3.x is too stupid to do the static test; fall back to dynamic. + */ +#define static_cpu_has(bit) boot_cpu_has(bit) +#endif + +#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ + +#endif /* _ASM_X86_CPUFEATURE_H */ diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h new file mode 100644 index 00000000000..61c852fa346 --- /dev/null +++ b/arch/x86/include/asm/cpumask.h @@ -0,0 +1,14 @@ +#ifndef _ASM_X86_CPUMASK_H +#define _ASM_X86_CPUMASK_H +#ifndef __ASSEMBLY__ +#include <linux/cpumask.h> + +extern cpumask_var_t cpu_callin_mask; +extern cpumask_var_t cpu_callout_mask; +extern cpumask_var_t cpu_initialized_mask; +extern cpumask_var_t cpu_sibling_setup_mask; + +extern void setup_cpu_local_masks(void); + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_CPUMASK_H */ diff --git a/arch/x86/include/asm/cputime.h b/arch/x86/include/asm/cputime.h new file mode 100644 index 00000000000..6d68ad7e0ea --- /dev/null +++ b/arch/x86/include/asm/cputime.h @@ -0,0 +1 @@ +#include <asm-generic/cputime.h> diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h new file mode 100644 index 00000000000..dfcc006f734 --- /dev/null +++ b/arch/x86/include/asm/crash.h @@ -0,0 +1,75 @@ +#ifndef _ASM_I386_CRASH_H +#define _ASM_I386_CRASH_H + +/* + * linux/include/asm-i386/crash.h + * + * Copyright (c) 2004 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifdef __KERNEL__ + +#include <linux/mm.h> +#include <linux/highmem.h> +#include <asm/mmzone.h> + +extern int page_is_ram(unsigned long); + +static inline void * +map_virtual(u64 offset, struct page **pp) +{ + struct page *page; + unsigned long pfn; + void *vaddr; + + pfn = (unsigned long)(offset >> PAGE_SHIFT); + + if (!page_is_ram(pfn)) { + printk(KERN_INFO + "crash memory driver: !page_is_ram(pfn: %lx)\n", pfn); + return NULL; + } + + if (!pfn_valid(pfn)) { + printk(KERN_INFO + "crash memory driver: invalid pfn: %lx )\n", pfn); + return NULL; + } + + page = pfn_to_page(pfn); + + vaddr = kmap(page); + if (!vaddr) { + printk(KERN_INFO + "crash memory driver: pfn: %lx kmap(page: %lx) failed\n", + pfn, (unsigned long)page); + return NULL; + } + + *pp = page; + return (vaddr + (offset & (PAGE_SIZE-1))); +} + +static inline void unmap_virtual(struct page *page) +{ + kunmap(page); +} + +#endif /* __KERNEL__ */ + +#endif /* _ASM_I386_CRASH_H */ diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h new file mode 100644 index 00000000000..4d447b732d8 --- /dev/null +++ b/arch/x86/include/asm/current.h @@ -0,0 +1,21 @@ +#ifndef _ASM_X86_CURRENT_H +#define _ASM_X86_CURRENT_H + +#include <linux/compiler.h> +#include <asm/percpu.h> + +#ifndef __ASSEMBLY__ +struct task_struct; + +DECLARE_PER_CPU(struct task_struct *, current_task); + +static __always_inline struct task_struct *get_current(void) +{ + return percpu_read_stable(current_task); +} + +#define current get_current() + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_CURRENT_H */ diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h new file mode 100644 index 00000000000..b903d5ea394 --- /dev/null +++ b/arch/x86/include/asm/debugreg.h @@ -0,0 +1,128 @@ +#ifndef _ASM_X86_DEBUGREG_H +#define _ASM_X86_DEBUGREG_H + + +/* Indicate the register numbers for a number of the specific + debug registers. Registers 0-3 contain the addresses we wish to trap on */ +#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */ +#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */ + +#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */ +#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */ + +/* Define a few things for the status register. We can use this to determine + which debugging register was responsible for the trap. The other bits + are either reserved or not of interest to us. */ + +/* Define reserved bits in DR6 which are always set to 1 */ +#define DR6_RESERVED (0xFFFF0FF0) + +#define DR_TRAP0 (0x1) /* db0 */ +#define DR_TRAP1 (0x2) /* db1 */ +#define DR_TRAP2 (0x4) /* db2 */ +#define DR_TRAP3 (0x8) /* db3 */ +#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) + +#define DR_STEP (0x4000) /* single-step */ +#define DR_SWITCH (0x8000) /* task switch */ + +/* Now define a bunch of things for manipulating the control register. + The top two bytes of the control register consist of 4 fields of 4 + bits - each field corresponds to one of the four debug registers, + and indicates what types of access we trap on, and how large the data + field is that we are looking at */ + +#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */ +#define DR_CONTROL_SIZE 4 /* 4 control bits per register */ + +#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */ +#define DR_RW_WRITE (0x1) +#define DR_RW_READ (0x3) + +#define DR_LEN_1 (0x0) /* Settings for data length to trap on */ +#define DR_LEN_2 (0x4) +#define DR_LEN_4 (0xC) +#define DR_LEN_8 (0x8) + +/* The low byte to the control register determine which registers are + enabled. There are 4 fields of two bits. One bit is "local", meaning + that the processor will reset the bit after a task switch and the other + is global meaning that we have to explicitly reset the bit. With linux, + you can use either one, since we explicitly zero the register when we enter + kernel mode. */ + +#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ +#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ +#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */ +#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */ +#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ + +#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ +#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */ + +/* The second byte to the control register has a few special things. + We can slow the instruction pipeline for instructions coming via the + gdt or the ldt if we want to. I am not sure why this is an advantage */ + +#ifdef __i386__ +#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */ +#else +#define DR_CONTROL_RESERVED (0xFFFFFFFF0000FC00UL) /* Reserved */ +#endif + +#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ +#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ + +/* + * HW breakpoint additions + */ +#ifdef __KERNEL__ + +DECLARE_PER_CPU(unsigned long, cpu_dr7); + +static inline void hw_breakpoint_disable(void) +{ + /* Zero the control register for HW Breakpoint */ + set_debugreg(0UL, 7); + + /* Zero-out the individual HW breakpoint address registers */ + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); +} + +static inline int hw_breakpoint_active(void) +{ + return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; +} + +extern void aout_dump_debugregs(struct user *dump); + +extern void hw_breakpoint_restore(void); + +#ifdef CONFIG_X86_64 +DECLARE_PER_CPU(int, debug_stack_usage); +static inline void debug_stack_usage_inc(void) +{ + __get_cpu_var(debug_stack_usage)++; +} +static inline void debug_stack_usage_dec(void) +{ + __get_cpu_var(debug_stack_usage)--; +} +int is_debug_stack(unsigned long addr); +void debug_stack_set_zero(void); +void debug_stack_reset(void); +#else /* !X86_64 */ +static inline int is_debug_stack(unsigned long addr) { return 0; } +static inline void debug_stack_set_zero(void) { } +static inline void debug_stack_reset(void) { } +static inline void debug_stack_usage_inc(void) { } +static inline void debug_stack_usage_dec(void) { } +#endif /* X86_64 */ + + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h new file mode 100644 index 00000000000..9b3b4f2754c --- /dev/null +++ b/arch/x86/include/asm/delay.h @@ -0,0 +1,8 @@ +#ifndef _ASM_X86_DELAY_H +#define _ASM_X86_DELAY_H + +#include <asm-generic/delay.h> + +void use_tsc_delay(void); + +#endif /* _ASM_X86_DELAY_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h new file mode 100644 index 00000000000..00b222fa06b --- /dev/null +++ b/arch/x86/include/asm/desc.h @@ -0,0 +1,432 @@ +#ifndef _ASM_X86_DESC_H +#define _ASM_X86_DESC_H + +#include <asm/desc_defs.h> +#include <asm/ldt.h> +#include <asm/mmu.h> + +#include <linux/mm_types.h> +#include <linux/smp.h> + +static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info) +{ + desc->limit0 = info->limit & 0x0ffff; + + desc->base0 = (info->base_addr & 0x0000ffff); + desc->base1 = (info->base_addr & 0x00ff0000) >> 16; + + desc->type = (info->read_exec_only ^ 1) << 1; + desc->type |= info->contents << 2; + + desc->s = 1; + desc->dpl = 0x3; + desc->p = info->seg_not_present ^ 1; + desc->limit = (info->limit & 0xf0000) >> 16; + desc->avl = info->useable; + desc->d = info->seg_32bit; + desc->g = info->limit_in_pages; + + desc->base2 = (info->base_addr & 0xff000000) >> 24; + /* + * Don't allow setting of the lm bit. It would confuse + * user_64bit_mode and would get overridden by sysret anyway. + */ + desc->l = 0; +} + +extern struct desc_ptr idt_descr; +extern gate_desc idt_table[]; +extern struct desc_ptr nmi_idt_descr; +extern gate_desc nmi_idt_table[]; + +struct gdt_page { + struct desc_struct gdt[GDT_ENTRIES]; +} __attribute__((aligned(PAGE_SIZE))); + +DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); + +static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) +{ + return per_cpu(gdt_page, cpu).gdt; +} + +#ifdef CONFIG_X86_64 + +static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func, + unsigned dpl, unsigned ist, unsigned seg) +{ + gate->offset_low = PTR_LOW(func); + gate->segment = __KERNEL_CS; + gate->ist = ist; + gate->p = 1; + gate->dpl = dpl; + gate->zero0 = 0; + gate->zero1 = 0; + gate->type = type; + gate->offset_middle = PTR_MIDDLE(func); + gate->offset_high = PTR_HIGH(func); +} + +#else +static inline void pack_gate(gate_desc *gate, unsigned char type, + unsigned long base, unsigned dpl, unsigned flags, + unsigned short seg) +{ + gate->a = (seg << 16) | (base & 0xffff); + gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8); +} + +#endif + +static inline int desc_empty(const void *ptr) +{ + const u32 *desc = ptr; + + return !(desc[0] | desc[1]); +} + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define load_TR_desc() native_load_tr_desc() +#define load_gdt(dtr) native_load_gdt(dtr) +#define load_idt(dtr) native_load_idt(dtr) +#define load_tr(tr) asm volatile("ltr %0"::"m" (tr)) +#define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt)) + +#define store_gdt(dtr) native_store_gdt(dtr) +#define store_idt(dtr) native_store_idt(dtr) +#define store_tr(tr) (tr = native_store_tr()) + +#define load_TLS(t, cpu) native_load_tls(t, cpu) +#define set_ldt native_set_ldt +#ifdef CONFIG_X86_32 +#define load_user_cs_desc native_load_user_cs_desc +#endif /*CONFIG_X86_32*/ + +#define write_ldt_entry(dt, entry, desc) native_write_ldt_entry(dt, entry, desc) +#define write_gdt_entry(dt, entry, desc, type) native_write_gdt_entry(dt, entry, desc, type) +#define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g) + +static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) +{ +} + +static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) +{ +} +#endif /* CONFIG_PARAVIRT */ + +#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt)) + +static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate) +{ + memcpy(&idt[entry], gate, sizeof(*gate)); +} + +static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, const void *desc) +{ + memcpy(&ldt[entry], desc, 8); +} + +static inline void +native_write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, int type) +{ + unsigned int size; + + switch (type) { + case DESC_TSS: size = sizeof(tss_desc); break; + case DESC_LDT: size = sizeof(ldt_desc); break; + default: size = sizeof(*gdt); break; + } + + memcpy(&gdt[entry], desc, size); +} + +static inline void pack_descriptor(struct desc_struct *desc, unsigned long base, + unsigned long limit, unsigned char type, + unsigned char flags) +{ + desc->a = ((base & 0xffff) << 16) | (limit & 0xffff); + desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) | + (limit & 0x000f0000) | ((type & 0xff) << 8) | + ((flags & 0xf) << 20); + desc->p = 1; +} + + +static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size) +{ +#ifdef CONFIG_X86_64 + struct ldttss_desc64 *desc = d; + + memset(desc, 0, sizeof(*desc)); + + desc->limit0 = size & 0xFFFF; + desc->base0 = PTR_LOW(addr); + desc->base1 = PTR_MIDDLE(addr) & 0xFF; + desc->type = type; + desc->p = 1; + desc->limit1 = (size >> 16) & 0xF; + desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF; + desc->base3 = PTR_HIGH(addr); +#else + pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0); +#endif +} + +static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr) +{ + struct desc_struct *d = get_cpu_gdt_table(cpu); + tss_desc tss; + + /* + * sizeof(unsigned long) coming from an extra "long" at the end + * of the iobitmap. See tss_struct definition in processor.h + * + * -1? seg base+limit should be pointing to the address of the + * last valid byte + */ + set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS, + IO_BITMAP_OFFSET + IO_BITMAP_BYTES + + sizeof(unsigned long) - 1); + write_gdt_entry(d, entry, &tss, DESC_TSS); +} + +#define set_tss_desc(cpu, addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) + +static inline void native_set_ldt(const void *addr, unsigned int entries) +{ + if (likely(entries == 0)) + asm volatile("lldt %w0"::"q" (0)); + else { + unsigned cpu = smp_processor_id(); + ldt_desc ldt; + + set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT, + entries * LDT_ENTRY_SIZE - 1); + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, + &ldt, DESC_LDT); + asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); + } +} + +static inline void native_load_tr_desc(void) +{ + asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); +} + +static inline void native_load_gdt(const struct desc_ptr *dtr) +{ + asm volatile("lgdt %0"::"m" (*dtr)); +} + +static inline void native_load_idt(const struct desc_ptr *dtr) +{ + asm volatile("lidt %0"::"m" (*dtr)); +} + +static inline void native_store_gdt(struct desc_ptr *dtr) +{ + asm volatile("sgdt %0":"=m" (*dtr)); +} + +static inline void native_store_idt(struct desc_ptr *dtr) +{ + asm volatile("sidt %0":"=m" (*dtr)); +} + +static inline unsigned long native_store_tr(void) +{ + unsigned long tr; + + asm volatile("str %0":"=r" (tr)); + + return tr; +} + +static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) +{ + struct desc_struct *gdt = get_cpu_gdt_table(cpu); + unsigned int i; + + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) + gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; +} + +#define _LDT_empty(info) \ + ((info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->useable == 0) + +#ifdef CONFIG_X86_64 +#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0)) +#else +#define LDT_empty(info) (_LDT_empty(info)) +#endif + +static inline void clear_LDT(void) +{ + set_ldt(NULL, 0); +} + +/* + * load one particular LDT into the current CPU + */ +static inline void load_LDT_nolock(mm_context_t *pc) +{ + set_ldt(pc->ldt, pc->size); +} + +static inline void load_LDT(mm_context_t *pc) +{ + preempt_disable(); + load_LDT_nolock(pc); + preempt_enable(); +} + +static inline unsigned long get_desc_base(const struct desc_struct *desc) +{ + return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); +} + +static inline void set_desc_base(struct desc_struct *desc, unsigned long base) +{ + desc->base0 = base & 0xffff; + desc->base1 = (base >> 16) & 0xff; + desc->base2 = (base >> 24) & 0xff; +} + +static inline unsigned long get_desc_limit(const struct desc_struct *desc) +{ + return desc->limit0 | (desc->limit << 16); +} + +static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) +{ + desc->limit0 = limit & 0xffff; + desc->limit = (limit >> 16) & 0xf; +} + +#ifdef CONFIG_X86_64 +static inline void set_nmi_gate(int gate, void *addr) +{ + gate_desc s; + + pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS); + write_idt_entry(nmi_idt_table, gate, &s); +} +#endif + +static inline void _set_gate(int gate, unsigned type, void *addr, + unsigned dpl, unsigned ist, unsigned seg) +{ + gate_desc s; + + pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg); + /* + * does not need to be atomic because it is only done once at + * setup time + */ + write_idt_entry(idt_table, gate, &s); +} + +/* + * This needs to use 'idt_table' rather than 'idt', and + * thus use the _nonmapped_ version of the IDT, as the + * Pentium F0 0F bugfix can have resulted in the mapped + * IDT being write-protected. + */ +static inline void set_intr_gate(unsigned int n, void *addr) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); +} + +extern int first_system_vector; +/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */ +extern unsigned long used_vectors[]; + +static inline void alloc_system_vector(int vector) +{ + if (!test_bit(vector, used_vectors)) { + set_bit(vector, used_vectors); + if (first_system_vector > vector) + first_system_vector = vector; + } else { + BUG(); + } +} + +static inline void alloc_intr_gate(unsigned int n, void *addr) +{ + alloc_system_vector(n); + set_intr_gate(n, addr); +} + +/* + * This routine sets up an interrupt gate at directory privilege level 3. + */ +static inline void set_system_intr_gate(unsigned int n, void *addr) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS); +} + +static inline void set_system_trap_gate(unsigned int n, void *addr) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS); +} + +static inline void set_trap_gate(unsigned int n, void *addr) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS); +} + +static inline void set_task_gate(unsigned int n, unsigned int gdt_entry) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3)); +} + +static inline void set_intr_gate_ist(int n, void *addr, unsigned ist) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS); +} + +static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist) +{ + BUG_ON((unsigned)n > 0xFF); + _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); +} + +#ifdef CONFIG_X86_32 +static inline void set_user_cs(struct desc_struct *desc, unsigned long limit) +{ + limit = (limit - 1) / PAGE_SIZE; + desc->a = limit & 0xffff; + desc->b = (limit & 0xf0000) | 0x00c0fb00; +} + +static inline void native_load_user_cs_desc(int cpu, struct mm_struct *mm) +{ + get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs; +} + +#define arch_add_exec_range arch_add_exec_range +#define arch_remove_exec_range arch_remove_exec_range +#define arch_flush_exec_range arch_flush_exec_range +extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit); +extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit); +extern void arch_flush_exec_range(struct mm_struct *mm); +#endif /* CONFIG_X86_32 */ + +#endif /* _ASM_X86_DESC_H */ diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h new file mode 100644 index 00000000000..278441f3985 --- /dev/null +++ b/arch/x86/include/asm/desc_defs.h @@ -0,0 +1,101 @@ +/* Written 2000 by Andi Kleen */ +#ifndef _ASM_X86_DESC_DEFS_H +#define _ASM_X86_DESC_DEFS_H + +/* + * Segment descriptor structure definitions, usable from both x86_64 and i386 + * archs. + */ + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> + +/* + * FIXME: Accessing the desc_struct through its fields is more elegant, + * and should be the one valid thing to do. However, a lot of open code + * still touches the a and b accessors, and doing this allow us to do it + * incrementally. We keep the signature as a struct, rather than an union, + * so we can get rid of it transparently in the future -- glommer + */ +/* 8 byte segment descriptor */ +struct desc_struct { + union { + struct { + unsigned int a; + unsigned int b; + }; + struct { + u16 limit0; + u16 base0; + unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1; + unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8; + }; + }; +} __attribute__((packed)); + +#define GDT_ENTRY_INIT(flags, base, limit) { { { \ + .a = ((limit) & 0xffff) | (((base) & 0xffff) << 16), \ + .b = (((base) & 0xff0000) >> 16) | (((flags) & 0xf0ff) << 8) | \ + ((limit) & 0xf0000) | ((base) & 0xff000000), \ + } } } + +enum { + GATE_INTERRUPT = 0xE, + GATE_TRAP = 0xF, + GATE_CALL = 0xC, + GATE_TASK = 0x5, +}; + +/* 16byte gate */ +struct gate_struct64 { + u16 offset_low; + u16 segment; + unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; + u16 offset_middle; + u32 offset_high; + u32 zero1; +} __attribute__((packed)); + +#define PTR_LOW(x) ((unsigned long long)(x) & 0xFFFF) +#define PTR_MIDDLE(x) (((unsigned long long)(x) >> 16) & 0xFFFF) +#define PTR_HIGH(x) ((unsigned long long)(x) >> 32) + +enum { + DESC_TSS = 0x9, + DESC_LDT = 0x2, + DESCTYPE_S = 0x10, /* !system */ +}; + +/* LDT or TSS descriptor in the GDT. 16 bytes. */ +struct ldttss_desc64 { + u16 limit0; + u16 base0; + unsigned base1 : 8, type : 5, dpl : 2, p : 1; + unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; + u32 base3; + u32 zero1; +} __attribute__((packed)); + +#ifdef CONFIG_X86_64 +typedef struct gate_struct64 gate_desc; +typedef struct ldttss_desc64 ldt_desc; +typedef struct ldttss_desc64 tss_desc; +#define gate_offset(g) ((g).offset_low | ((unsigned long)(g).offset_middle << 16) | ((unsigned long)(g).offset_high << 32)) +#define gate_segment(g) ((g).segment) +#else +typedef struct desc_struct gate_desc; +typedef struct desc_struct ldt_desc; +typedef struct desc_struct tss_desc; +#define gate_offset(g) (((g).b & 0xffff0000) | ((g).a & 0x0000ffff)) +#define gate_segment(g) ((g).a >> 16) +#endif + +struct desc_ptr { + unsigned short size; + unsigned long address; +} __attribute__((packed)) ; + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_X86_DESC_DEFS_H */ diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h new file mode 100644 index 00000000000..63a2a03d7d5 --- /dev/null +++ b/arch/x86/include/asm/device.h @@ -0,0 +1,19 @@ +#ifndef _ASM_X86_DEVICE_H +#define _ASM_X86_DEVICE_H + +struct dev_archdata { +#ifdef CONFIG_ACPI + void *acpi_handle; +#endif +#ifdef CONFIG_X86_64 +struct dma_map_ops *dma_ops; +#endif +#if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU) + void *iommu; /* hook for IOMMU specific extension */ +#endif +}; + +struct pdev_archdata { +}; + +#endif /* _ASM_X86_DEVICE_H */ diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h new file mode 100644 index 00000000000..ced283ac79d --- /dev/null +++ b/arch/x86/include/asm/div64.h @@ -0,0 +1,66 @@ +#ifndef _ASM_X86_DIV64_H +#define _ASM_X86_DIV64_H + +#ifdef CONFIG_X86_32 + +#include <linux/types.h> +#include <linux/log2.h> + +/* + * do_div() is NOT a C function. It wants to return + * two values (the quotient and the remainder), but + * since that doesn't work very well in C, what it + * does is: + * + * - modifies the 64-bit dividend _in_place_ + * - returns the 32-bit remainder + * + * This ends up being the most efficient "calling + * convention" on x86. + */ +#define do_div(n, base) \ +({ \ + unsigned long __upper, __low, __high, __mod, __base; \ + __base = (base); \ + if (__builtin_constant_p(__base) && is_power_of_2(__base)) { \ + __mod = n & (__base - 1); \ + n >>= ilog2(__base); \ + } else { \ + asm("" : "=a" (__low), "=d" (__high) : "A" (n));\ + __upper = __high; \ + if (__high) { \ + __upper = __high % (__base); \ + __high = __high / (__base); \ + } \ + asm("divl %2" : "=a" (__low), "=d" (__mod) \ + : "rm" (__base), "0" (__low), "1" (__upper)); \ + asm("" : "=A" (n) : "a" (__low), "d" (__high)); \ + } \ + __mod; \ +}) + +static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) +{ + union { + u64 v64; + u32 v32[2]; + } d = { dividend }; + u32 upper; + + upper = d.v32[1]; + d.v32[1] = 0; + if (upper >= divisor) { + d.v32[1] = upper / divisor; + upper %= divisor; + } + asm ("divl %2" : "=a" (d.v32[0]), "=d" (*remainder) : + "rm" (divisor), "0" (d.v32[0]), "1" (upper)); + return d.v64; +} +#define div_u64_rem div_u64_rem + +#else +# include <asm-generic/div64.h> +#endif /* CONFIG_X86_32 */ + +#endif /* _ASM_X86_DIV64_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h new file mode 100644 index 00000000000..ed3065fd631 --- /dev/null +++ b/arch/x86/include/asm/dma-mapping.h @@ -0,0 +1,157 @@ +#ifndef _ASM_X86_DMA_MAPPING_H +#define _ASM_X86_DMA_MAPPING_H + +/* + * IOMMU interface. See Documentation/DMA-API-HOWTO.txt and + * Documentation/DMA-API.txt for documentation. + */ + +#include <linux/kmemcheck.h> +#include <linux/scatterlist.h> +#include <linux/dma-debug.h> +#include <linux/dma-attrs.h> +#include <asm/io.h> +#include <asm/swiotlb.h> +#include <asm-generic/dma-coherent.h> + +#ifdef CONFIG_ISA +# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) +#else +# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) +#endif + +#define DMA_ERROR_CODE 0 + +extern int iommu_merge; +extern struct device x86_dma_fallback_dev; +extern int panic_on_overflow; + +extern struct dma_map_ops *dma_ops; + +static inline struct dma_map_ops *get_dma_ops(struct device *dev) +{ +#ifdef CONFIG_X86_32 + return dma_ops; +#else + if (unlikely(!dev) || !dev->archdata.dma_ops) + return dma_ops; + else + return dev->archdata.dma_ops; +#endif +} + +#include <asm-generic/dma-mapping-common.h> + +/* Make sure we keep the same behaviour */ +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + if (ops->mapping_error) + return ops->mapping_error(dev, dma_addr); + + return (dma_addr == DMA_ERROR_CODE); +} + +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) + +extern int dma_supported(struct device *hwdev, u64 mask); +extern int dma_set_mask(struct device *dev, u64 mask); + +extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag); + +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return 0; + + return addr + size - 1 <= *dev->dma_mask; +} + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr; +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return daddr; +} + +static inline void +dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction dir) +{ + flush_write_buffers(); +} + +static inline unsigned long dma_alloc_coherent_mask(struct device *dev, + gfp_t gfp) +{ + unsigned long dma_mask = 0; + + dma_mask = dev->coherent_dma_mask; + if (!dma_mask) + dma_mask = (gfp & GFP_DMA) ? DMA_BIT_MASK(24) : DMA_BIT_MASK(32); + + return dma_mask; +} + +static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) +{ + unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp); + + if (dma_mask <= DMA_BIT_MASK(24)) + gfp |= GFP_DMA; +#ifdef CONFIG_X86_64 + if (dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) + gfp |= GFP_DMA32; +#endif + return gfp; +} + +static inline void * +dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + void *memory; + + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + + if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) + return memory; + + if (!dev) + dev = &x86_dma_fallback_dev; + + if (!is_device_dma_capable(dev)) + return NULL; + + if (!ops->alloc_coherent) + return NULL; + + memory = ops->alloc_coherent(dev, size, dma_handle, + dma_alloc_coherent_gfp_flags(dev, gfp)); + debug_dma_alloc_coherent(dev, size, *dma_handle, memory); + + return memory; +} + +static inline void dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t bus) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + WARN_ON(irqs_disabled()); /* for portability */ + + if (dma_release_from_coherent(dev, get_order(size), vaddr)) + return; + + debug_dma_free_coherent(dev, size, vaddr, bus); + if (ops->free_coherent) + ops->free_coherent(dev, size, vaddr, bus); +} + +#endif diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h new file mode 100644 index 00000000000..0bdb0c54d9a --- /dev/null +++ b/arch/x86/include/asm/dma.h @@ -0,0 +1,317 @@ +/* + * linux/include/asm/dma.h: Defines for using and allocating dma channels. + * Written by Hennus Bergman, 1992. + * High DMA channel support & info by Hannu Savolainen + * and John Boyd, Nov. 1992. + */ + +#ifndef _ASM_X86_DMA_H +#define _ASM_X86_DMA_H + +#include <linux/spinlock.h> /* And spinlocks */ +#include <asm/io.h> /* need byte IO */ + +#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER +#define dma_outb outb_p +#else +#define dma_outb outb +#endif + +#define dma_inb inb + +/* + * NOTES about DMA transfers: + * + * controller 1: channels 0-3, byte operations, ports 00-1F + * controller 2: channels 4-7, word operations, ports C0-DF + * + * - ALL registers are 8 bits only, regardless of transfer size + * - channel 4 is not used - cascades 1 into 2. + * - channels 0-3 are byte - addresses/counts are for physical bytes + * - channels 5-7 are word - addresses/counts are for physical words + * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries + * - transfer count loaded to registers is 1 less than actual count + * - controller 2 offsets are all even (2x offsets for controller 1) + * - page registers for 5-7 don't use data bit 0, represent 128K pages + * - page registers for 0-3 use bit 0, represent 64K pages + * + * DMA transfers are limited to the lower 16MB of _physical_ memory. + * Note that addresses loaded into registers must be _physical_ addresses, + * not logical addresses (which may differ if paging is active). + * + * Address mapping for channels 0-3: + * + * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses) + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * P7 ... P0 A7 ... A0 A7 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Address mapping for channels 5-7: + * + * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses) + * | ... | \ \ ... \ \ \ ... \ \ + * | ... | \ \ ... \ \ \ ... \ (not used) + * | ... | \ \ ... \ \ \ ... \ + * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses + * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at + * the hardware level, so odd-byte transfers aren't possible). + * + * Transfer count (_not # bytes_) is limited to 64K, represented as actual + * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more, + * and up to 128K bytes may be transferred on channels 5-7 in one operation. + * + */ + +#define MAX_DMA_CHANNELS 8 + +/* 16MB ISA DMA zone */ +#define MAX_DMA_PFN ((16 * 1024 * 1024) >> PAGE_SHIFT) + +/* 4GB broken PCI/AGP hardware bus master zone */ +#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) + +#ifdef CONFIG_X86_32 +/* The maximum address that we can perform a DMA transfer to on this platform */ +#define MAX_DMA_ADDRESS (PAGE_OFFSET + 0x1000000) +#else +/* Compat define for old dma zone */ +#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT)) +#endif + +/* 8237 DMA controllers */ +#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */ +#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */ + +/* DMA controller registers */ +#define DMA1_CMD_REG 0x08 /* command register (w) */ +#define DMA1_STAT_REG 0x08 /* status register (r) */ +#define DMA1_REQ_REG 0x09 /* request register (w) */ +#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */ +#define DMA1_MODE_REG 0x0B /* mode register (w) */ +#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */ +#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */ +#define DMA1_RESET_REG 0x0D /* Master Clear (w) */ +#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */ +#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */ + +#define DMA2_CMD_REG 0xD0 /* command register (w) */ +#define DMA2_STAT_REG 0xD0 /* status register (r) */ +#define DMA2_REQ_REG 0xD2 /* request register (w) */ +#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */ +#define DMA2_MODE_REG 0xD6 /* mode register (w) */ +#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */ +#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */ +#define DMA2_RESET_REG 0xDA /* Master Clear (w) */ +#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */ +#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */ + +#define DMA_ADDR_0 0x00 /* DMA address registers */ +#define DMA_ADDR_1 0x02 +#define DMA_ADDR_2 0x04 +#define DMA_ADDR_3 0x06 +#define DMA_ADDR_4 0xC0 +#define DMA_ADDR_5 0xC4 +#define DMA_ADDR_6 0xC8 +#define DMA_ADDR_7 0xCC + +#define DMA_CNT_0 0x01 /* DMA count registers */ +#define DMA_CNT_1 0x03 +#define DMA_CNT_2 0x05 +#define DMA_CNT_3 0x07 +#define DMA_CNT_4 0xC2 +#define DMA_CNT_5 0xC6 +#define DMA_CNT_6 0xCA +#define DMA_CNT_7 0xCE + +#define DMA_PAGE_0 0x87 /* DMA page registers */ +#define DMA_PAGE_1 0x83 +#define DMA_PAGE_2 0x81 +#define DMA_PAGE_3 0x82 +#define DMA_PAGE_5 0x8B +#define DMA_PAGE_6 0x89 +#define DMA_PAGE_7 0x8A + +/* I/O to memory, no autoinit, increment, single mode */ +#define DMA_MODE_READ 0x44 +/* memory to I/O, no autoinit, increment, single mode */ +#define DMA_MODE_WRITE 0x48 +/* pass thru DREQ->HRQ, DACK<-HLDA only */ +#define DMA_MODE_CASCADE 0xC0 + +#define DMA_AUTOINIT 0x10 + + +#ifdef CONFIG_ISA_DMA_API +extern spinlock_t dma_spin_lock; + +static inline unsigned long claim_dma_lock(void) +{ + unsigned long flags; + spin_lock_irqsave(&dma_spin_lock, flags); + return flags; +} + +static inline void release_dma_lock(unsigned long flags) +{ + spin_unlock_irqrestore(&dma_spin_lock, flags); +} +#endif /* CONFIG_ISA_DMA_API */ + +/* enable/disable a specific DMA channel */ +static inline void enable_dma(unsigned int dmanr) +{ + if (dmanr <= 3) + dma_outb(dmanr, DMA1_MASK_REG); + else + dma_outb(dmanr & 3, DMA2_MASK_REG); +} + +static inline void disable_dma(unsigned int dmanr) +{ + if (dmanr <= 3) + dma_outb(dmanr | 4, DMA1_MASK_REG); + else + dma_outb((dmanr & 3) | 4, DMA2_MASK_REG); +} + +/* Clear the 'DMA Pointer Flip Flop'. + * Write 0 for LSB/MSB, 1 for MSB/LSB access. + * Use this once to initialize the FF to a known state. + * After that, keep track of it. :-) + * --- In order to do that, the DMA routines below should --- + * --- only be used while holding the DMA lock ! --- + */ +static inline void clear_dma_ff(unsigned int dmanr) +{ + if (dmanr <= 3) + dma_outb(0, DMA1_CLEAR_FF_REG); + else + dma_outb(0, DMA2_CLEAR_FF_REG); +} + +/* set mode (above) for a specific DMA channel */ +static inline void set_dma_mode(unsigned int dmanr, char mode) +{ + if (dmanr <= 3) + dma_outb(mode | dmanr, DMA1_MODE_REG); + else + dma_outb(mode | (dmanr & 3), DMA2_MODE_REG); +} + +/* Set only the page register bits of the transfer address. + * This is used for successive transfers when we know the contents of + * the lower 16 bits of the DMA current address register, but a 64k boundary + * may have been crossed. + */ +static inline void set_dma_page(unsigned int dmanr, char pagenr) +{ + switch (dmanr) { + case 0: + dma_outb(pagenr, DMA_PAGE_0); + break; + case 1: + dma_outb(pagenr, DMA_PAGE_1); + break; + case 2: + dma_outb(pagenr, DMA_PAGE_2); + break; + case 3: + dma_outb(pagenr, DMA_PAGE_3); + break; + case 5: + dma_outb(pagenr & 0xfe, DMA_PAGE_5); + break; + case 6: + dma_outb(pagenr & 0xfe, DMA_PAGE_6); + break; + case 7: + dma_outb(pagenr & 0xfe, DMA_PAGE_7); + break; + } +} + + +/* Set transfer address & page bits for specific DMA channel. + * Assumes dma flipflop is clear. + */ +static inline void set_dma_addr(unsigned int dmanr, unsigned int a) +{ + set_dma_page(dmanr, a>>16); + if (dmanr <= 3) { + dma_outb(a & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE); + dma_outb((a >> 8) & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE); + } else { + dma_outb((a >> 1) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE); + dma_outb((a >> 9) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE); + } +} + + +/* Set transfer size (max 64k for DMA0..3, 128k for DMA5..7) for + * a specific DMA channel. + * You must ensure the parameters are valid. + * NOTE: from a manual: "the number of transfers is one more + * than the initial word count"! This is taken into account. + * Assumes dma flip-flop is clear. + * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7. + */ +static inline void set_dma_count(unsigned int dmanr, unsigned int count) +{ + count--; + if (dmanr <= 3) { + dma_outb(count & 0xff, ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE); + dma_outb((count >> 8) & 0xff, + ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE); + } else { + dma_outb((count >> 1) & 0xff, + ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE); + dma_outb((count >> 9) & 0xff, + ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE); + } +} + + +/* Get DMA residue count. After a DMA transfer, this + * should return zero. Reading this while a DMA transfer is + * still in progress will return unpredictable results. + * If called before the channel has been used, it may return 1. + * Otherwise, it returns the number of _bytes_ left to transfer. + * + * Assumes DMA flip-flop is clear. + */ +static inline int get_dma_residue(unsigned int dmanr) +{ + unsigned int io_port; + /* using short to get 16-bit wrap around */ + unsigned short count; + + io_port = (dmanr <= 3) ? ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE + : ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE; + + count = 1 + dma_inb(io_port); + count += dma_inb(io_port) << 8; + + return (dmanr <= 3) ? count : (count << 1); +} + + +/* These are in kernel/dma.c because x86 uses CONFIG_GENERIC_ISA_DMA */ +#ifdef CONFIG_ISA_DMA_API +extern int request_dma(unsigned int dmanr, const char *device_id); +extern void free_dma(unsigned int dmanr); +#endif + +/* From PCI */ + +#ifdef CONFIG_PCI +extern int isa_dma_bridge_buggy; +#else +#define isa_dma_bridge_buggy (0) +#endif + +#endif /* _ASM_X86_DMA_H */ diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h new file mode 100644 index 00000000000..fd8f9e2ca35 --- /dev/null +++ b/arch/x86/include/asm/dmi.h @@ -0,0 +1,19 @@ +#ifndef _ASM_X86_DMI_H +#define _ASM_X86_DMI_H + +#include <linux/compiler.h> +#include <linux/init.h> + +#include <asm/io.h> +#include <asm/setup.h> + +static __always_inline __init void *dmi_alloc(unsigned len) +{ + return extend_brk(len, sizeof(int)); +} + +/* Use early IO mappings for DMI because it's initialized early */ +#define dmi_ioremap early_ioremap +#define dmi_iounmap early_iounmap + +#endif /* _ASM_X86_DMI_H */ diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h new file mode 100644 index 00000000000..f6f15986df6 --- /dev/null +++ b/arch/x86/include/asm/dwarf2.h @@ -0,0 +1,146 @@ +#ifndef _ASM_X86_DWARF2_H +#define _ASM_X86_DWARF2_H + +#ifndef __ASSEMBLY__ +#warning "asm/dwarf2.h should be only included in pure assembly files" +#endif + +/* + * Macros for dwarf2 CFI unwind table entries. + * See "as.info" for details on these pseudo ops. Unfortunately + * they are only supported in very new binutils, so define them + * away for older version. + */ + +#ifdef CONFIG_AS_CFI + +#define CFI_STARTPROC .cfi_startproc +#define CFI_ENDPROC .cfi_endproc +#define CFI_DEF_CFA .cfi_def_cfa +#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register +#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset +#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset +#define CFI_OFFSET .cfi_offset +#define CFI_REL_OFFSET .cfi_rel_offset +#define CFI_REGISTER .cfi_register +#define CFI_RESTORE .cfi_restore +#define CFI_REMEMBER_STATE .cfi_remember_state +#define CFI_RESTORE_STATE .cfi_restore_state +#define CFI_UNDEFINED .cfi_undefined +#define CFI_ESCAPE .cfi_escape + +#ifdef CONFIG_AS_CFI_SIGNAL_FRAME +#define CFI_SIGNAL_FRAME .cfi_signal_frame +#else +#define CFI_SIGNAL_FRAME +#endif + +#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__) + /* + * Emit CFI data in .debug_frame sections, not .eh_frame sections. + * The latter we currently just discard since we don't do DWARF + * unwinding at runtime. So only the offline DWARF information is + * useful to anyone. Note we should not use this directive if this + * file is used in the vDSO assembly, or if vmlinux.lds.S gets + * changed so it doesn't discard .eh_frame. + */ + .cfi_sections .debug_frame +#endif + +#else + +/* + * Due to the structure of pre-exisiting code, don't use assembler line + * comment character # to ignore the arguments. Instead, use a dummy macro. + */ +.macro cfi_ignore a=0, b=0, c=0, d=0 +.endm + +#define CFI_STARTPROC cfi_ignore +#define CFI_ENDPROC cfi_ignore +#define CFI_DEF_CFA cfi_ignore +#define CFI_DEF_CFA_REGISTER cfi_ignore +#define CFI_DEF_CFA_OFFSET cfi_ignore +#define CFI_ADJUST_CFA_OFFSET cfi_ignore +#define CFI_OFFSET cfi_ignore +#define CFI_REL_OFFSET cfi_ignore +#define CFI_REGISTER cfi_ignore +#define CFI_RESTORE cfi_ignore +#define CFI_REMEMBER_STATE cfi_ignore +#define CFI_RESTORE_STATE cfi_ignore +#define CFI_UNDEFINED cfi_ignore +#define CFI_ESCAPE cfi_ignore +#define CFI_SIGNAL_FRAME cfi_ignore + +#endif + +/* + * An attempt to make CFI annotations more or less + * correct and shorter. It is implied that you know + * what you're doing if you use them. + */ +#ifdef __ASSEMBLY__ +#ifdef CONFIG_X86_64 + .macro pushq_cfi reg + pushq \reg + CFI_ADJUST_CFA_OFFSET 8 + .endm + + .macro popq_cfi reg + popq \reg + CFI_ADJUST_CFA_OFFSET -8 + .endm + + .macro pushfq_cfi + pushfq + CFI_ADJUST_CFA_OFFSET 8 + .endm + + .macro popfq_cfi + popfq + CFI_ADJUST_CFA_OFFSET -8 + .endm + + .macro movq_cfi reg offset=0 + movq %\reg, \offset(%rsp) + CFI_REL_OFFSET \reg, \offset + .endm + + .macro movq_cfi_restore offset reg + movq \offset(%rsp), %\reg + CFI_RESTORE \reg + .endm +#else /*!CONFIG_X86_64*/ + .macro pushl_cfi reg + pushl \reg + CFI_ADJUST_CFA_OFFSET 4 + .endm + + .macro popl_cfi reg + popl \reg + CFI_ADJUST_CFA_OFFSET -4 + .endm + + .macro pushfl_cfi + pushfl + CFI_ADJUST_CFA_OFFSET 4 + .endm + + .macro popfl_cfi + popfl + CFI_ADJUST_CFA_OFFSET -4 + .endm + + .macro movl_cfi reg offset=0 + movl %\reg, \offset(%esp) + CFI_REL_OFFSET \reg, \offset + .endm + + .macro movl_cfi_restore offset reg + movl \offset(%esp), %\reg + CFI_RESTORE \reg + .endm +#endif /*!CONFIG_X86_64*/ +#endif /*__ASSEMBLY__*/ + +#endif /* _ASM_X86_DWARF2_H */ diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h new file mode 100644 index 00000000000..37782566af2 --- /dev/null +++ b/arch/x86/include/asm/e820.h @@ -0,0 +1,149 @@ +#ifndef _ASM_X86_E820_H +#define _ASM_X86_E820_H +#define E820MAP 0x2d0 /* our map */ +#define E820MAX 128 /* number of entries in E820MAP */ + +/* + * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the + * constrained space in the zeropage. If we have more nodes than + * that, and if we've booted off EFI firmware, then the EFI tables + * passed us from the EFI firmware can list more nodes. Size our + * internal memory map tables to have room for these additional + * nodes, based on up to three entries per node for which the + * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT), + * plus E820MAX, allowing space for the possible duplicate E820 + * entries that might need room in the same arrays, prior to the + * call to sanitize_e820_map() to remove duplicates. The allowance + * of three memory map entries per node is "enough" entries for + * the initial hardware platform motivating this mechanism to make + * use of additional EFI map entries. Future platforms may want + * to allow more than three entries per node or otherwise refine + * this size. + */ + +/* + * Odd: 'make headers_check' complains about numa.h if I try + * to collapse the next two #ifdef lines to a single line: + * #if defined(__KERNEL__) && defined(CONFIG_EFI) + */ +#ifdef __KERNEL__ +#ifdef CONFIG_EFI +#include <linux/numa.h> +#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES) +#else /* ! CONFIG_EFI */ +#define E820_X_MAX E820MAX +#endif +#else /* ! __KERNEL__ */ +#define E820_X_MAX E820MAX +#endif + +#define E820NR 0x1e8 /* # entries in E820MAP */ + +#define E820_RAM 1 +#define E820_RESERVED 2 +#define E820_ACPI 3 +#define E820_NVS 4 +#define E820_UNUSABLE 5 + +/* + * reserved RAM used by kernel itself + * if CONFIG_INTEL_TXT is enabled, memory of this type will be + * included in the S3 integrity calculation and so should not include + * any memory that BIOS might alter over the S3 transition + */ +#define E820_RESERVED_KERN 128 + +#ifndef __ASSEMBLY__ +#include <linux/types.h> +struct e820entry { + __u64 addr; /* start of memory segment */ + __u64 size; /* size of memory segment */ + __u32 type; /* type of memory segment */ +} __attribute__((packed)); + +struct e820map { + __u32 nr_map; + struct e820entry map[E820_X_MAX]; +}; + +#define ISA_START_ADDRESS 0xa0000 +#define ISA_END_ADDRESS 0x100000 + +#define BIOS_BEGIN 0x000a0000 +#define BIOS_END 0x00100000 + +#define BIOS_ROM_BASE 0xffe00000 +#define BIOS_ROM_END 0xffffffff + +#ifdef __KERNEL__ +/* see comment in arch/x86/kernel/e820.c */ +extern struct e820map e820; +extern struct e820map e820_saved; + +extern unsigned long pci_mem_start; +extern int e820_any_mapped(u64 start, u64 end, unsigned type); +extern int e820_all_mapped(u64 start, u64 end, unsigned type); +extern void e820_add_region(u64 start, u64 size, int type); +extern void e820_print_map(char *who); +extern int +sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, u32 *pnr_map); +extern u64 e820_update_range(u64 start, u64 size, unsigned old_type, + unsigned new_type); +extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type, + int checktype); +extern void update_e820(void); +extern void e820_setup_gap(void); +extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, + unsigned long start_addr, unsigned long long end_addr); +struct setup_data; +extern void parse_e820_ext(struct setup_data *data); + +#if defined(CONFIG_X86_64) || \ + (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) +extern void e820_mark_nosave_regions(unsigned long limit_pfn); +#else +static inline void e820_mark_nosave_regions(unsigned long limit_pfn) +{ +} +#endif + +#ifdef CONFIG_MEMTEST +extern void early_memtest(unsigned long start, unsigned long end); +#else +static inline void early_memtest(unsigned long start, unsigned long end) +{ +} +#endif + +extern unsigned long e820_end_of_ram_pfn(void); +extern unsigned long e820_end_of_low_ram_pfn(void); +extern u64 early_reserve_e820(u64 sizet, u64 align); + +void memblock_x86_fill(void); +void memblock_find_dma_reserve(void); + +extern void finish_e820_parsing(void); +extern void e820_reserve_resources(void); +extern void e820_reserve_resources_late(void); +extern void setup_memory_map(void); +extern char *default_machine_specific_memory_setup(void); + +/* + * Returns true iff the specified range [s,e) is completely contained inside + * the ISA region. + */ +static inline bool is_ISA_range(u64 s, u64 e) +{ + return s >= ISA_START_ADDRESS && e <= ISA_END_ADDRESS; +} + +#endif /* __KERNEL__ */ +#endif /* __ASSEMBLY__ */ + +#ifdef __KERNEL__ +#include <linux/ioport.h> + +#define HIGH_MEMORY (1024*1024) +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_E820_H */ diff --git a/arch/x86/include/asm/edac.h b/arch/x86/include/asm/edac.h new file mode 100644 index 00000000000..e9b57ecc70c --- /dev/null +++ b/arch/x86/include/asm/edac.h @@ -0,0 +1,18 @@ +#ifndef _ASM_X86_EDAC_H +#define _ASM_X86_EDAC_H + +/* ECC atomic, DMA, SMP and interrupt safe scrub function */ + +static inline void atomic_scrub(void *va, u32 size) +{ + u32 i, *virt_addr = va; + + /* + * Very carefully read and write to memory atomically so we + * are interrupt, DMA and SMP safe. + */ + for (i = 0; i < size / 4; i++, virt_addr++) + asm volatile("lock; addl $0, %0"::"m" (*virt_addr)); +} + +#endif /* _ASM_X86_EDAC_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h new file mode 100644 index 00000000000..844f735fd63 --- /dev/null +++ b/arch/x86/include/asm/efi.h @@ -0,0 +1,115 @@ +#ifndef _ASM_X86_EFI_H +#define _ASM_X86_EFI_H + +#ifdef CONFIG_X86_32 + +#define EFI_LOADER_SIGNATURE "EL32" + +extern unsigned long asmlinkage efi_call_phys(void *, ...); + +#define efi_call_phys0(f) efi_call_phys(f) +#define efi_call_phys1(f, a1) efi_call_phys(f, a1) +#define efi_call_phys2(f, a1, a2) efi_call_phys(f, a1, a2) +#define efi_call_phys3(f, a1, a2, a3) efi_call_phys(f, a1, a2, a3) +#define efi_call_phys4(f, a1, a2, a3, a4) \ + efi_call_phys(f, a1, a2, a3, a4) +#define efi_call_phys5(f, a1, a2, a3, a4, a5) \ + efi_call_phys(f, a1, a2, a3, a4, a5) +#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6) \ + efi_call_phys(f, a1, a2, a3, a4, a5, a6) +/* + * Wrap all the virtual calls in a way that forces the parameters on the stack. + */ + +#define efi_call_virt(f, args...) \ + ((efi_##f##_t __attribute__((regparm(0)))*)efi.systab->runtime->f)(args) + +#define efi_call_virt0(f) efi_call_virt(f) +#define efi_call_virt1(f, a1) efi_call_virt(f, a1) +#define efi_call_virt2(f, a1, a2) efi_call_virt(f, a1, a2) +#define efi_call_virt3(f, a1, a2, a3) efi_call_virt(f, a1, a2, a3) +#define efi_call_virt4(f, a1, a2, a3, a4) \ + efi_call_virt(f, a1, a2, a3, a4) +#define efi_call_virt5(f, a1, a2, a3, a4, a5) \ + efi_call_virt(f, a1, a2, a3, a4, a5) +#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ + efi_call_virt(f, a1, a2, a3, a4, a5, a6) + +#define efi_ioremap(addr, size, type) ioremap_cache(addr, size) + +#else /* !CONFIG_X86_32 */ + +#define EFI_LOADER_SIGNATURE "EL64" + +extern u64 efi_call0(void *fp); +extern u64 efi_call1(void *fp, u64 arg1); +extern u64 efi_call2(void *fp, u64 arg1, u64 arg2); +extern u64 efi_call3(void *fp, u64 arg1, u64 arg2, u64 arg3); +extern u64 efi_call4(void *fp, u64 arg1, u64 arg2, u64 arg3, u64 arg4); +extern u64 efi_call5(void *fp, u64 arg1, u64 arg2, u64 arg3, + u64 arg4, u64 arg5); +extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, + u64 arg4, u64 arg5, u64 arg6); + +#define efi_call_phys0(f) \ + efi_call0((void *)(f)) +#define efi_call_phys1(f, a1) \ + efi_call1((void *)(f), (u64)(a1)) +#define efi_call_phys2(f, a1, a2) \ + efi_call2((void *)(f), (u64)(a1), (u64)(a2)) +#define efi_call_phys3(f, a1, a2, a3) \ + efi_call3((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3)) +#define efi_call_phys4(f, a1, a2, a3, a4) \ + efi_call4((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \ + (u64)(a4)) +#define efi_call_phys5(f, a1, a2, a3, a4, a5) \ + efi_call5((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \ + (u64)(a4), (u64)(a5)) +#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6) \ + efi_call6((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \ + (u64)(a4), (u64)(a5), (u64)(a6)) + +#define efi_call_virt0(f) \ + efi_call0((void *)(efi.systab->runtime->f)) +#define efi_call_virt1(f, a1) \ + efi_call1((void *)(efi.systab->runtime->f), (u64)(a1)) +#define efi_call_virt2(f, a1, a2) \ + efi_call2((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2)) +#define efi_call_virt3(f, a1, a2, a3) \ + efi_call3((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + (u64)(a3)) +#define efi_call_virt4(f, a1, a2, a3, a4) \ + efi_call4((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + (u64)(a3), (u64)(a4)) +#define efi_call_virt5(f, a1, a2, a3, a4, a5) \ + efi_call5((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + (u64)(a3), (u64)(a4), (u64)(a5)) +#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ + efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) + +extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, + u32 type); + +#endif /* CONFIG_X86_32 */ + +extern int add_efi_memmap; +extern void efi_set_executable(efi_memory_desc_t *md, bool executable); +extern void efi_memblock_x86_reserve_range(void); +extern void efi_call_phys_prelog(void); +extern void efi_call_phys_epilog(void); + +#ifndef CONFIG_EFI +/* + * IF EFI is not configured, have the EFI calls return -ENOSYS. + */ +#define efi_call0(_f) (-ENOSYS) +#define efi_call1(_f, _a1) (-ENOSYS) +#define efi_call2(_f, _a1, _a2) (-ENOSYS) +#define efi_call3(_f, _a1, _a2, _a3) (-ENOSYS) +#define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS) +#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS) +#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS) +#endif /* CONFIG_EFI */ + +#endif /* _ASM_X86_EFI_H */ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h new file mode 100644 index 00000000000..5f962df30d0 --- /dev/null +++ b/arch/x86/include/asm/elf.h @@ -0,0 +1,354 @@ +#ifndef _ASM_X86_ELF_H +#define _ASM_X86_ELF_H + +/* + * ELF register definitions.. + */ +#include <linux/thread_info.h> + +#include <asm/ptrace.h> +#include <asm/user.h> +#include <asm/auxvec.h> + +typedef unsigned long elf_greg_t; + +#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t)) +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef struct user_i387_struct elf_fpregset_t; + +#ifdef __i386__ + +typedef struct user_fxsr_struct elf_fpxregset_t; + +#define R_386_NONE 0 +#define R_386_32 1 +#define R_386_PC32 2 +#define R_386_GOT32 3 +#define R_386_PLT32 4 +#define R_386_COPY 5 +#define R_386_GLOB_DAT 6 +#define R_386_JMP_SLOT 7 +#define R_386_RELATIVE 8 +#define R_386_GOTOFF 9 +#define R_386_GOTPC 10 +#define R_386_NUM 11 + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS32 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_386 + +#else + +/* x86-64 relocation types */ +#define R_X86_64_NONE 0 /* No reloc */ +#define R_X86_64_64 1 /* Direct 64 bit */ +#define R_X86_64_PC32 2 /* PC relative 32 bit signed */ +#define R_X86_64_GOT32 3 /* 32 bit GOT entry */ +#define R_X86_64_PLT32 4 /* 32 bit PLT address */ +#define R_X86_64_COPY 5 /* Copy symbol at runtime */ +#define R_X86_64_GLOB_DAT 6 /* Create GOT entry */ +#define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */ +#define R_X86_64_RELATIVE 8 /* Adjust by program base */ +#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative + offset to GOT */ +#define R_X86_64_32 10 /* Direct 32 bit zero extended */ +#define R_X86_64_32S 11 /* Direct 32 bit sign extended */ +#define R_X86_64_16 12 /* Direct 16 bit zero extended */ +#define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */ +#define R_X86_64_8 14 /* Direct 8 bit sign extended */ +#define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */ + +#define R_X86_64_NUM 16 + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS64 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_X86_64 + +#endif + +#include <asm/vdso.h> + +extern unsigned int vdso_enabled; + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch_ia32(x) \ + (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) + +#include <asm/processor.h> +#include <asm/system.h> + +#ifdef CONFIG_X86_32 +#include <asm/desc.h> + +#define elf_check_arch(x) elf_check_arch_ia32(x) + +/* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx + contains a pointer to a function which might be registered using `atexit'. + This provides a mean for the dynamic linker to call DT_FINI functions for + shared libraries that have been loaded before the code runs. + + A value of 0 tells we have no such handler. + + We might as well make sure everything else is cleared too (except for %esp), + just to make things more deterministic. + */ +#define ELF_PLAT_INIT(_r, load_addr) \ + do { \ + _r->bx = 0; _r->cx = 0; _r->dx = 0; \ + _r->si = 0; _r->di = 0; _r->bp = 0; \ + _r->ax = 0; \ +} while (0) + +/* + * regs is struct pt_regs, pr_reg is elf_gregset_t (which is + * now struct_user_regs, they are different) + */ + +#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs) \ +do { \ + pr_reg[0] = regs->bx; \ + pr_reg[1] = regs->cx; \ + pr_reg[2] = regs->dx; \ + pr_reg[3] = regs->si; \ + pr_reg[4] = regs->di; \ + pr_reg[5] = regs->bp; \ + pr_reg[6] = regs->ax; \ + pr_reg[7] = regs->ds & 0xffff; \ + pr_reg[8] = regs->es & 0xffff; \ + pr_reg[9] = regs->fs & 0xffff; \ + pr_reg[11] = regs->orig_ax; \ + pr_reg[12] = regs->ip; \ + pr_reg[13] = regs->cs & 0xffff; \ + pr_reg[14] = regs->flags; \ + pr_reg[15] = regs->sp; \ + pr_reg[16] = regs->ss & 0xffff; \ +} while (0); + +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ +do { \ + ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ + pr_reg[10] = get_user_gs(regs); \ +} while (0); + +#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs) \ +do { \ + ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ + savesegment(gs, pr_reg[10]); \ +} while (0); + +#define ELF_PLATFORM (utsname()->machine) +#define set_personality_64bit() do { } while (0) + +#else /* CONFIG_X86_32 */ + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) \ + ((x)->e_machine == EM_X86_64) + +#define compat_elf_check_arch(x) elf_check_arch_ia32(x) + +static inline void elf_common_init(struct thread_struct *t, + struct pt_regs *regs, const u16 ds) +{ + regs->ax = regs->bx = regs->cx = regs->dx = 0; + regs->si = regs->di = regs->bp = 0; + regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; + regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; + t->fs = t->gs = 0; + t->fsindex = t->gsindex = 0; + t->ds = t->es = ds; +} + +#define ELF_PLAT_INIT(_r, load_addr) \ + elf_common_init(¤t->thread, _r, 0) + +#define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ + elf_common_init(¤t->thread, regs, __USER_DS) + +void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp); +#define compat_start_thread start_thread_ia32 + +void set_personality_ia32(void); +#define COMPAT_SET_PERSONALITY(ex) set_personality_ia32() + +#define COMPAT_ELF_PLATFORM ("i686") + +/* + * regs is struct pt_regs, pr_reg is elf_gregset_t (which is + * now struct_user_regs, they are different). Assumes current is the process + * getting dumped. + */ + +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ +do { \ + unsigned v; \ + (pr_reg)[0] = (regs)->r15; \ + (pr_reg)[1] = (regs)->r14; \ + (pr_reg)[2] = (regs)->r13; \ + (pr_reg)[3] = (regs)->r12; \ + (pr_reg)[4] = (regs)->bp; \ + (pr_reg)[5] = (regs)->bx; \ + (pr_reg)[6] = (regs)->r11; \ + (pr_reg)[7] = (regs)->r10; \ + (pr_reg)[8] = (regs)->r9; \ + (pr_reg)[9] = (regs)->r8; \ + (pr_reg)[10] = (regs)->ax; \ + (pr_reg)[11] = (regs)->cx; \ + (pr_reg)[12] = (regs)->dx; \ + (pr_reg)[13] = (regs)->si; \ + (pr_reg)[14] = (regs)->di; \ + (pr_reg)[15] = (regs)->orig_ax; \ + (pr_reg)[16] = (regs)->ip; \ + (pr_reg)[17] = (regs)->cs; \ + (pr_reg)[18] = (regs)->flags; \ + (pr_reg)[19] = (regs)->sp; \ + (pr_reg)[20] = (regs)->ss; \ + (pr_reg)[21] = current->thread.fs; \ + (pr_reg)[22] = current->thread.gs; \ + asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \ + asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \ + asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \ + asm("movl %%gs,%0" : "=r" (v)); (pr_reg)[26] = v; \ +} while (0); + +/* I'm not sure if we can use '-' here */ +#define ELF_PLATFORM ("x86_64") +extern void set_personality_64bit(void); +extern unsigned int sysctl_vsyscall32; +extern int force_personality32; + +#endif /* !CONFIG_X86_32 */ + +#define CORE_DUMP_USE_REGSET +#define ELF_EXEC_PAGESIZE 4096 + +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + +#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) + +/* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This could be done in user space, + but it's not easy, and we've already done it here. */ + +#define ELF_HWCAP (boot_cpu_data.x86_capability[0]) + +/* This yields a string that ld.so will use to load implementation + specific libraries for optimization. This is more specific in + intent than poking at uname or /proc/cpuinfo. + + For the moment, we have only optimizations for the Intel generations, + but that could change... */ + +#define SET_PERSONALITY(ex) set_personality_64bit() + +/* + * An executable for which elf_read_implies_exec() returns TRUE will + * have the READ_IMPLIES_EXEC personality flag set automatically. + */ +#define elf_read_implies_exec(ex, executable_stack) \ + (executable_stack != EXSTACK_DISABLE_X) + +struct task_struct; + +#define ARCH_DLINFO_IA32(vdso_enabled) \ +do { \ + if (vdso_enabled) { \ + NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ + } \ +} while (0) + +#ifdef CONFIG_X86_32 + +#define STACK_RND_MASK (0x7ff) + +#define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO)) + +#define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled) + +/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ + +#else /* CONFIG_X86_32 */ + +#define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */ + +/* 1GB for 64bit, 8MB for 32bit */ +#define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff) + +#define ARCH_DLINFO \ +do { \ + if (vdso_enabled) \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (unsigned long)current->mm->context.vdso); \ +} while (0) + +#define AT_SYSINFO 32 + +#define COMPAT_ARCH_DLINFO ARCH_DLINFO_IA32(sysctl_vsyscall32) + +#define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) + +#endif /* !CONFIG_X86_32 */ + +#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) + +#define VDSO_ENTRY \ + ((unsigned long)VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall)) + +struct linux_binprm; + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp); + +extern int syscall32_setup_pages(struct linux_binprm *, int exstack); +#define compat_arch_setup_additional_pages syscall32_setup_pages + +extern unsigned long arch_randomize_brk(struct mm_struct *mm); +#define arch_randomize_brk arch_randomize_brk + +/* + * True on X86_32 or when emulating IA32 on X86_64 + */ +static inline int mmap_is_ia32(void) +{ +#ifdef CONFIG_X86_32 + return 1; +#endif +#ifdef CONFIG_IA32_EMULATION + if (test_thread_flag(TIF_IA32)) + return 1; +#endif + return 0; +} + +/* The first two values are special, do not change. See align_addr() */ +enum align_flags { + ALIGN_VA_32 = BIT(0), + ALIGN_VA_64 = BIT(1), + ALIGN_VDSO = BIT(2), + ALIGN_TOPDOWN = BIT(3), +}; + +struct va_alignment { + int flags; + unsigned long mask; +} ____cacheline_aligned; + +extern struct va_alignment va_align; +extern unsigned long align_addr(unsigned long, struct file *, enum align_flags); +#endif /* _ASM_X86_ELF_H */ diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h new file mode 100644 index 00000000000..cc70c1c78ca --- /dev/null +++ b/arch/x86/include/asm/emergency-restart.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_EMERGENCY_RESTART_H +#define _ASM_X86_EMERGENCY_RESTART_H + +enum reboot_type { + BOOT_TRIPLE = 't', + BOOT_KBD = 'k', +#ifdef CONFIG_X86_32 + BOOT_BIOS = 'b', +#endif + BOOT_ACPI = 'a', + BOOT_EFI = 'e', + BOOT_CF9 = 'p', + BOOT_CF9_COND = 'q', +}; + +extern enum reboot_type reboot_type; + +extern void machine_emergency_restart(void); + +#endif /* _ASM_X86_EMERGENCY_RESTART_H */ diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h new file mode 100644 index 00000000000..0baa628e330 --- /dev/null +++ b/arch/x86/include/asm/entry_arch.h @@ -0,0 +1,56 @@ +/* + * This file is designed to contain the BUILD_INTERRUPT specifications for + * all of the extra named interrupt vectors used by the architecture. + * Usually this is the Inter Process Interrupts (IPIs) + */ + +/* + * The following vectors are part of the Linux architecture, there + * is no hardware IRQ pin equivalent for them, they are triggered + * through the ICC by us (IPIs) + */ +#ifdef CONFIG_SMP +BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) +BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) +BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) +BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) +BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) + +.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 +.if NUM_INVALIDATE_TLB_VECTORS > \idx +BUILD_INTERRUPT3(invalidate_interrupt\idx, + (INVALIDATE_TLB_VECTOR_START)+\idx, + smp_invalidate_interrupt) +.endif +.endr +#endif + +BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) + +/* + * every pentium local APIC has two 'local interrupts', with a + * soft-definable vector attached to both interrupts, one of + * which is a timer interrupt, the other one is error counter + * overflow. Linux uses the local APIC timer interrupt to get + * a much simpler SMP time architecture: + */ +#ifdef CONFIG_X86_LOCAL_APIC + +BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) +BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) +BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) + +#ifdef CONFIG_IRQ_WORK +BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR) +#endif + +#ifdef CONFIG_X86_THERMAL_VECTOR +BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) +#endif + +#ifdef CONFIG_X86_MCE_THRESHOLD +BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) +#endif + +#endif diff --git a/arch/x86/include/asm/errno.h b/arch/x86/include/asm/errno.h new file mode 100644 index 00000000000..4c82b503d92 --- /dev/null +++ b/arch/x86/include/asm/errno.h @@ -0,0 +1 @@ +#include <asm-generic/errno.h> diff --git a/arch/x86/include/asm/fb.h b/arch/x86/include/asm/fb.h new file mode 100644 index 00000000000..2519d0679d9 --- /dev/null +++ b/arch/x86/include/asm/fb.h @@ -0,0 +1,17 @@ +#ifndef _ASM_X86_FB_H +#define _ASM_X86_FB_H + +#include <linux/fb.h> +#include <linux/fs.h> +#include <asm/page.h> + +static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, + unsigned long off) +{ + if (boot_cpu_data.x86 > 3) + pgprot_val(vma->vm_page_prot) |= _PAGE_PCD; +} + +extern int fb_is_primary_device(struct fb_info *info); + +#endif /* _ASM_X86_FB_H */ diff --git a/arch/x86/include/asm/fcntl.h b/arch/x86/include/asm/fcntl.h new file mode 100644 index 00000000000..46ab12db573 --- /dev/null +++ b/arch/x86/include/asm/fcntl.h @@ -0,0 +1 @@ +#include <asm-generic/fcntl.h> diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h new file mode 100644 index 00000000000..4da3c0c4c97 --- /dev/null +++ b/arch/x86/include/asm/fixmap.h @@ -0,0 +1,238 @@ +/* + * fixmap.h: compile-time virtual memory allocation + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998 Ingo Molnar + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + * x86_32 and x86_64 integration by Gustavo F. Padovan, February 2009 + */ + +#ifndef _ASM_X86_FIXMAP_H +#define _ASM_X86_FIXMAP_H + +#ifndef __ASSEMBLY__ +#include <linux/kernel.h> +#include <asm/acpi.h> +#include <asm/apicdef.h> +#include <asm/page.h> +#ifdef CONFIG_X86_32 +#include <linux/threads.h> +#include <asm/kmap_types.h> +#else +#include <asm/vsyscall.h> +#endif + +/* + * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall + * uses fixmaps that relies on FIXADDR_TOP for proper address calculation. + * Because of this, FIXADDR_TOP x86 integration was left as later work. + */ +#ifdef CONFIG_X86_32 +/* used by vmalloc.c, vsyscall.lds.S. + * + * Leave one empty page between vmalloc'ed areas and + * the start of the fixmap. + */ +extern unsigned long __FIXADDR_TOP; +#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) + +#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) +#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) +#else +#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) + +/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */ +#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) +#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) +#endif + + +/* + * Here we define all the compile-time 'special' virtual + * addresses. The point is to have a constant address at + * compile time, but to set the physical address only + * in the boot process. + * for x86_32: We allocate these special addresses + * from the end of virtual memory (0xfffff000) backwards. + * Also this lets us do fail-safe vmalloc(), we + * can guarantee that these special addresses and + * vmalloc()-ed addresses never overlap. + * + * These 'compile-time allocated' memory buffers are + * fixed-size 4k pages (or larger if used with an increment + * higher than 1). Use set_fixmap(idx,phys) to associate + * physical memory with fixmap indices. + * + * TLB entries of such buffers will not be flushed across + * task switches. + */ +enum fixed_addresses { +#ifdef CONFIG_X86_32 + FIX_HOLE, + FIX_VDSO, +#else + VSYSCALL_LAST_PAGE, + VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, + VVAR_PAGE, + VSYSCALL_HPET, +#endif + FIX_DBGP_BASE, + FIX_EARLYCON_MEM_BASE, +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT + FIX_OHCI1394_BASE, +#endif +#ifdef CONFIG_X86_LOCAL_APIC + FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ +#endif +#ifdef CONFIG_X86_IO_APIC + FIX_IO_APIC_BASE_0, + FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, +#endif +#ifdef CONFIG_X86_VISWS_APIC + FIX_CO_CPU, /* Cobalt timer */ + FIX_CO_APIC, /* Cobalt APIC Redirection Table */ + FIX_LI_PCIA, /* Lithium PCI Bridge A */ + FIX_LI_PCIB, /* Lithium PCI Bridge B */ +#endif +#ifdef CONFIG_X86_F00F_BUG + FIX_F00F_IDT, /* Virtual mapping for IDT */ +#endif +#ifdef CONFIG_X86_CYCLONE_TIMER + FIX_CYCLONE_TIMER, /*cyclone timer register*/ +#endif +#ifdef CONFIG_X86_32 + FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, +#ifdef CONFIG_PCI_MMCONFIG + FIX_PCIE_MCFG, +#endif +#endif +#ifdef CONFIG_PARAVIRT + FIX_PARAVIRT_BOOTMAP, +#endif + FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */ + FIX_TEXT_POKE0, /* first page is last, because allocation is backward */ +#ifdef CONFIG_X86_INTEL_MID + FIX_LNW_VRTC, +#endif + __end_of_permanent_fixed_addresses, + + /* + * 256 temporary boot-time mappings, used by early_ioremap(), + * before ioremap() is functional. + * + * If necessary we round it up to the next 256 pages boundary so + * that we can have a single pgd entry and a single pte table: + */ +#define NR_FIX_BTMAPS 64 +#define FIX_BTMAPS_SLOTS 4 +#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) + FIX_BTMAP_END = + (__end_of_permanent_fixed_addresses ^ + (__end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS - 1)) & + -PTRS_PER_PTE + ? __end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS - + (__end_of_permanent_fixed_addresses & (TOTAL_FIX_BTMAPS - 1)) + : __end_of_permanent_fixed_addresses, + FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, +#ifdef CONFIG_X86_32 + FIX_WP_TEST, +#endif +#ifdef CONFIG_INTEL_TXT + FIX_TBOOT_BASE, +#endif + __end_of_fixed_addresses +}; + + +extern void reserve_top_address(unsigned long reserve); + +#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_BOOT_START (FIXADDR_TOP - FIXADDR_BOOT_SIZE) + +extern int fixmaps_set; + +extern pte_t *kmap_pte; +extern pgprot_t kmap_prot; +extern pte_t *pkmap_page_table; + +void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); +void native_set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags); + +#ifndef CONFIG_PARAVIRT +static inline void __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + native_set_fixmap(idx, phys, flags); +} +#endif + +#define set_fixmap(idx, phys) \ + __set_fixmap(idx, phys, PAGE_KERNEL) + +/* + * Some hardware wants to get fixmapped without caching. + */ +#define set_fixmap_nocache(idx, phys) \ + __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) + +#define clear_fixmap(idx) \ + __set_fixmap(idx, 0, __pgprot(0)) + +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) +#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) + +extern void __this_fixmap_does_not_exist(void); + +/* + * 'index to address' translation. If anyone tries to use the idx + * directly without translation, we catch the bug with a NULL-deference + * kernel oops. Illegal ranges of incoming indices are caught too. + */ +static __always_inline unsigned long fix_to_virt(const unsigned int idx) +{ + /* + * this branch gets completely eliminated after inlining, + * except when someone tries to use fixaddr indices in an + * illegal way. (such as mixing up address types or using + * out-of-range indices). + * + * If it doesn't get removed, the linker will complain + * loudly with a reasonably clear error message.. + */ + if (idx >= __end_of_fixed_addresses) + __this_fixmap_does_not_exist(); + + return __fix_to_virt(idx); +} + +static inline unsigned long virt_to_fix(const unsigned long vaddr) +{ + BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); + return __virt_to_fix(vaddr); +} + +/* Return an pointer with offset calculated */ +static __always_inline unsigned long +__set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) +{ + __set_fixmap(idx, phys, flags); + return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1)); +} + +#define set_fixmap_offset(idx, phys) \ + __set_fixmap_offset(idx, phys, PAGE_KERNEL) + +#define set_fixmap_offset_nocache(idx, phys) \ + __set_fixmap_offset(idx, phys, PAGE_KERNEL_NOCACHE) + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h new file mode 100644 index 00000000000..dbe82a5c5ea --- /dev/null +++ b/arch/x86/include/asm/floppy.h @@ -0,0 +1,281 @@ +/* + * Architecture specific parts of the Floppy driver + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1995 + */ +#ifndef _ASM_X86_FLOPPY_H +#define _ASM_X86_FLOPPY_H + +#include <linux/vmalloc.h> + +/* + * The DMA channel used by the floppy controller cannot access data at + * addresses >= 16MB + * + * Went back to the 1MB limit, as some people had problems with the floppy + * driver otherwise. It doesn't matter much for performance anyway, as most + * floppy accesses go through the track buffer. + */ +#define _CROSS_64KB(a, s, vdma) \ + (!(vdma) && \ + ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64)) + +#define CROSS_64KB(a, s) _CROSS_64KB(a, s, use_virtual_dma & 1) + + +#define SW fd_routine[use_virtual_dma & 1] +#define CSW fd_routine[can_use_virtual_dma & 1] + + +#define fd_inb(port) inb_p(port) +#define fd_outb(value, port) outb_p(value, port) + +#define fd_request_dma() CSW._request_dma(FLOPPY_DMA, "floppy") +#define fd_free_dma() CSW._free_dma(FLOPPY_DMA) +#define fd_enable_irq() enable_irq(FLOPPY_IRQ) +#define fd_disable_irq() disable_irq(FLOPPY_IRQ) +#define fd_free_irq() free_irq(FLOPPY_IRQ, NULL) +#define fd_get_dma_residue() SW._get_dma_residue(FLOPPY_DMA) +#define fd_dma_mem_alloc(size) SW._dma_mem_alloc(size) +#define fd_dma_setup(addr, size, mode, io) SW._dma_setup(addr, size, mode, io) + +#define FLOPPY_CAN_FALLBACK_ON_NODMA + +static int virtual_dma_count; +static int virtual_dma_residue; +static char *virtual_dma_addr; +static int virtual_dma_mode; +static int doing_pdma; + +static irqreturn_t floppy_hardint(int irq, void *dev_id) +{ + unsigned char st; + +#undef TRACE_FLPY_INT + +#ifdef TRACE_FLPY_INT + static int calls; + static int bytes; + static int dma_wait; +#endif + if (!doing_pdma) + return floppy_interrupt(irq, dev_id); + +#ifdef TRACE_FLPY_INT + if (!calls) + bytes = virtual_dma_count; +#endif + + { + int lcount; + char *lptr; + + st = 1; + for (lcount = virtual_dma_count, lptr = virtual_dma_addr; + lcount; lcount--, lptr++) { + st = inb(virtual_dma_port + 4) & 0xa0; + if (st != 0xa0) + break; + if (virtual_dma_mode) + outb_p(*lptr, virtual_dma_port + 5); + else + *lptr = inb_p(virtual_dma_port + 5); + } + virtual_dma_count = lcount; + virtual_dma_addr = lptr; + st = inb(virtual_dma_port + 4); + } + +#ifdef TRACE_FLPY_INT + calls++; +#endif + if (st == 0x20) + return IRQ_HANDLED; + if (!(st & 0x20)) { + virtual_dma_residue += virtual_dma_count; + virtual_dma_count = 0; +#ifdef TRACE_FLPY_INT + printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", + virtual_dma_count, virtual_dma_residue, calls, bytes, + dma_wait); + calls = 0; + dma_wait = 0; +#endif + doing_pdma = 0; + floppy_interrupt(irq, dev_id); + return IRQ_HANDLED; + } +#ifdef TRACE_FLPY_INT + if (!virtual_dma_count) + dma_wait++; +#endif + return IRQ_HANDLED; +} + +static void fd_disable_dma(void) +{ + if (!(can_use_virtual_dma & 1)) + disable_dma(FLOPPY_DMA); + doing_pdma = 0; + virtual_dma_residue += virtual_dma_count; + virtual_dma_count = 0; +} + +static int vdma_request_dma(unsigned int dmanr, const char *device_id) +{ + return 0; +} + +static void vdma_nop(unsigned int dummy) +{ +} + + +static int vdma_get_dma_residue(unsigned int dummy) +{ + return virtual_dma_count + virtual_dma_residue; +} + + +static int fd_request_irq(void) +{ + if (can_use_virtual_dma) + return request_irq(FLOPPY_IRQ, floppy_hardint, + IRQF_DISABLED, "floppy", NULL); + else + return request_irq(FLOPPY_IRQ, floppy_interrupt, + IRQF_DISABLED, "floppy", NULL); +} + +static unsigned long dma_mem_alloc(unsigned long size) +{ + return __get_dma_pages(GFP_KERNEL|__GFP_NORETRY, get_order(size)); +} + + +static unsigned long vdma_mem_alloc(unsigned long size) +{ + return (unsigned long)vmalloc(size); + +} + +#define nodma_mem_alloc(size) vdma_mem_alloc(size) + +static void _fd_dma_mem_free(unsigned long addr, unsigned long size) +{ + if ((unsigned long)addr >= (unsigned long)high_memory) + vfree((void *)addr); + else + free_pages(addr, get_order(size)); +} + +#define fd_dma_mem_free(addr, size) _fd_dma_mem_free(addr, size) + +static void _fd_chose_dma_mode(char *addr, unsigned long size) +{ + if (can_use_virtual_dma == 2) { + if ((unsigned long)addr >= (unsigned long)high_memory || + isa_virt_to_bus(addr) >= 0x1000000 || + _CROSS_64KB(addr, size, 0)) + use_virtual_dma = 1; + else + use_virtual_dma = 0; + } else { + use_virtual_dma = can_use_virtual_dma & 1; + } +} + +#define fd_chose_dma_mode(addr, size) _fd_chose_dma_mode(addr, size) + + +static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io) +{ + doing_pdma = 1; + virtual_dma_port = io; + virtual_dma_mode = (mode == DMA_MODE_WRITE); + virtual_dma_addr = addr; + virtual_dma_count = size; + virtual_dma_residue = 0; + return 0; +} + +static int hard_dma_setup(char *addr, unsigned long size, int mode, int io) +{ +#ifdef FLOPPY_SANITY_CHECK + if (CROSS_64KB(addr, size)) { + printk("DMA crossing 64-K boundary %p-%p\n", addr, addr+size); + return -1; + } +#endif + /* actual, physical DMA */ + doing_pdma = 0; + clear_dma_ff(FLOPPY_DMA); + set_dma_mode(FLOPPY_DMA, mode); + set_dma_addr(FLOPPY_DMA, isa_virt_to_bus(addr)); + set_dma_count(FLOPPY_DMA, size); + enable_dma(FLOPPY_DMA); + return 0; +} + +static struct fd_routine_l { + int (*_request_dma)(unsigned int dmanr, const char *device_id); + void (*_free_dma)(unsigned int dmanr); + int (*_get_dma_residue)(unsigned int dummy); + unsigned long (*_dma_mem_alloc)(unsigned long size); + int (*_dma_setup)(char *addr, unsigned long size, int mode, int io); +} fd_routine[] = { + { + request_dma, + free_dma, + get_dma_residue, + dma_mem_alloc, + hard_dma_setup + }, + { + vdma_request_dma, + vdma_nop, + vdma_get_dma_residue, + vdma_mem_alloc, + vdma_dma_setup + } +}; + + +static int FDC1 = 0x3f0; +static int FDC2 = -1; + +/* + * Floppy types are stored in the rtc's CMOS RAM and so rtc_lock + * is needed to prevent corrupted CMOS RAM in case "insmod floppy" + * coincides with another rtc CMOS user. Paul G. + */ +#define FLOPPY0_TYPE \ +({ \ + unsigned long flags; \ + unsigned char val; \ + spin_lock_irqsave(&rtc_lock, flags); \ + val = (CMOS_READ(0x10) >> 4) & 15; \ + spin_unlock_irqrestore(&rtc_lock, flags); \ + val; \ +}) + +#define FLOPPY1_TYPE \ +({ \ + unsigned long flags; \ + unsigned char val; \ + spin_lock_irqsave(&rtc_lock, flags); \ + val = CMOS_READ(0x10) & 15; \ + spin_unlock_irqrestore(&rtc_lock, flags); \ + val; \ +}) + +#define N_FDC 2 +#define N_DRIVE 8 + +#define EXTRA_FLOPPY_PARAMS + +#endif /* _ASM_X86_FLOPPY_H */ diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h new file mode 100644 index 00000000000..3b629f47eb6 --- /dev/null +++ b/arch/x86/include/asm/frame.h @@ -0,0 +1,26 @@ +#ifdef __ASSEMBLY__ + +#include <asm/asm.h> +#include <asm/dwarf2.h> + +/* The annotation hides the frame from the unwinder and makes it look + like a ordinary ebp save/restore. This avoids some special cases for + frame pointer later */ +#ifdef CONFIG_FRAME_POINTER + .macro FRAME + __ASM_SIZE(push,_cfi) %__ASM_REG(bp) + CFI_REL_OFFSET __ASM_REG(bp), 0 + __ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp) + .endm + .macro ENDFRAME + __ASM_SIZE(pop,_cfi) %__ASM_REG(bp) + CFI_RESTORE __ASM_REG(bp) + .endm +#else + .macro FRAME + .endm + .macro ENDFRAME + .endm +#endif + +#endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h new file mode 100644 index 00000000000..268c783ab1c --- /dev/null +++ b/arch/x86/include/asm/ftrace.h @@ -0,0 +1,57 @@ +#ifndef _ASM_X86_FTRACE_H +#define _ASM_X86_FTRACE_H + +#ifdef __ASSEMBLY__ + + .macro MCOUNT_SAVE_FRAME + /* taken from glibc */ + subq $0x38, %rsp + movq %rax, (%rsp) + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rsi, 24(%rsp) + movq %rdi, 32(%rsp) + movq %r8, 40(%rsp) + movq %r9, 48(%rsp) + .endm + + .macro MCOUNT_RESTORE_FRAME + movq 48(%rsp), %r9 + movq 40(%rsp), %r8 + movq 32(%rsp), %rdi + movq 24(%rsp), %rsi + movq 16(%rsp), %rdx + movq 8(%rsp), %rcx + movq (%rsp), %rax + addq $0x38, %rsp + .endm + +#endif + +#ifdef CONFIG_FUNCTION_TRACER +#define MCOUNT_ADDR ((long)(mcount)) +#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ + +#ifndef __ASSEMBLY__ +extern void mcount(void); + +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + /* + * addr is the address of the mcount call instruction. + * recordmcount does the necessary offset calculation. + */ + return addr; +} + +#ifdef CONFIG_DYNAMIC_FTRACE + +struct dyn_arch_ftrace { + /* No extra data needed for x86 */ +}; + +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_FUNCTION_TRACER */ + +#endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h new file mode 100644 index 00000000000..d09bb03653f --- /dev/null +++ b/arch/x86/include/asm/futex.h @@ -0,0 +1,142 @@ +#ifndef _ASM_X86_FUTEX_H +#define _ASM_X86_FUTEX_H + +#ifdef __KERNEL__ + +#include <linux/futex.h> +#include <linux/uaccess.h> + +#include <asm/asm.h> +#include <asm/errno.h> +#include <asm/processor.h> +#include <asm/system.h> + +#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ + asm volatile("1:\t" insn "\n" \ + "2:\t.section .fixup,\"ax\"\n" \ + "3:\tmov\t%3, %1\n" \ + "\tjmp\t2b\n" \ + "\t.previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ + : "i" (-EFAULT), "0" (oparg), "1" (0)) + +#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ + asm volatile("1:\tmovl %2, %0\n" \ + "\tmovl\t%0, %3\n" \ + "\t" insn "\n" \ + "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ + "\tjnz\t1b\n" \ + "3:\t.section .fixup,\"ax\"\n" \ + "4:\tmov\t%5, %1\n" \ + "\tjmp\t3b\n" \ + "\t.previous\n" \ + _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE(2b, 4b) \ + : "=&a" (oldval), "=&r" (ret), \ + "+m" (*uaddr), "=&r" (tem) \ + : "r" (oparg), "i" (-EFAULT), "1" (0)) + +static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + +#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) + /* Real i386 machines can only support FUTEX_OP_SET */ + if (op != FUTEX_OP_SET && boot_cpu_data.x86 == 3) + return -ENOSYS; +#endif + + pagefault_disable(); + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, + uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + + pagefault_enable(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: + ret = (oldval == cmparg); + break; + case FUTEX_OP_CMP_NE: + ret = (oldval != cmparg); + break; + case FUTEX_OP_CMP_LT: + ret = (oldval < cmparg); + break; + case FUTEX_OP_CMP_GE: + ret = (oldval >= cmparg); + break; + case FUTEX_OP_CMP_LE: + ret = (oldval <= cmparg); + break; + case FUTEX_OP_CMP_GT: + ret = (oldval > cmparg); + break; + default: + ret = -ENOSYS; + } + } + return ret; +} + +static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int ret = 0; + +#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) + /* Real i386 machines have no cmpxchg instruction */ + if (boot_cpu_data.x86 == 3) + return -ENOSYS; +#endif + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" + "2:\t.section .fixup, \"ax\"\n" + "3:\tmov %3, %0\n" + "\tjmp 2b\n" + "\t.previous\n" + _ASM_EXTABLE(1b, 3b) + : "+r" (ret), "=a" (oldval), "+m" (*uaddr) + : "i" (-EFAULT), "r" (newval), "1" (oldval) + : "memory" + ); + + *uval = oldval; + return ret; +} + +#endif +#endif /* _ASM_X86_FUTEX_H */ diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h new file mode 100644 index 00000000000..156cd5d18d2 --- /dev/null +++ b/arch/x86/include/asm/gart.h @@ -0,0 +1,113 @@ +#ifndef _ASM_X86_GART_H +#define _ASM_X86_GART_H + +#include <asm/e820.h> + +extern void set_up_gart_resume(u32, u32); + +extern int fallback_aper_order; +extern int fallback_aper_force; +extern int fix_aperture; + +/* PTE bits. */ +#define GPTE_VALID 1 +#define GPTE_COHERENT 2 + +/* Aperture control register bits. */ +#define GARTEN (1<<0) +#define DISGARTCPU (1<<4) +#define DISGARTIO (1<<5) +#define DISTLBWALKPRB (1<<6) + +/* GART cache control register bits. */ +#define INVGART (1<<0) +#define GARTPTEERR (1<<1) + +/* K8 On-cpu GART registers */ +#define AMD64_GARTAPERTURECTL 0x90 +#define AMD64_GARTAPERTUREBASE 0x94 +#define AMD64_GARTTABLEBASE 0x98 +#define AMD64_GARTCACHECTL 0x9c + +#ifdef CONFIG_GART_IOMMU +extern int gart_iommu_aperture; +extern int gart_iommu_aperture_allowed; +extern int gart_iommu_aperture_disabled; + +extern void early_gart_iommu_check(void); +extern int gart_iommu_init(void); +extern void __init gart_parse_options(char *); +extern int gart_iommu_hole_init(void); + +#else +#define gart_iommu_aperture 0 +#define gart_iommu_aperture_allowed 0 +#define gart_iommu_aperture_disabled 1 + +static inline void early_gart_iommu_check(void) +{ +} +static inline void gart_parse_options(char *options) +{ +} +static inline int gart_iommu_hole_init(void) +{ + return -ENODEV; +} +#endif + +extern int agp_amd64_init(void); + +static inline void gart_set_size_and_enable(struct pci_dev *dev, u32 order) +{ + u32 ctl; + + /* + * Don't enable translation but enable GART IO and CPU accesses. + * Also, set DISTLBWALKPRB since GART tables memory is UC. + */ + ctl = order << 1; + + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); +} + +static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) +{ + u32 tmp, ctl; + + /* address of the mappings table */ + addr >>= 12; + tmp = (u32) addr<<4; + tmp &= ~0xf; + pci_write_config_dword(dev, AMD64_GARTTABLEBASE, tmp); + + /* Enable GART translation for this hammer. */ + pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); + ctl |= GARTEN | DISTLBWALKPRB; + ctl &= ~(DISGARTCPU | DISGARTIO); + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); +} + +static inline int aperture_valid(u64 aper_base, u32 aper_size, u32 min_size) +{ + if (!aper_base) + return 0; + + if (aper_base + aper_size > 0x100000000ULL) { + printk(KERN_INFO "Aperture beyond 4GB. Ignoring.\n"); + return 0; + } + if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { + printk(KERN_INFO "Aperture pointing to e820 RAM. Ignoring.\n"); + return 0; + } + if (aper_size < min_size) { + printk(KERN_INFO "Aperture too small (%d MB) than (%d MB)\n", + aper_size>>20, min_size>>20); + return 0; + } + + return 1; +} + +#endif /* _ASM_X86_GART_H */ diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h new file mode 100644 index 00000000000..4b8b98fa7f2 --- /dev/null +++ b/arch/x86/include/asm/genapic.h @@ -0,0 +1 @@ +#include <asm/apic.h> diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h new file mode 100644 index 00000000000..7cd73552a4e --- /dev/null +++ b/arch/x86/include/asm/geode.h @@ -0,0 +1,36 @@ +/* + * AMD Geode definitions + * Copyright (C) 2006, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef _ASM_X86_GEODE_H +#define _ASM_X86_GEODE_H + +#include <asm/processor.h> +#include <linux/io.h> +#include <linux/cs5535.h> + +static inline int is_geode_gx(void) +{ + return ((boot_cpu_data.x86_vendor == X86_VENDOR_NSC) && + (boot_cpu_data.x86 == 5) && + (boot_cpu_data.x86_model == 5)); +} + +static inline int is_geode_lx(void) +{ + return ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && + (boot_cpu_data.x86 == 5) && + (boot_cpu_data.x86_model == 10)); +} + +static inline int is_geode(void) +{ + return (is_geode_gx() || is_geode_lx()); +} + +#endif /* _ASM_X86_GEODE_H */ diff --git a/arch/x86/include/asm/gpio.h b/arch/x86/include/asm/gpio.h new file mode 100644 index 00000000000..91d915a6525 --- /dev/null +++ b/arch/x86/include/asm/gpio.h @@ -0,0 +1,53 @@ +/* + * Generic GPIO API implementation for x86. + * + * Derived from the generic GPIO API for powerpc: + * + * Copyright (c) 2007-2008 MontaVista Software, Inc. + * + * Author: Anton Vorontsov <avorontsov@ru.mvista.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _ASM_X86_GPIO_H +#define _ASM_X86_GPIO_H + +#include <asm-generic/gpio.h> + +#ifdef CONFIG_GPIOLIB + +/* + * Just call gpiolib. + */ +static inline int gpio_get_value(unsigned int gpio) +{ + return __gpio_get_value(gpio); +} + +static inline void gpio_set_value(unsigned int gpio, int value) +{ + __gpio_set_value(gpio, value); +} + +static inline int gpio_cansleep(unsigned int gpio) +{ + return __gpio_cansleep(gpio); +} + +static inline int gpio_to_irq(unsigned int gpio) +{ + return __gpio_to_irq(gpio); +} + +static inline int irq_to_gpio(unsigned int irq) +{ + return -EINVAL; +} + +#endif /* CONFIG_GPIOLIB */ + +#endif /* _ASM_X86_GPIO_H */ diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h new file mode 100644 index 00000000000..da0b3ca815b --- /dev/null +++ b/arch/x86/include/asm/hardirq.h @@ -0,0 +1,56 @@ +#ifndef _ASM_X86_HARDIRQ_H +#define _ASM_X86_HARDIRQ_H + +#include <linux/threads.h> +#include <linux/irq.h> + +typedef struct { + unsigned int __softirq_pending; + unsigned int __nmi_count; /* arch dependent */ + unsigned int irq0_irqs; +#ifdef CONFIG_X86_LOCAL_APIC + unsigned int apic_timer_irqs; /* arch dependent */ + unsigned int irq_spurious_count; + unsigned int icr_read_retry_count; +#endif + unsigned int x86_platform_ipis; /* arch dependent */ + unsigned int apic_perf_irqs; + unsigned int apic_irq_work_irqs; +#ifdef CONFIG_SMP + unsigned int irq_resched_count; + unsigned int irq_call_count; + unsigned int irq_tlb_count; +#endif +#ifdef CONFIG_X86_THERMAL_VECTOR + unsigned int irq_thermal_count; +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD + unsigned int irq_threshold_count; +#endif +} ____cacheline_aligned irq_cpustat_t; + +DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); + +/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ +#define MAX_HARDIRQS_PER_CPU NR_VECTORS + +#define __ARCH_IRQ_STAT + +#define inc_irq_stat(member) percpu_inc(irq_stat.member) + +#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) + +#define __ARCH_SET_SOFTIRQ_PENDING + +#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) + +extern void ack_bad_irq(unsigned int irq); + +extern u64 arch_irq_stat_cpu(unsigned int cpu); +#define arch_irq_stat_cpu arch_irq_stat_cpu + +extern u64 arch_irq_stat(void); +#define arch_irq_stat arch_irq_stat + +#endif /* _ASM_X86_HARDIRQ_H */ diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h new file mode 100644 index 00000000000..3bd04022fd0 --- /dev/null +++ b/arch/x86/include/asm/highmem.h @@ -0,0 +1,77 @@ +/* + * highmem.h: virtual kernel memory mappings for high memory + * + * Used in CONFIG_HIGHMEM systems for memory pages which + * are not addressable by direct kernel virtual addresses. + * + * Copyright (C) 1999 Gerhard Wichert, Siemens AG + * Gerhard.Wichert@pdb.siemens.de + * + * + * Redesigned the x86 32-bit VM architecture to deal with + * up to 16 Terabyte physical memory. With current x86 CPUs + * we now support up to 64 Gigabytes physical RAM. + * + * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> + */ + +#ifndef _ASM_X86_HIGHMEM_H +#define _ASM_X86_HIGHMEM_H + +#ifdef __KERNEL__ + +#include <linux/interrupt.h> +#include <linux/threads.h> +#include <asm/kmap_types.h> +#include <asm/tlbflush.h> +#include <asm/paravirt.h> +#include <asm/fixmap.h> + +/* declarations for highmem.c */ +extern unsigned long highstart_pfn, highend_pfn; + +/* + * Right now we initialize only a single pte table. It can be extended + * easily, subsequent pte tables have to be allocated in one physical + * chunk of RAM. + */ +/* + * Ordering is: + * + * FIXADDR_TOP + * fixed_addresses + * FIXADDR_START + * temp fixed addresses + * FIXADDR_BOOT_START + * Persistent kmap area + * PKMAP_BASE + * VMALLOC_END + * Vmalloc area + * VMALLOC_START + * high_memory + */ +#define LAST_PKMAP_MASK (LAST_PKMAP-1) +#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) + +extern void *kmap_high(struct page *page); +extern void kunmap_high(struct page *page); + +void *kmap(struct page *page); +void kunmap(struct page *page); + +void *kmap_atomic_prot(struct page *page, pgprot_t prot); +void *__kmap_atomic(struct page *page); +void __kunmap_atomic(void *kvaddr); +void *kmap_atomic_pfn(unsigned long pfn); +void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); +struct page *kmap_atomic_to_page(void *ptr); + +#define flush_cache_kmaps() do { } while (0) + +extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, + unsigned long end_pfn); + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_HIGHMEM_H */ diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h new file mode 100644 index 00000000000..2c392d663dc --- /dev/null +++ b/arch/x86/include/asm/hpet.h @@ -0,0 +1,118 @@ +#ifndef _ASM_X86_HPET_H +#define _ASM_X86_HPET_H + +#include <linux/msi.h> + +#ifdef CONFIG_HPET_TIMER + +#define HPET_MMAP_SIZE 1024 + +#define HPET_ID 0x000 +#define HPET_PERIOD 0x004 +#define HPET_CFG 0x010 +#define HPET_STATUS 0x020 +#define HPET_COUNTER 0x0f0 + +#define HPET_Tn_CFG(n) (0x100 + 0x20 * n) +#define HPET_Tn_CMP(n) (0x108 + 0x20 * n) +#define HPET_Tn_ROUTE(n) (0x110 + 0x20 * n) + +#define HPET_T0_CFG 0x100 +#define HPET_T0_CMP 0x108 +#define HPET_T0_ROUTE 0x110 +#define HPET_T1_CFG 0x120 +#define HPET_T1_CMP 0x128 +#define HPET_T1_ROUTE 0x130 +#define HPET_T2_CFG 0x140 +#define HPET_T2_CMP 0x148 +#define HPET_T2_ROUTE 0x150 + +#define HPET_ID_REV 0x000000ff +#define HPET_ID_NUMBER 0x00001f00 +#define HPET_ID_64BIT 0x00002000 +#define HPET_ID_LEGSUP 0x00008000 +#define HPET_ID_VENDOR 0xffff0000 +#define HPET_ID_NUMBER_SHIFT 8 +#define HPET_ID_VENDOR_SHIFT 16 + +#define HPET_ID_VENDOR_8086 0x8086 + +#define HPET_CFG_ENABLE 0x001 +#define HPET_CFG_LEGACY 0x002 +#define HPET_LEGACY_8254 2 +#define HPET_LEGACY_RTC 8 + +#define HPET_TN_LEVEL 0x0002 +#define HPET_TN_ENABLE 0x0004 +#define HPET_TN_PERIODIC 0x0008 +#define HPET_TN_PERIODIC_CAP 0x0010 +#define HPET_TN_64BIT_CAP 0x0020 +#define HPET_TN_SETVAL 0x0040 +#define HPET_TN_32BIT 0x0100 +#define HPET_TN_ROUTE 0x3e00 +#define HPET_TN_FSB 0x4000 +#define HPET_TN_FSB_CAP 0x8000 +#define HPET_TN_ROUTE_SHIFT 9 + +/* Max HPET Period is 10^8 femto sec as in HPET spec */ +#define HPET_MAX_PERIOD 100000000UL +/* + * Min HPET period is 10^5 femto sec just for safety. If it is less than this, + * then 32 bit HPET counter wrapsaround in less than 0.5 sec. + */ +#define HPET_MIN_PERIOD 100000UL + +/* hpet memory map physical address */ +extern unsigned long hpet_address; +extern unsigned long force_hpet_address; +extern u8 hpet_blockid; +extern int hpet_force_user; +extern u8 hpet_msi_disable; +extern int is_hpet_enabled(void); +extern int hpet_enable(void); +extern void hpet_disable(void); +extern unsigned int hpet_readl(unsigned int a); +extern void force_hpet_resume(void); + +struct irq_data; +extern void hpet_msi_unmask(struct irq_data *data); +extern void hpet_msi_mask(struct irq_data *data); +struct hpet_dev; +extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg); +extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg); + +#ifdef CONFIG_PCI_MSI +extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id); +#else +static inline int arch_setup_hpet_msi(unsigned int irq, unsigned int id) +{ + return -EINVAL; +} +#endif + +#ifdef CONFIG_HPET_EMULATE_RTC + +#include <linux/interrupt.h> + +typedef irqreturn_t (*rtc_irq_handler)(int interrupt, void *cookie); +extern int hpet_mask_rtc_irq_bit(unsigned long bit_mask); +extern int hpet_set_rtc_irq_bit(unsigned long bit_mask); +extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min, + unsigned char sec); +extern int hpet_set_periodic_freq(unsigned long freq); +extern int hpet_rtc_dropped_irq(void); +extern int hpet_rtc_timer_init(void); +extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id); +extern int hpet_register_irq_handler(rtc_irq_handler handler); +extern void hpet_unregister_irq_handler(rtc_irq_handler handler); + +#endif /* CONFIG_HPET_EMULATE_RTC */ + +#else /* CONFIG_HPET_TIMER */ + +static inline int hpet_enable(void) { return 0; } +static inline int is_hpet_enabled(void) { return 0; } +#define hpet_readl(a) 0 + +#endif +#endif /* _ASM_X86_HPET_H */ diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h new file mode 100644 index 00000000000..439a9acc132 --- /dev/null +++ b/arch/x86/include/asm/hugetlb.h @@ -0,0 +1,93 @@ +#ifndef _ASM_X86_HUGETLB_H +#define _ASM_X86_HUGETLB_H + +#include <asm/page.h> + + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, + unsigned long len) { + return 0; +} + +/* + * If the arch doesn't supply something else, assume that hugepage + * size aligned regions are ok without further preparation. + */ +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) +{ + struct hstate *h = hstate_file(file); + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (addr & ~huge_page_mask(h)) + return -EINVAL; + return 0; +} + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) { +} + +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, + unsigned long ceiling) +{ + free_pgd_range(tlb, addr, end, floor, ceiling); +} + +static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte_at(mm, addr, ptep, pte); +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +#endif /* _ASM_X86_HUGETLB_H */ diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h new file mode 100644 index 00000000000..824ca07860d --- /dev/null +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -0,0 +1,76 @@ +#ifndef _I386_HW_BREAKPOINT_H +#define _I386_HW_BREAKPOINT_H + +#ifdef __KERNEL__ +#define __ARCH_HW_BREAKPOINT_H + +/* + * The name should probably be something dealt in + * a higher level. While dealing with the user + * (display/resolving) + */ +struct arch_hw_breakpoint { + unsigned long address; + u8 len; + u8 type; +}; + +#include <linux/kdebug.h> +#include <linux/percpu.h> +#include <linux/list.h> + +/* Available HW breakpoint length encodings */ +#define X86_BREAKPOINT_LEN_X 0x40 +#define X86_BREAKPOINT_LEN_1 0x40 +#define X86_BREAKPOINT_LEN_2 0x44 +#define X86_BREAKPOINT_LEN_4 0x4c + +#ifdef CONFIG_X86_64 +#define X86_BREAKPOINT_LEN_8 0x48 +#endif + +/* Available HW breakpoint type encodings */ + +/* trigger on instruction execute */ +#define X86_BREAKPOINT_EXECUTE 0x80 +/* trigger on memory write */ +#define X86_BREAKPOINT_WRITE 0x81 +/* trigger on memory read or write */ +#define X86_BREAKPOINT_RW 0x83 + +/* Total number of available HW breakpoint registers */ +#define HBP_NUM 4 + +static inline int hw_breakpoint_slots(int type) +{ + return HBP_NUM; +} + +struct perf_event; +struct pmu; + +extern int arch_check_bp_in_kernelspace(struct perf_event *bp); +extern int arch_validate_hwbkpt_settings(struct perf_event *bp); +extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, + unsigned long val, void *data); + + +int arch_install_hw_breakpoint(struct perf_event *bp); +void arch_uninstall_hw_breakpoint(struct perf_event *bp); +void hw_breakpoint_pmu_read(struct perf_event *bp); +void hw_breakpoint_pmu_unthrottle(struct perf_event *bp); + +extern void +arch_fill_perf_breakpoint(struct perf_event *bp); + +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type); +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type); + +extern int arch_bp_generic_fields(int x86_len, int x86_type, + int *gen_len, int *gen_type); + +extern struct pmu perf_ops_bp; + +#endif /* __KERNEL__ */ +#endif /* _I386_HW_BREAKPOINT_H */ + diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h new file mode 100644 index 00000000000..eb92a6ed2be --- /dev/null +++ b/arch/x86/include/asm/hw_irq.h @@ -0,0 +1,182 @@ +#ifndef _ASM_X86_HW_IRQ_H +#define _ASM_X86_HW_IRQ_H + +/* + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + * + * moved some of the old arch/i386/kernel/irq.h to here. VY + * + * IRQ/IPI changes taken from work by Thomas Radke + * <tomsoft@informatik.tu-chemnitz.de> + * + * hacked by Andi Kleen for x86-64. + * unified by tglx + */ + +#include <asm/irq_vectors.h> + +#ifndef __ASSEMBLY__ + +#include <linux/percpu.h> +#include <linux/profile.h> +#include <linux/smp.h> + +#include <linux/atomic.h> +#include <asm/irq.h> +#include <asm/sections.h> + +/* Interrupt handlers registered during init_IRQ */ +extern void apic_timer_interrupt(void); +extern void x86_platform_ipi(void); +extern void error_interrupt(void); +extern void irq_work_interrupt(void); + +extern void spurious_interrupt(void); +extern void thermal_interrupt(void); +extern void reschedule_interrupt(void); + +extern void invalidate_interrupt(void); +extern void invalidate_interrupt0(void); +extern void invalidate_interrupt1(void); +extern void invalidate_interrupt2(void); +extern void invalidate_interrupt3(void); +extern void invalidate_interrupt4(void); +extern void invalidate_interrupt5(void); +extern void invalidate_interrupt6(void); +extern void invalidate_interrupt7(void); +extern void invalidate_interrupt8(void); +extern void invalidate_interrupt9(void); +extern void invalidate_interrupt10(void); +extern void invalidate_interrupt11(void); +extern void invalidate_interrupt12(void); +extern void invalidate_interrupt13(void); +extern void invalidate_interrupt14(void); +extern void invalidate_interrupt15(void); +extern void invalidate_interrupt16(void); +extern void invalidate_interrupt17(void); +extern void invalidate_interrupt18(void); +extern void invalidate_interrupt19(void); +extern void invalidate_interrupt20(void); +extern void invalidate_interrupt21(void); +extern void invalidate_interrupt22(void); +extern void invalidate_interrupt23(void); +extern void invalidate_interrupt24(void); +extern void invalidate_interrupt25(void); +extern void invalidate_interrupt26(void); +extern void invalidate_interrupt27(void); +extern void invalidate_interrupt28(void); +extern void invalidate_interrupt29(void); +extern void invalidate_interrupt30(void); +extern void invalidate_interrupt31(void); + +extern void irq_move_cleanup_interrupt(void); +extern void reboot_interrupt(void); +extern void threshold_interrupt(void); + +extern void call_function_interrupt(void); +extern void call_function_single_interrupt(void); + +/* IOAPIC */ +#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) +extern unsigned long io_apic_irqs; + +extern void init_VISWS_APIC_irqs(void); +extern void setup_IO_APIC(void); +extern void disable_IO_APIC(void); + +struct io_apic_irq_attr { + int ioapic; + int ioapic_pin; + int trigger; + int polarity; +}; + +static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, + int ioapic, int ioapic_pin, + int trigger, int polarity) +{ + irq_attr->ioapic = ioapic; + irq_attr->ioapic_pin = ioapic_pin; + irq_attr->trigger = trigger; + irq_attr->polarity = polarity; +} + +struct irq_2_iommu { + struct intel_iommu *iommu; + u16 irte_index; + u16 sub_handle; + u8 irte_mask; +}; + +/* + * This is performance-critical, we want to do it O(1) + * + * Most irqs are mapped 1:1 with pins. + */ +struct irq_cfg { + struct irq_pin_list *irq_2_pin; + cpumask_var_t domain; + cpumask_var_t old_domain; + u8 vector; + u8 move_in_progress : 1; +#ifdef CONFIG_IRQ_REMAP + struct irq_2_iommu irq_2_iommu; +#endif +}; + +extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); +extern void send_cleanup_vector(struct irq_cfg *); + +struct irq_data; +int __ioapic_set_affinity(struct irq_data *, const struct cpumask *, + unsigned int *dest_id); +extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); +extern void setup_ioapic_dest(void); + +extern void enable_IO_APIC(void); + +/* Statistics */ +extern atomic_t irq_err_count; +extern atomic_t irq_mis_count; + +/* EISA */ +extern void eisa_set_level_irq(unsigned int irq); + +/* SMP */ +extern void smp_apic_timer_interrupt(struct pt_regs *); +extern void smp_spurious_interrupt(struct pt_regs *); +extern void smp_x86_platform_ipi(struct pt_regs *); +extern void smp_error_interrupt(struct pt_regs *); +#ifdef CONFIG_X86_IO_APIC +extern asmlinkage void smp_irq_move_cleanup_interrupt(void); +#endif +#ifdef CONFIG_SMP +extern void smp_reschedule_interrupt(struct pt_regs *); +extern void smp_call_function_interrupt(struct pt_regs *); +extern void smp_call_function_single_interrupt(struct pt_regs *); +#ifdef CONFIG_X86_32 +extern void smp_invalidate_interrupt(struct pt_regs *); +#else +extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *); +#endif +#endif + +extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); + +typedef int vector_irq_t[NR_VECTORS]; +DECLARE_PER_CPU(vector_irq_t, vector_irq); +extern void setup_vector_irq(int cpu); + +#ifdef CONFIG_X86_IO_APIC +extern void lock_vector_lock(void); +extern void unlock_vector_lock(void); +extern void __setup_vector_irq(int cpu); +#else +static inline void lock_vector_lock(void) {} +static inline void unlock_vector_lock(void) {} +static inline void __setup_vector_irq(int cpu) {} +#endif + +#endif /* !ASSEMBLY_ */ + +#endif /* _ASM_X86_HW_IRQ_H */ diff --git a/arch/x86/include/asm/hypertransport.h b/arch/x86/include/asm/hypertransport.h new file mode 100644 index 00000000000..334b1a885d9 --- /dev/null +++ b/arch/x86/include/asm/hypertransport.h @@ -0,0 +1,45 @@ +#ifndef _ASM_X86_HYPERTRANSPORT_H +#define _ASM_X86_HYPERTRANSPORT_H + +/* + * Constants for x86 Hypertransport Interrupts. + */ + +#define HT_IRQ_LOW_BASE 0xf8000000 + +#define HT_IRQ_LOW_VECTOR_SHIFT 16 +#define HT_IRQ_LOW_VECTOR_MASK 0x00ff0000 +#define HT_IRQ_LOW_VECTOR(v) \ + (((v) << HT_IRQ_LOW_VECTOR_SHIFT) & HT_IRQ_LOW_VECTOR_MASK) + +#define HT_IRQ_LOW_DEST_ID_SHIFT 8 +#define HT_IRQ_LOW_DEST_ID_MASK 0x0000ff00 +#define HT_IRQ_LOW_DEST_ID(v) \ + (((v) << HT_IRQ_LOW_DEST_ID_SHIFT) & HT_IRQ_LOW_DEST_ID_MASK) + +#define HT_IRQ_LOW_DM_PHYSICAL 0x0000000 +#define HT_IRQ_LOW_DM_LOGICAL 0x0000040 + +#define HT_IRQ_LOW_RQEOI_EDGE 0x0000000 +#define HT_IRQ_LOW_RQEOI_LEVEL 0x0000020 + + +#define HT_IRQ_LOW_MT_FIXED 0x0000000 +#define HT_IRQ_LOW_MT_ARBITRATED 0x0000004 +#define HT_IRQ_LOW_MT_SMI 0x0000008 +#define HT_IRQ_LOW_MT_NMI 0x000000c +#define HT_IRQ_LOW_MT_INIT 0x0000010 +#define HT_IRQ_LOW_MT_STARTUP 0x0000014 +#define HT_IRQ_LOW_MT_EXTINT 0x0000018 +#define HT_IRQ_LOW_MT_LINT1 0x000008c +#define HT_IRQ_LOW_MT_LINT0 0x0000098 + +#define HT_IRQ_LOW_IRQ_MASKED 0x0000001 + + +#define HT_IRQ_HIGH_DEST_ID_SHIFT 0 +#define HT_IRQ_HIGH_DEST_ID_MASK 0x00ffffff +#define HT_IRQ_HIGH_DEST_ID(v) \ + ((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK) + +#endif /* _ASM_X86_HYPERTRANSPORT_H */ diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/asm/hyperv.h new file mode 100644 index 00000000000..b80420bcd09 --- /dev/null +++ b/arch/x86/include/asm/hyperv.h @@ -0,0 +1,194 @@ +#ifndef _ASM_X86_HYPERV_H +#define _ASM_X86_HYPERV_H + +#include <linux/types.h> + +/* + * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent + * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). + */ +#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 +#define HYPERV_CPUID_INTERFACE 0x40000001 +#define HYPERV_CPUID_VERSION 0x40000002 +#define HYPERV_CPUID_FEATURES 0x40000003 +#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 +#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 + +#define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 +#define HYPERV_CPUID_MIN 0x40000005 +#define HYPERV_CPUID_MAX 0x4000ffff + +/* + * Feature identification. EAX indicates which features are available + * to the partition based upon the current partition privileges. + */ + +/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */ +#define HV_X64_MSR_VP_RUNTIME_AVAILABLE (1 << 0) +/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ +#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) +/* + * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM + * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available + */ +#define HV_X64_MSR_SYNIC_AVAILABLE (1 << 2) +/* + * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through + * HV_X64_MSR_STIMER3_COUNT) available + */ +#define HV_X64_MSR_SYNTIMER_AVAILABLE (1 << 3) +/* + * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR) + * are available + */ +#define HV_X64_MSR_APIC_ACCESS_AVAILABLE (1 << 4) +/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/ +#define HV_X64_MSR_HYPERCALL_AVAILABLE (1 << 5) +/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/ +#define HV_X64_MSR_VP_INDEX_AVAILABLE (1 << 6) +/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/ +#define HV_X64_MSR_RESET_AVAILABLE (1 << 7) + /* + * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE, + * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE, + * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available + */ +#define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8) + +/* + * Feature identification: EBX indicates which flags were specified at + * partition creation. The format is the same as the partition creation + * flag structure defined in section Partition Creation Flags. + */ +#define HV_X64_CREATE_PARTITIONS (1 << 0) +#define HV_X64_ACCESS_PARTITION_ID (1 << 1) +#define HV_X64_ACCESS_MEMORY_POOL (1 << 2) +#define HV_X64_ADJUST_MESSAGE_BUFFERS (1 << 3) +#define HV_X64_POST_MESSAGES (1 << 4) +#define HV_X64_SIGNAL_EVENTS (1 << 5) +#define HV_X64_CREATE_PORT (1 << 6) +#define HV_X64_CONNECT_PORT (1 << 7) +#define HV_X64_ACCESS_STATS (1 << 8) +#define HV_X64_DEBUGGING (1 << 11) +#define HV_X64_CPU_POWER_MANAGEMENT (1 << 12) +#define HV_X64_CONFIGURE_PROFILER (1 << 13) + +/* + * Feature identification. EDX indicates which miscellaneous features + * are available to the partition. + */ +/* The MWAIT instruction is available (per section MONITOR / MWAIT) */ +#define HV_X64_MWAIT_AVAILABLE (1 << 0) +/* Guest debugging support is available */ +#define HV_X64_GUEST_DEBUGGING_AVAILABLE (1 << 1) +/* Performance Monitor support is available*/ +#define HV_X64_PERF_MONITOR_AVAILABLE (1 << 2) +/* Support for physical CPU dynamic partitioning events is available*/ +#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE (1 << 3) +/* + * Support for passing hypercall input parameter block via XMM + * registers is available + */ +#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 << 4) +/* Support for a virtual guest idle state is available */ +#define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5) + +/* + * Implementation recommendations. Indicates which behaviors the hypervisor + * recommends the OS implement for optimal performance. + */ + /* + * Recommend using hypercall for address space switches rather + * than MOV to CR3 instruction + */ +#define HV_X64_MWAIT_RECOMMENDED (1 << 0) +/* Recommend using hypercall for local TLB flushes rather + * than INVLPG or MOV to CR3 instructions */ +#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED (1 << 1) +/* + * Recommend using hypercall for remote TLB flushes rather + * than inter-processor interrupts + */ +#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED (1 << 2) +/* + * Recommend using MSRs for accessing APIC registers + * EOI, ICR and TPR rather than their memory-mapped counterparts + */ +#define HV_X64_APIC_ACCESS_RECOMMENDED (1 << 3) +/* Recommend using the hypervisor-provided MSR to initiate a system RESET */ +#define HV_X64_SYSTEM_RESET_RECOMMENDED (1 << 4) +/* + * Recommend using relaxed timing for this partition. If used, + * the VM should disable any watchdog timeouts that rely on the + * timely delivery of external interrupts + */ +#define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5) + +/* MSR used to identify the guest OS. */ +#define HV_X64_MSR_GUEST_OS_ID 0x40000000 + +/* MSR used to setup pages used to communicate with the hypervisor. */ +#define HV_X64_MSR_HYPERCALL 0x40000001 + +/* MSR used to provide vcpu index */ +#define HV_X64_MSR_VP_INDEX 0x40000002 + +/* MSR used to read the per-partition time reference counter */ +#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 + +/* Define the virtual APIC registers */ +#define HV_X64_MSR_EOI 0x40000070 +#define HV_X64_MSR_ICR 0x40000071 +#define HV_X64_MSR_TPR 0x40000072 +#define HV_X64_MSR_APIC_ASSIST_PAGE 0x40000073 + +/* Define synthetic interrupt controller model specific registers. */ +#define HV_X64_MSR_SCONTROL 0x40000080 +#define HV_X64_MSR_SVERSION 0x40000081 +#define HV_X64_MSR_SIEFP 0x40000082 +#define HV_X64_MSR_SIMP 0x40000083 +#define HV_X64_MSR_EOM 0x40000084 +#define HV_X64_MSR_SINT0 0x40000090 +#define HV_X64_MSR_SINT1 0x40000091 +#define HV_X64_MSR_SINT2 0x40000092 +#define HV_X64_MSR_SINT3 0x40000093 +#define HV_X64_MSR_SINT4 0x40000094 +#define HV_X64_MSR_SINT5 0x40000095 +#define HV_X64_MSR_SINT6 0x40000096 +#define HV_X64_MSR_SINT7 0x40000097 +#define HV_X64_MSR_SINT8 0x40000098 +#define HV_X64_MSR_SINT9 0x40000099 +#define HV_X64_MSR_SINT10 0x4000009A +#define HV_X64_MSR_SINT11 0x4000009B +#define HV_X64_MSR_SINT12 0x4000009C +#define HV_X64_MSR_SINT13 0x4000009D +#define HV_X64_MSR_SINT14 0x4000009E +#define HV_X64_MSR_SINT15 0x4000009F + + +#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 +#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12 +#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ + (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) + +/* Declare the various hypercall operations. */ +#define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT 0x0008 + +#define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001 +#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12 +#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ + (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) + +#define HV_PROCESSOR_POWER_STATE_C0 0 +#define HV_PROCESSOR_POWER_STATE_C1 1 +#define HV_PROCESSOR_POWER_STATE_C2 2 +#define HV_PROCESSOR_POWER_STATE_C3 3 + +/* hypercall status code */ +#define HV_STATUS_SUCCESS 0 +#define HV_STATUS_INVALID_HYPERCALL_CODE 2 +#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 +#define HV_STATUS_INVALID_ALIGNMENT 4 +#define HV_STATUS_INSUFFICIENT_BUFFERS 19 + +#endif diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h new file mode 100644 index 00000000000..7a15153c675 --- /dev/null +++ b/arch/x86/include/asm/hypervisor.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2008, VMware, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#ifndef _ASM_X86_HYPERVISOR_H +#define _ASM_X86_HYPERVISOR_H + +#include <asm/kvm_para.h> +#include <asm/xen/hypervisor.h> + +extern void init_hypervisor(struct cpuinfo_x86 *c); +extern void init_hypervisor_platform(void); + +/* + * x86 hypervisor information + */ +struct hypervisor_x86 { + /* Hypervisor name */ + const char *name; + + /* Detection routine */ + bool (*detect)(void); + + /* Adjust CPU feature bits (run once per CPU) */ + void (*set_cpu_features)(struct cpuinfo_x86 *); + + /* Platform setup (run once per boot) */ + void (*init_platform)(void); +}; + +extern const struct hypervisor_x86 *x86_hyper; + +/* Recognized hypervisors */ +extern const struct hypervisor_x86 x86_hyper_vmware; +extern const struct hypervisor_x86 x86_hyper_ms_hyperv; +extern const struct hypervisor_x86 x86_hyper_xen_hvm; + +static inline bool hypervisor_x2apic_available(void) +{ + if (kvm_para_available()) + return true; + if (xen_x2apic_para_available()) + return true; + return false; +} + +#endif diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h new file mode 100644 index 00000000000..247904945d3 --- /dev/null +++ b/arch/x86/include/asm/i387.h @@ -0,0 +1,650 @@ +/* + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes <gareth@valinux.com>, May 2000 + * x86-64 work by Andi Kleen 2002 + */ + +#ifndef _ASM_X86_I387_H +#define _ASM_X86_I387_H + +#ifndef __ASSEMBLY__ + +#include <linux/sched.h> +#include <linux/kernel_stat.h> +#include <linux/regset.h> +#include <linux/hardirq.h> +#include <linux/slab.h> +#include <asm/asm.h> +#include <asm/cpufeature.h> +#include <asm/processor.h> +#include <asm/sigcontext.h> +#include <asm/user.h> +#include <asm/uaccess.h> +#include <asm/xsave.h> + +extern unsigned int sig_xstate_size; +extern void fpu_init(void); +extern void mxcsr_feature_mask_init(void); +extern int init_fpu(struct task_struct *child); +extern void math_state_restore(void); +extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); + +DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); + +extern user_regset_active_fn fpregs_active, xfpregs_active; +extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, + xstateregs_get; +extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, + xstateregs_set; + +/* + * xstateregs_active == fpregs_active. Please refer to the comment + * at the definition of fpregs_active. + */ +#define xstateregs_active fpregs_active + +extern struct _fpx_sw_bytes fx_sw_reserved; +#ifdef CONFIG_IA32_EMULATION +extern unsigned int sig_xstate_ia32_size; +extern struct _fpx_sw_bytes fx_sw_reserved_ia32; +struct _fpstate_ia32; +struct _xstate_ia32; +extern int save_i387_xstate_ia32(void __user *buf); +extern int restore_i387_xstate_ia32(void __user *buf); +#endif + +#ifdef CONFIG_MATH_EMULATION +extern void finit_soft_fpu(struct i387_soft_struct *soft); +#else +static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} +#endif + +#define X87_FSW_ES (1 << 7) /* Exception Summary */ + +static __always_inline __pure bool use_xsaveopt(void) +{ + return static_cpu_has(X86_FEATURE_XSAVEOPT); +} + +static __always_inline __pure bool use_xsave(void) +{ + return static_cpu_has(X86_FEATURE_XSAVE); +} + +static __always_inline __pure bool use_fxsr(void) +{ + return static_cpu_has(X86_FEATURE_FXSR); +} + +extern void __sanitize_i387_state(struct task_struct *); + +static inline void sanitize_i387_state(struct task_struct *tsk) +{ + if (!use_xsaveopt()) + return; + __sanitize_i387_state(tsk); +} + +#ifdef CONFIG_X86_64 +static inline int fxrstor_checking(struct i387_fxsave_struct *fx) +{ + int err; + + /* See comment in fxsave() below. */ +#ifdef CONFIG_AS_FXSAVEQ + asm volatile("1: fxrstorq %[fx]\n\t" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [err] "=r" (err) + : [fx] "m" (*fx), "0" (0)); +#else + asm volatile("1: rex64/fxrstor (%[fx])\n\t" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [err] "=r" (err) + : [fx] "R" (fx), "m" (*fx), "0" (0)); +#endif + return err; +} + +static inline int fxsave_user(struct i387_fxsave_struct __user *fx) +{ + int err; + + /* + * Clear the bytes not touched by the fxsave and reserved + * for the SW usage. + */ + err = __clear_user(&fx->sw_reserved, + sizeof(struct _fpx_sw_bytes)); + if (unlikely(err)) + return -EFAULT; + + /* See comment in fxsave() below. */ +#ifdef CONFIG_AS_FXSAVEQ + asm volatile("1: fxsaveq %[fx]\n\t" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [err] "=r" (err), [fx] "=m" (*fx) + : "0" (0)); +#else + asm volatile("1: rex64/fxsave (%[fx])\n\t" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [err] "=r" (err), "=m" (*fx) + : [fx] "R" (fx), "0" (0)); +#endif + if (unlikely(err) && + __clear_user(fx, sizeof(struct i387_fxsave_struct))) + err = -EFAULT; + /* No need to clear here because the caller clears USED_MATH */ + return err; +} + +static inline void fpu_fxsave(struct fpu *fpu) +{ + /* Using "rex64; fxsave %0" is broken because, if the memory operand + uses any extended registers for addressing, a second REX prefix + will be generated (to the assembler, rex64 followed by semicolon + is a separate instruction), and hence the 64-bitness is lost. */ + +#ifdef CONFIG_AS_FXSAVEQ + /* Using "fxsaveq %0" would be the ideal choice, but is only supported + starting with gas 2.16. */ + __asm__ __volatile__("fxsaveq %0" + : "=m" (fpu->state->fxsave)); +#else + /* Using, as a workaround, the properly prefixed form below isn't + accepted by any binutils version so far released, complaining that + the same type of prefix is used twice if an extended register is + needed for addressing (fix submitted to mainline 2005-11-21). + asm volatile("rex64/fxsave %0" + : "=m" (fpu->state->fxsave)); + This, however, we can work around by forcing the compiler to select + an addressing mode that doesn't require extended registers. */ + asm volatile("rex64/fxsave (%[fx])" + : "=m" (fpu->state->fxsave) + : [fx] "R" (&fpu->state->fxsave)); +#endif +} + +#else /* CONFIG_X86_32 */ + +/* perform fxrstor iff the processor has extended states, otherwise frstor */ +static inline int fxrstor_checking(struct i387_fxsave_struct *fx) +{ + /* + * The "nop" is needed to make the instructions the same + * length. + */ + alternative_input( + "nop ; frstor %1", + "fxrstor %1", + X86_FEATURE_FXSR, + "m" (*fx)); + + return 0; +} + +static inline void fpu_fxsave(struct fpu *fpu) +{ + asm volatile("fxsave %[fx]" + : [fx] "=m" (fpu->state->fxsave)); +} + +#endif /* CONFIG_X86_64 */ + +/* + * These must be called with preempt disabled. Returns + * 'true' if the FPU state is still intact. + */ +static inline int fpu_save_init(struct fpu *fpu) +{ + if (use_xsave()) { + fpu_xsave(fpu); + + /* + * xsave header may indicate the init state of the FP. + */ + if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) + return 1; + } else if (use_fxsr()) { + fpu_fxsave(fpu); + } else { + asm volatile("fnsave %[fx]; fwait" + : [fx] "=m" (fpu->state->fsave)); + return 0; + } + + /* + * If exceptions are pending, we need to clear them so + * that we don't randomly get exceptions later. + * + * FIXME! Is this perhaps only true for the old-style + * irq13 case? Maybe we could leave the x87 state + * intact otherwise? + */ + if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { + asm volatile("fnclex"); + return 0; + } + return 1; +} + +static inline int __save_init_fpu(struct task_struct *tsk) +{ + return fpu_save_init(&tsk->thread.fpu); +} + +static inline int fpu_fxrstor_checking(struct fpu *fpu) +{ + return fxrstor_checking(&fpu->state->fxsave); +} + +static inline int fpu_restore_checking(struct fpu *fpu) +{ + if (use_xsave()) + return fpu_xrstor_checking(fpu); + else + return fpu_fxrstor_checking(fpu); +} + +static inline int restore_fpu_checking(struct task_struct *tsk) +{ + /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception + is pending. Clear the x87 state here by setting it to fixed + values. "m" is a random variable that should be in L1 */ + alternative_input( + ASM_NOP8 ASM_NOP2, + "emms\n\t" /* clear stack tags */ + "fildl %P[addr]", /* set F?P to defined value */ + X86_FEATURE_FXSAVE_LEAK, + [addr] "m" (tsk->thread.fpu.has_fpu)); + + return fpu_restore_checking(&tsk->thread.fpu); +} + +/* + * Software FPU state helpers. Careful: these need to + * be preemption protection *and* they need to be + * properly paired with the CR0.TS changes! + */ +static inline int __thread_has_fpu(struct task_struct *tsk) +{ + return tsk->thread.fpu.has_fpu; +} + +/* Must be paired with an 'stts' after! */ +static inline void __thread_clear_has_fpu(struct task_struct *tsk) +{ + tsk->thread.fpu.has_fpu = 0; + percpu_write(fpu_owner_task, NULL); +} + +/* Must be paired with a 'clts' before! */ +static inline void __thread_set_has_fpu(struct task_struct *tsk) +{ + tsk->thread.fpu.has_fpu = 1; + percpu_write(fpu_owner_task, tsk); +} + +/* + * Encapsulate the CR0.TS handling together with the + * software flag. + * + * These generally need preemption protection to work, + * do try to avoid using these on their own. + */ +static inline void __thread_fpu_end(struct task_struct *tsk) +{ + __thread_clear_has_fpu(tsk); + stts(); +} + +static inline void __thread_fpu_begin(struct task_struct *tsk) +{ + clts(); + __thread_set_has_fpu(tsk); +} + +/* + * FPU state switching for scheduling. + * + * This is a two-stage process: + * + * - switch_fpu_prepare() saves the old state and + * sets the new state of the CR0.TS bit. This is + * done within the context of the old process. + * + * - switch_fpu_finish() restores the new state as + * necessary. + */ +typedef struct { int preload; } fpu_switch_t; + +/* + * FIXME! We could do a totally lazy restore, but we need to + * add a per-cpu "this was the task that last touched the FPU + * on this CPU" variable, and the task needs to have a "I last + * touched the FPU on this CPU" and check them. + * + * We don't do that yet, so "fpu_lazy_restore()" always returns + * false, but some day.. + */ +static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) +{ + return new == percpu_read_stable(fpu_owner_task) && + cpu == new->thread.fpu.last_cpu; +} + +static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) +{ + fpu_switch_t fpu; + + fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; + if (__thread_has_fpu(old)) { + if (!__save_init_fpu(old)) + cpu = ~0; + old->thread.fpu.last_cpu = cpu; + old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */ + + /* Don't change CR0.TS if we just switch! */ + if (fpu.preload) { + new->fpu_counter++; + __thread_set_has_fpu(new); + prefetch(new->thread.fpu.state); + } else + stts(); + } else { + old->fpu_counter = 0; + old->thread.fpu.last_cpu = ~0; + if (fpu.preload) { + new->fpu_counter++; + if (fpu_lazy_restore(new, cpu)) + fpu.preload = 0; + else + prefetch(new->thread.fpu.state); + __thread_fpu_begin(new); + } + } + return fpu; +} + +/* + * By the time this gets called, we've already cleared CR0.TS and + * given the process the FPU if we are going to preload the FPU + * state - all we need to do is to conditionally restore the register + * state itself. + */ +static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) +{ + if (fpu.preload) { + if (unlikely(restore_fpu_checking(new))) + __thread_fpu_end(new); + } +} + +/* + * Signal frame handlers... + */ +extern int save_i387_xstate(void __user *buf); +extern int restore_i387_xstate(void __user *buf); + +static inline void __clear_fpu(struct task_struct *tsk) +{ + if (__thread_has_fpu(tsk)) { + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); + __thread_fpu_end(tsk); + } +} + +/* + * Were we in an interrupt that interrupted kernel mode? + * + * We can do a kernel_fpu_begin/end() pair *ONLY* if that + * pair does nothing at all: the thread must not have fpu (so + * that we don't try to save the FPU state), and TS must + * be set (so that the clts/stts pair does nothing that is + * visible in the interrupted kernel thread). + */ +static inline bool interrupted_kernel_fpu_idle(void) +{ + return !__thread_has_fpu(current) && + (read_cr0() & X86_CR0_TS); +} + +/* + * Were we in user mode (or vm86 mode) when we were + * interrupted? + * + * Doing kernel_fpu_begin/end() is ok if we are running + * in an interrupt context from user mode - we'll just + * save the FPU state as required. + */ +static inline bool interrupted_user_mode(void) +{ + struct pt_regs *regs = get_irq_regs(); + return regs && user_mode_vm(regs); +} + +/* + * Can we use the FPU in kernel mode with the + * whole "kernel_fpu_begin/end()" sequence? + * + * It's always ok in process context (ie "not interrupt") + * but it is sometimes ok even from an irq. + */ +static inline bool irq_fpu_usable(void) +{ + return !in_interrupt() || + interrupted_user_mode() || + interrupted_kernel_fpu_idle(); +} + +static inline void kernel_fpu_begin(void) +{ + struct task_struct *me = current; + + WARN_ON_ONCE(!irq_fpu_usable()); + preempt_disable(); + if (__thread_has_fpu(me)) { + __save_init_fpu(me); + __thread_clear_has_fpu(me); + /* We do 'stts()' in kernel_fpu_end() */ + } else { + percpu_write(fpu_owner_task, NULL); + clts(); + } +} + +static inline void kernel_fpu_end(void) +{ + stts(); + preempt_enable(); +} + +/* + * Some instructions like VIA's padlock instructions generate a spurious + * DNA fault but don't modify SSE registers. And these instructions + * get used from interrupt context as well. To prevent these kernel instructions + * in interrupt context interacting wrongly with other user/kernel fpu usage, we + * should use them only in the context of irq_ts_save/restore() + */ +static inline int irq_ts_save(void) +{ + /* + * If in process context and not atomic, we can take a spurious DNA fault. + * Otherwise, doing clts() in process context requires disabling preemption + * or some heavy lifting like kernel_fpu_begin() + */ + if (!in_atomic()) + return 0; + + if (read_cr0() & X86_CR0_TS) { + clts(); + return 1; + } + + return 0; +} + +static inline void irq_ts_restore(int TS_state) +{ + if (TS_state) + stts(); +} + +/* + * The question "does this thread have fpu access?" + * is slightly racy, since preemption could come in + * and revoke it immediately after the test. + * + * However, even in that very unlikely scenario, + * we can just assume we have FPU access - typically + * to save the FP state - we'll just take a #NM + * fault and get the FPU access back. + * + * The actual user_fpu_begin/end() functions + * need to be preemption-safe, though. + * + * NOTE! user_fpu_end() must be used only after you + * have saved the FP state, and user_fpu_begin() must + * be used only immediately before restoring it. + * These functions do not do any save/restore on + * their own. + */ +static inline int user_has_fpu(void) +{ + return __thread_has_fpu(current); +} + +static inline void user_fpu_end(void) +{ + preempt_disable(); + __thread_fpu_end(current); + preempt_enable(); +} + +static inline void user_fpu_begin(void) +{ + preempt_disable(); + if (!user_has_fpu()) + __thread_fpu_begin(current); + preempt_enable(); +} + +/* + * These disable preemption on their own and are safe + */ +static inline void save_init_fpu(struct task_struct *tsk) +{ + WARN_ON_ONCE(!__thread_has_fpu(tsk)); + preempt_disable(); + __save_init_fpu(tsk); + __thread_fpu_end(tsk); + preempt_enable(); +} + +static inline void unlazy_fpu(struct task_struct *tsk) +{ + preempt_disable(); + if (__thread_has_fpu(tsk)) { + __save_init_fpu(tsk); + __thread_fpu_end(tsk); + } else + tsk->fpu_counter = 0; + preempt_enable(); +} + +static inline void clear_fpu(struct task_struct *tsk) +{ + preempt_disable(); + __clear_fpu(tsk); + preempt_enable(); +} + +/* + * i387 state interaction + */ +static inline unsigned short get_fpu_cwd(struct task_struct *tsk) +{ + if (cpu_has_fxsr) { + return tsk->thread.fpu.state->fxsave.cwd; + } else { + return (unsigned short)tsk->thread.fpu.state->fsave.cwd; + } +} + +static inline unsigned short get_fpu_swd(struct task_struct *tsk) +{ + if (cpu_has_fxsr) { + return tsk->thread.fpu.state->fxsave.swd; + } else { + return (unsigned short)tsk->thread.fpu.state->fsave.swd; + } +} + +static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) +{ + if (cpu_has_xmm) { + return tsk->thread.fpu.state->fxsave.mxcsr; + } else { + return MXCSR_DEFAULT; + } +} + +static bool fpu_allocated(struct fpu *fpu) +{ + return fpu->state != NULL; +} + +static inline int fpu_alloc(struct fpu *fpu) +{ + if (fpu_allocated(fpu)) + return 0; + fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); + if (!fpu->state) + return -ENOMEM; + WARN_ON((unsigned long)fpu->state & 15); + return 0; +} + +static inline void fpu_free(struct fpu *fpu) +{ + if (fpu->state) { + kmem_cache_free(task_xstate_cachep, fpu->state); + fpu->state = NULL; + } +} + +static inline void fpu_copy(struct fpu *dst, struct fpu *src) +{ + memcpy(dst->state, src->state, xstate_size); +} + +extern void fpu_finit(struct fpu *fpu); + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_I387_H */ diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h new file mode 100644 index 00000000000..a20365953bf --- /dev/null +++ b/arch/x86/include/asm/i8259.h @@ -0,0 +1,70 @@ +#ifndef _ASM_X86_I8259_H +#define _ASM_X86_I8259_H + +#include <linux/delay.h> + +extern unsigned int cached_irq_mask; + +#define __byte(x, y) (((unsigned char *)&(y))[x]) +#define cached_master_mask (__byte(0, cached_irq_mask)) +#define cached_slave_mask (__byte(1, cached_irq_mask)) + +/* i8259A PIC registers */ +#define PIC_MASTER_CMD 0x20 +#define PIC_MASTER_IMR 0x21 +#define PIC_MASTER_ISR PIC_MASTER_CMD +#define PIC_MASTER_POLL PIC_MASTER_ISR +#define PIC_MASTER_OCW3 PIC_MASTER_ISR +#define PIC_SLAVE_CMD 0xa0 +#define PIC_SLAVE_IMR 0xa1 + +/* i8259A PIC related value */ +#define PIC_CASCADE_IR 2 +#define MASTER_ICW4_DEFAULT 0x01 +#define SLAVE_ICW4_DEFAULT 0x01 +#define PIC_ICW4_AEOI 2 + +extern raw_spinlock_t i8259A_lock; + +/* the PIC may need a careful delay on some platforms, hence specific calls */ +static inline unsigned char inb_pic(unsigned int port) +{ + unsigned char value = inb(port); + + /* + * delay for some accesses to PIC on motherboard or in chipset + * must be at least one microsecond, so be safe here: + */ + udelay(2); + + return value; +} + +static inline void outb_pic(unsigned char value, unsigned int port) +{ + outb(value, port); + /* + * delay for some accesses to PIC on motherboard or in chipset + * must be at least one microsecond, so be safe here: + */ + udelay(2); +} + +extern struct irq_chip i8259A_chip; + +struct legacy_pic { + int nr_legacy_irqs; + struct irq_chip *chip; + void (*mask)(unsigned int irq); + void (*unmask)(unsigned int irq); + void (*mask_all)(void); + void (*restore_mask)(void); + void (*init)(int auto_eoi); + int (*irq_pending)(unsigned int irq); + void (*make_irq)(unsigned int irq); +}; + +extern struct legacy_pic *legacy_pic; +extern struct legacy_pic null_legacy_pic; + +#endif /* _ASM_X86_I8259_H */ diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h new file mode 100644 index 00000000000..1f7e6251728 --- /dev/null +++ b/arch/x86/include/asm/ia32.h @@ -0,0 +1,145 @@ +#ifndef _ASM_X86_IA32_H +#define _ASM_X86_IA32_H + + +#ifdef CONFIG_IA32_EMULATION + +#include <linux/compat.h> + +/* + * 32 bit structures for IA32 support. + */ + +#include <asm/sigcontext32.h> + +/* signal.h */ +struct sigaction32 { + unsigned int sa_handler; /* Really a pointer, but need to deal + with 32 bits */ + unsigned int sa_flags; + unsigned int sa_restorer; /* Another 32 bit pointer */ + compat_sigset_t sa_mask; /* A 32 bit mask */ +}; + +struct old_sigaction32 { + unsigned int sa_handler; /* Really a pointer, but need to deal + with 32 bits */ + compat_old_sigset_t sa_mask; /* A 32 bit mask */ + unsigned int sa_flags; + unsigned int sa_restorer; /* Another 32 bit pointer */ +}; + +typedef struct sigaltstack_ia32 { + unsigned int ss_sp; + int ss_flags; + unsigned int ss_size; +} stack_ia32_t; + +struct ucontext_ia32 { + unsigned int uc_flags; + unsigned int uc_link; + stack_ia32_t uc_stack; + struct sigcontext_ia32 uc_mcontext; + compat_sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +/* This matches struct stat64 in glibc2.2, hence the absolutely + * insane amounts of padding around dev_t's. + */ +struct stat64 { + unsigned long long st_dev; + unsigned char __pad0[4]; + +#define STAT64_HAS_BROKEN_ST_INO 1 + unsigned int __st_ino; + + unsigned int st_mode; + unsigned int st_nlink; + + unsigned int st_uid; + unsigned int st_gid; + + unsigned long long st_rdev; + unsigned char __pad3[4]; + + long long st_size; + unsigned int st_blksize; + + long long st_blocks;/* Number 512-byte blocks allocated */ + + unsigned st_atime; + unsigned st_atime_nsec; + unsigned st_mtime; + unsigned st_mtime_nsec; + unsigned st_ctime; + unsigned st_ctime_nsec; + + unsigned long long st_ino; +} __attribute__((packed)); + +typedef struct compat_siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[((128 / sizeof(int)) - 3)]; + + /* kill() */ + struct { + unsigned int _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + compat_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + compat_sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + int _overrun_incr; /* amount to add to overrun */ + } _timer; + + /* POSIX.1b signals */ + struct { + unsigned int _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + compat_sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + unsigned int _pid; /* which child */ + unsigned int _uid; /* sender's uid */ + int _status; /* exit code */ + compat_clock_t _utime; + compat_clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + unsigned int _addr; /* faulting insn/memory ref. */ + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} compat_siginfo_t; + +#define IA32_STACK_TOP IA32_PAGE_OFFSET + +#ifdef __KERNEL__ +struct linux_binprm; +extern int ia32_setup_arg_pages(struct linux_binprm *bprm, + unsigned long stack_top, int exec_stack); +struct mm_struct; +extern void ia32_pick_mmap_layout(struct mm_struct *mm); + +#endif + +#endif /* !CONFIG_IA32_SUPPORT */ + +#endif /* _ASM_X86_IA32_H */ diff --git a/arch/x86/include/asm/ia32_unistd.h b/arch/x86/include/asm/ia32_unistd.h new file mode 100644 index 00000000000..b0d5716ca1e --- /dev/null +++ b/arch/x86/include/asm/ia32_unistd.h @@ -0,0 +1,11 @@ +#ifndef _ASM_X86_IA32_UNISTD_H +#define _ASM_X86_IA32_UNISTD_H + +/* + * This file contains the system call numbers of the ia32 compat ABI, + * this is for the kernel only. + */ +#define __SYSCALL_ia32_NR(x) (x) +#include <asm/unistd_32_ia32.h> + +#endif /* _ASM_X86_IA32_UNISTD_H */ diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h new file mode 100644 index 00000000000..f49253d7571 --- /dev/null +++ b/arch/x86/include/asm/idle.h @@ -0,0 +1,21 @@ +#ifndef _ASM_X86_IDLE_H +#define _ASM_X86_IDLE_H + +#define IDLE_START 1 +#define IDLE_END 2 + +struct notifier_block; +void idle_notifier_register(struct notifier_block *n); +void idle_notifier_unregister(struct notifier_block *n); + +#ifdef CONFIG_X86_64 +void enter_idle(void); +void exit_idle(void); +#else /* !CONFIG_X86_64 */ +static inline void enter_idle(void) { } +static inline void exit_idle(void) { } +#endif /* CONFIG_X86_64 */ + +void amd_e400_remove_cpu(int cpu); + +#endif /* _ASM_X86_IDLE_H */ diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h new file mode 100644 index 00000000000..205b063e3e3 --- /dev/null +++ b/arch/x86/include/asm/inat.h @@ -0,0 +1,220 @@ +#ifndef _ASM_X86_INAT_H +#define _ASM_X86_INAT_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include <asm/inat_types.h> + +/* + * Internal bits. Don't use bitmasks directly, because these bits are + * unstable. You should use checking functions. + */ + +#define INAT_OPCODE_TABLE_SIZE 256 +#define INAT_GROUP_TABLE_SIZE 8 + +/* Legacy last prefixes */ +#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ +#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ +#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ +/* Other Legacy prefixes */ +#define INAT_PFX_LOCK 4 /* 0xF0 */ +#define INAT_PFX_CS 5 /* 0x2E */ +#define INAT_PFX_DS 6 /* 0x3E */ +#define INAT_PFX_ES 7 /* 0x26 */ +#define INAT_PFX_FS 8 /* 0x64 */ +#define INAT_PFX_GS 9 /* 0x65 */ +#define INAT_PFX_SS 10 /* 0x36 */ +#define INAT_PFX_ADDRSZ 11 /* 0x67 */ +/* x86-64 REX prefix */ +#define INAT_PFX_REX 12 /* 0x4X */ +/* AVX VEX prefixes */ +#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ +#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ + +#define INAT_LSTPFX_MAX 3 +#define INAT_LGCPFX_MAX 11 + +/* Immediate size */ +#define INAT_IMM_BYTE 1 +#define INAT_IMM_WORD 2 +#define INAT_IMM_DWORD 3 +#define INAT_IMM_QWORD 4 +#define INAT_IMM_PTR 5 +#define INAT_IMM_VWORD32 6 +#define INAT_IMM_VWORD 7 + +/* Legacy prefix */ +#define INAT_PFX_OFFS 0 +#define INAT_PFX_BITS 4 +#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) +#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) +/* Escape opcodes */ +#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) +#define INAT_ESC_BITS 2 +#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) +#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) +/* Group opcodes (1-16) */ +#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) +#define INAT_GRP_BITS 5 +#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) +#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) +/* Immediates */ +#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) +#define INAT_IMM_BITS 3 +#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) +/* Flags */ +#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) +#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) +#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) +/* Attribute making macros for attribute tables */ +#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) +#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) +#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) +#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) + +/* Attribute search APIs */ +extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, + insn_byte_t last_pfx, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, + insn_byte_t last_pfx, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, + insn_byte_t vex_m, + insn_byte_t vex_pp); + +/* Attribute checking functions */ +static inline int inat_is_legacy_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr && attr <= INAT_LGCPFX_MAX; +} + +static inline int inat_is_address_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; +} + +static inline int inat_is_operand_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; +} + +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_REX; +} + +static inline int inat_last_prefix_id(insn_attr_t attr) +{ + if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) + return 0; + else + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_vex_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; +} + +static inline int inat_is_vex3_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; +} + +static inline int inat_is_escape(insn_attr_t attr) +{ + return attr & INAT_ESC_MASK; +} + +static inline int inat_escape_id(insn_attr_t attr) +{ + return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; +} + +static inline int inat_is_group(insn_attr_t attr) +{ + return attr & INAT_GRP_MASK; +} + +static inline int inat_group_id(insn_attr_t attr) +{ + return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; +} + +static inline int inat_group_common_attribute(insn_attr_t attr) +{ + return attr & ~INAT_GRP_MASK; +} + +static inline int inat_has_immediate(insn_attr_t attr) +{ + return attr & INAT_IMM_MASK; +} + +static inline int inat_immediate_size(insn_attr_t attr) +{ + return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; +} + +static inline int inat_has_modrm(insn_attr_t attr) +{ + return attr & INAT_MODRM; +} + +static inline int inat_is_force64(insn_attr_t attr) +{ + return attr & INAT_FORCE64; +} + +static inline int inat_has_second_immediate(insn_attr_t attr) +{ + return attr & INAT_SCNDIMM; +} + +static inline int inat_has_moffset(insn_attr_t attr) +{ + return attr & INAT_MOFFSET; +} + +static inline int inat_has_variant(insn_attr_t attr) +{ + return attr & INAT_VARIANT; +} + +static inline int inat_accept_vex(insn_attr_t attr) +{ + return attr & INAT_VEXOK; +} + +static inline int inat_must_vex(insn_attr_t attr) +{ + return attr & INAT_VEXONLY; +} +#endif diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h new file mode 100644 index 00000000000..cb3c20ce39c --- /dev/null +++ b/arch/x86/include/asm/inat_types.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_INAT_TYPES_H +#define _ASM_X86_INAT_TYPES_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +/* Instruction attributes */ +typedef unsigned int insn_attr_t; +typedef unsigned char insn_byte_t; +typedef signed int insn_value_t; + +#endif diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h new file mode 100644 index 00000000000..adcc0ae73d0 --- /dev/null +++ b/arch/x86/include/asm/init.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_INIT_32_H +#define _ASM_X86_INIT_32_H + +#ifdef CONFIG_X86_32 +extern void __init early_ioremap_page_table_range_init(void); +#endif + +extern void __init zone_sizes_init(void); + +extern unsigned long __init +kernel_physical_mapping_init(unsigned long start, + unsigned long end, + unsigned long page_size_mask); + + +extern unsigned long __initdata pgt_buf_start; +extern unsigned long __meminitdata pgt_buf_end; +extern unsigned long __meminitdata pgt_buf_top; + +#endif /* _ASM_X86_INIT_32_H */ diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h new file mode 100644 index 00000000000..74df3f1eddf --- /dev/null +++ b/arch/x86/include/asm/insn.h @@ -0,0 +1,193 @@ +#ifndef _ASM_X86_INSN_H +#define _ASM_X86_INSN_H +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +/* insn_attr_t is defined in inat.h */ +#include <asm/inat.h> + +struct insn_field { + union { + insn_value_t value; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +struct insn { + struct insn_field prefixes; /* + * Prefixes + * prefixes.bytes[3]: last prefix + */ + struct insn_field rex_prefix; /* REX prefix */ + struct insn_field vex_prefix; /* VEX prefix */ + struct insn_field opcode; /* + * opcode.bytes[0]: opcode1 + * opcode.bytes[1]: opcode2 + * opcode.bytes[2]: opcode3 + */ + struct insn_field modrm; + struct insn_field sib; + struct insn_field displacement; + union { + struct insn_field immediate; + struct insn_field moffset1; /* for 64bit MOV */ + struct insn_field immediate1; /* for 64bit imm or off16/32 */ + }; + union { + struct insn_field moffset2; /* for 64bit MOV */ + struct insn_field immediate2; /* for 64bit imm or seg16 */ + }; + + insn_attr_t attr; + unsigned char opnd_bytes; + unsigned char addr_bytes; + unsigned char length; + unsigned char x86_64; + + const insn_byte_t *kaddr; /* kernel address of insn to analyze */ + const insn_byte_t *next_byte; +}; + +#define MAX_INSN_SIZE 16 + +#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) +#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) +#define X86_MODRM_RM(modrm) ((modrm) & 0x07) + +#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) +#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) +#define X86_SIB_BASE(sib) ((sib) & 0x07) + +#define X86_REX_W(rex) ((rex) & 8) +#define X86_REX_R(rex) ((rex) & 4) +#define X86_REX_X(rex) ((rex) & 2) +#define X86_REX_B(rex) ((rex) & 1) + +/* VEX bit flags */ +#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ +#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ +#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ +#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ +#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ +/* VEX bit fields */ +#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ +#define X86_VEX2_M 1 /* VEX2.M always 1 */ +#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ + +/* The last prefix is needed for two-byte and three-byte opcodes */ +static inline insn_byte_t insn_last_prefix(struct insn *insn) +{ + return insn->prefixes.bytes[3]; +} + +extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); +extern void insn_get_prefixes(struct insn *insn); +extern void insn_get_opcode(struct insn *insn); +extern void insn_get_modrm(struct insn *insn); +extern void insn_get_sib(struct insn *insn); +extern void insn_get_displacement(struct insn *insn); +extern void insn_get_immediate(struct insn *insn); +extern void insn_get_length(struct insn *insn); + +/* Attribute will be determined after getting ModRM (for opcode groups) */ +static inline void insn_get_attribute(struct insn *insn) +{ + insn_get_modrm(insn); +} + +/* Instruction uses RIP-relative addressing */ +extern int insn_rip_relative(struct insn *insn); + +/* Init insn for kernel text */ +static inline void kernel_insn_init(struct insn *insn, const void *kaddr) +{ +#ifdef CONFIG_X86_64 + insn_init(insn, kaddr, 1); +#else /* CONFIG_X86_32 */ + insn_init(insn, kaddr, 0); +#endif +} + +static inline int insn_is_avx(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.value != 0); +} + +/* Ensure this instruction is decoded completely */ +static inline int insn_complete(struct insn *insn) +{ + return insn->opcode.got && insn->modrm.got && insn->sib.got && + insn->displacement.got && insn->immediate.got; +} + +static inline insn_byte_t insn_vex_m_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX2_M; + else + return X86_VEX3_M(insn->vex_prefix.bytes[1]); +} + +static inline insn_byte_t insn_vex_p_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX_P(insn->vex_prefix.bytes[1]); + else + return X86_VEX_P(insn->vex_prefix.bytes[2]); +} + +/* Offset of each field from kaddr */ +static inline int insn_offset_rex_prefix(struct insn *insn) +{ + return insn->prefixes.nbytes; +} +static inline int insn_offset_vex_prefix(struct insn *insn) +{ + return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; +} +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; +} +static inline int insn_offset_modrm(struct insn *insn) +{ + return insn_offset_opcode(insn) + insn->opcode.nbytes; +} +static inline int insn_offset_sib(struct insn *insn) +{ + return insn_offset_modrm(insn) + insn->modrm.nbytes; +} +static inline int insn_offset_displacement(struct insn *insn) +{ + return insn_offset_sib(insn) + insn->sib.nbytes; +} +static inline int insn_offset_immediate(struct insn *insn) +{ + return insn_offset_displacement(insn) + insn->displacement.nbytes; +} + +#endif /* _ASM_X86_INSN_H */ diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h new file mode 100644 index 00000000000..280bf7fb6ab --- /dev/null +++ b/arch/x86/include/asm/inst.h @@ -0,0 +1,240 @@ +/* + * Generate .byte code for some instructions not supported by old + * binutils. + */ +#ifndef X86_ASM_INST_H +#define X86_ASM_INST_H + +#ifdef __ASSEMBLY__ + +#define REG_NUM_INVALID 100 + +#define REG_TYPE_R64 0 +#define REG_TYPE_XMM 1 +#define REG_TYPE_INVALID 100 + + .macro R64_NUM opd r64 + \opd = REG_NUM_INVALID + .ifc \r64,%rax + \opd = 0 + .endif + .ifc \r64,%rcx + \opd = 1 + .endif + .ifc \r64,%rdx + \opd = 2 + .endif + .ifc \r64,%rbx + \opd = 3 + .endif + .ifc \r64,%rsp + \opd = 4 + .endif + .ifc \r64,%rbp + \opd = 5 + .endif + .ifc \r64,%rsi + \opd = 6 + .endif + .ifc \r64,%rdi + \opd = 7 + .endif + .ifc \r64,%r8 + \opd = 8 + .endif + .ifc \r64,%r9 + \opd = 9 + .endif + .ifc \r64,%r10 + \opd = 10 + .endif + .ifc \r64,%r11 + \opd = 11 + .endif + .ifc \r64,%r12 + \opd = 12 + .endif + .ifc \r64,%r13 + \opd = 13 + .endif + .ifc \r64,%r14 + \opd = 14 + .endif + .ifc \r64,%r15 + \opd = 15 + .endif + .endm + + .macro XMM_NUM opd xmm + \opd = REG_NUM_INVALID + .ifc \xmm,%xmm0 + \opd = 0 + .endif + .ifc \xmm,%xmm1 + \opd = 1 + .endif + .ifc \xmm,%xmm2 + \opd = 2 + .endif + .ifc \xmm,%xmm3 + \opd = 3 + .endif + .ifc \xmm,%xmm4 + \opd = 4 + .endif + .ifc \xmm,%xmm5 + \opd = 5 + .endif + .ifc \xmm,%xmm6 + \opd = 6 + .endif + .ifc \xmm,%xmm7 + \opd = 7 + .endif + .ifc \xmm,%xmm8 + \opd = 8 + .endif + .ifc \xmm,%xmm9 + \opd = 9 + .endif + .ifc \xmm,%xmm10 + \opd = 10 + .endif + .ifc \xmm,%xmm11 + \opd = 11 + .endif + .ifc \xmm,%xmm12 + \opd = 12 + .endif + .ifc \xmm,%xmm13 + \opd = 13 + .endif + .ifc \xmm,%xmm14 + \opd = 14 + .endif + .ifc \xmm,%xmm15 + \opd = 15 + .endif + .endm + + .macro REG_TYPE type reg + R64_NUM reg_type_r64 \reg + XMM_NUM reg_type_xmm \reg + .if reg_type_r64 <> REG_NUM_INVALID + \type = REG_TYPE_R64 + .elseif reg_type_xmm <> REG_NUM_INVALID + \type = REG_TYPE_XMM + .else + \type = REG_TYPE_INVALID + .endif + .endm + + .macro PFX_OPD_SIZE + .byte 0x66 + .endm + + .macro PFX_REX opd1 opd2 W=0 + .if ((\opd1 | \opd2) & 8) || \W + .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3) + .endif + .endm + + .macro MODRM mod opd1 opd2 + .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3) + .endm + + .macro PSHUFB_XMM xmm1 xmm2 + XMM_NUM pshufb_opd1 \xmm1 + XMM_NUM pshufb_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX pshufb_opd1 pshufb_opd2 + .byte 0x0f, 0x38, 0x00 + MODRM 0xc0 pshufb_opd1 pshufb_opd2 + .endm + + .macro PCLMULQDQ imm8 xmm1 xmm2 + XMM_NUM clmul_opd1 \xmm1 + XMM_NUM clmul_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX clmul_opd1 clmul_opd2 + .byte 0x0f, 0x3a, 0x44 + MODRM 0xc0 clmul_opd1 clmul_opd2 + .byte \imm8 + .endm + + .macro AESKEYGENASSIST rcon xmm1 xmm2 + XMM_NUM aeskeygen_opd1 \xmm1 + XMM_NUM aeskeygen_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aeskeygen_opd1 aeskeygen_opd2 + .byte 0x0f, 0x3a, 0xdf + MODRM 0xc0 aeskeygen_opd1 aeskeygen_opd2 + .byte \rcon + .endm + + .macro AESIMC xmm1 xmm2 + XMM_NUM aesimc_opd1 \xmm1 + XMM_NUM aesimc_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesimc_opd1 aesimc_opd2 + .byte 0x0f, 0x38, 0xdb + MODRM 0xc0 aesimc_opd1 aesimc_opd2 + .endm + + .macro AESENC xmm1 xmm2 + XMM_NUM aesenc_opd1 \xmm1 + XMM_NUM aesenc_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesenc_opd1 aesenc_opd2 + .byte 0x0f, 0x38, 0xdc + MODRM 0xc0 aesenc_opd1 aesenc_opd2 + .endm + + .macro AESENCLAST xmm1 xmm2 + XMM_NUM aesenclast_opd1 \xmm1 + XMM_NUM aesenclast_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesenclast_opd1 aesenclast_opd2 + .byte 0x0f, 0x38, 0xdd + MODRM 0xc0 aesenclast_opd1 aesenclast_opd2 + .endm + + .macro AESDEC xmm1 xmm2 + XMM_NUM aesdec_opd1 \xmm1 + XMM_NUM aesdec_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesdec_opd1 aesdec_opd2 + .byte 0x0f, 0x38, 0xde + MODRM 0xc0 aesdec_opd1 aesdec_opd2 + .endm + + .macro AESDECLAST xmm1 xmm2 + XMM_NUM aesdeclast_opd1 \xmm1 + XMM_NUM aesdeclast_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesdeclast_opd1 aesdeclast_opd2 + .byte 0x0f, 0x38, 0xdf + MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2 + .endm + + .macro MOVQ_R64_XMM opd1 opd2 + REG_TYPE movq_r64_xmm_opd1_type \opd1 + .if movq_r64_xmm_opd1_type == REG_TYPE_XMM + XMM_NUM movq_r64_xmm_opd1 \opd1 + R64_NUM movq_r64_xmm_opd2 \opd2 + .else + R64_NUM movq_r64_xmm_opd1 \opd1 + XMM_NUM movq_r64_xmm_opd2 \opd2 + .endif + PFX_OPD_SIZE + PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1 + .if movq_r64_xmm_opd1_type == REG_TYPE_XMM + .byte 0x0f, 0x7e + .else + .byte 0x0f, 0x6e + .endif + MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2 + .endm +#endif + +#endif diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h new file mode 100644 index 00000000000..925b605eb5c --- /dev/null +++ b/arch/x86/include/asm/intel_scu_ipc.h @@ -0,0 +1,73 @@ +#ifndef _ASM_X86_INTEL_SCU_IPC_H_ +#define _ASM_X86_INTEL_SCU_IPC_H_ + +#include <linux/notifier.h> + +#define IPCMSG_WARM_RESET 0xF0 +#define IPCMSG_COLD_RESET 0xF1 +#define IPCMSG_SOFT_RESET 0xF2 +#define IPCMSG_COLD_BOOT 0xF3 + +#define IPCMSG_VRTC 0xFA /* Set vRTC device */ + /* Command id associated with message IPCMSG_VRTC */ + #define IPC_CMD_VRTC_SETTIME 1 /* Set time */ + #define IPC_CMD_VRTC_SETALARM 2 /* Set alarm */ + +/* Read single register */ +int intel_scu_ipc_ioread8(u16 addr, u8 *data); + +/* Read two sequential registers */ +int intel_scu_ipc_ioread16(u16 addr, u16 *data); + +/* Read four sequential registers */ +int intel_scu_ipc_ioread32(u16 addr, u32 *data); + +/* Read a vector */ +int intel_scu_ipc_readv(u16 *addr, u8 *data, int len); + +/* Write single register */ +int intel_scu_ipc_iowrite8(u16 addr, u8 data); + +/* Write two sequential registers */ +int intel_scu_ipc_iowrite16(u16 addr, u16 data); + +/* Write four sequential registers */ +int intel_scu_ipc_iowrite32(u16 addr, u32 data); + +/* Write a vector */ +int intel_scu_ipc_writev(u16 *addr, u8 *data, int len); + +/* Update single register based on the mask */ +int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask); + +/* Issue commands to the SCU with or without data */ +int intel_scu_ipc_simple_command(int cmd, int sub); +int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen, + u32 *out, int outlen); +/* I2C control api */ +int intel_scu_ipc_i2c_cntrl(u32 addr, u32 *data); + +/* Update FW version */ +int intel_scu_ipc_fw_update(u8 *buffer, u32 length); + +extern struct blocking_notifier_head intel_scu_notifier; + +static inline void intel_scu_notifier_add(struct notifier_block *nb) +{ + blocking_notifier_chain_register(&intel_scu_notifier, nb); +} + +static inline void intel_scu_notifier_remove(struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&intel_scu_notifier, nb); +} + +static inline int intel_scu_notifier_post(unsigned long v, void *p) +{ + return blocking_notifier_call_chain(&intel_scu_notifier, v, p); +} + +#define SCU_AVAILABLE 1 +#define SCU_DOWN 2 + +#endif diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h new file mode 100644 index 00000000000..d8e8eefbe24 --- /dev/null +++ b/arch/x86/include/asm/io.h @@ -0,0 +1,348 @@ +#ifndef _ASM_X86_IO_H +#define _ASM_X86_IO_H + +/* + * This file contains the definitions for the x86 IO instructions + * inb/inw/inl/outb/outw/outl and the "string versions" of the same + * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" + * versions of the single-IO instructions (inb_p/inw_p/..). + * + * This file is not meant to be obfuscating: it's just complicated + * to (a) handle it all in a way that makes gcc able to optimize it + * as well as possible and (b) trying to avoid writing the same thing + * over and over again with slight variations and possibly making a + * mistake somewhere. + */ + +/* + * Thanks to James van Artsdalen for a better timing-fix than + * the two short jumps: using outb's to a nonexistent port seems + * to guarantee better timings even on fast machines. + * + * On the other hand, I'd like to be sure of a non-existent port: + * I feel a bit unsafe about using 0x80 (should be safe, though) + * + * Linus + */ + + /* + * Bit simplified and optimized by Jan Hubicka + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. + * + * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, + * isa_read[wl] and isa_write[wl] fixed + * - Arnaldo Carvalho de Melo <acme@conectiva.com.br> + */ + +#define ARCH_HAS_IOREMAP_WC + +#include <linux/string.h> +#include <linux/compiler.h> +#include <asm/page.h> + +#define build_mmio_read(name, size, type, reg, barrier) \ +static inline type name(const volatile void __iomem *addr) \ +{ type ret; asm volatile("mov" size " %1,%0":reg (ret) \ +:"m" (*(volatile type __force *)addr) barrier); return ret; } + +#define build_mmio_write(name, size, type, reg, barrier) \ +static inline void name(type val, volatile void __iomem *addr) \ +{ asm volatile("mov" size " %0,%1": :reg (val), \ +"m" (*(volatile type __force *)addr) barrier); } + +build_mmio_read(readb, "b", unsigned char, "=q", :"memory") +build_mmio_read(readw, "w", unsigned short, "=r", :"memory") +build_mmio_read(readl, "l", unsigned int, "=r", :"memory") + +build_mmio_read(__readb, "b", unsigned char, "=q", ) +build_mmio_read(__readw, "w", unsigned short, "=r", ) +build_mmio_read(__readl, "l", unsigned int, "=r", ) + +build_mmio_write(writeb, "b", unsigned char, "q", :"memory") +build_mmio_write(writew, "w", unsigned short, "r", :"memory") +build_mmio_write(writel, "l", unsigned int, "r", :"memory") + +build_mmio_write(__writeb, "b", unsigned char, "q", ) +build_mmio_write(__writew, "w", unsigned short, "r", ) +build_mmio_write(__writel, "l", unsigned int, "r", ) + +#define readb_relaxed(a) __readb(a) +#define readw_relaxed(a) __readw(a) +#define readl_relaxed(a) __readl(a) +#define __raw_readb __readb +#define __raw_readw __readw +#define __raw_readl __readl + +#define __raw_writeb __writeb +#define __raw_writew __writew +#define __raw_writel __writel + +#define mmiowb() barrier() + +#ifdef CONFIG_X86_64 + +build_mmio_read(readq, "q", unsigned long, "=r", :"memory") +build_mmio_write(writeq, "q", unsigned long, "r", :"memory") + +#define readq_relaxed(a) readq(a) + +#define __raw_readq(a) readq(a) +#define __raw_writeq(val, addr) writeq(val, addr) + +/* Let people know that we have them */ +#define readq readq +#define writeq writeq + +#endif + +/** + * virt_to_phys - map virtual addresses to physical + * @address: address to remap + * + * The returned physical address is the physical (CPU) mapping for + * the memory address given. It is only valid to use this function on + * addresses directly mapped or allocated via kmalloc. + * + * This function does not give bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline phys_addr_t virt_to_phys(volatile void *address) +{ + return __pa(address); +} + +/** + * phys_to_virt - map physical address to virtual + * @address: address to remap + * + * The returned virtual address is a current CPU mapping for + * the memory address given. It is only valid to use this function on + * addresses that have a kernel mapping + * + * This function does not handle bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline void *phys_to_virt(phys_addr_t address) +{ + return __va(address); +} + +/* + * Change "struct page" to physical address. + */ +#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) + +/* + * ISA I/O bus memory addresses are 1:1 with the physical address. + * However, we truncate the address to unsigned int to avoid undesirable + * promitions in legacy drivers. + */ +static inline unsigned int isa_virt_to_bus(volatile void *address) +{ + return (unsigned int)virt_to_phys(address); +} +#define isa_page_to_bus(page) ((unsigned int)page_to_phys(page)) +#define isa_bus_to_virt phys_to_virt + +/* + * However PCI ones are not necessarily 1:1 and therefore these interfaces + * are forbidden in portable PCI drivers. + * + * Allow them on x86 for legacy drivers, though. + */ +#define virt_to_bus virt_to_phys +#define bus_to_virt phys_to_virt + +/** + * ioremap - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * If the area you are trying to map is a PCI BAR you should have a + * look at pci_iomap(). + */ +extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, + unsigned long prot_val); + +/* + * The default ioremap() behavior is non-cached: + */ +static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) +{ + return ioremap_nocache(offset, size); +} + +extern void iounmap(volatile void __iomem *addr); + +extern void set_iounmap_nonlazy(void); + +#ifdef __KERNEL__ + +#include <asm-generic/iomap.h> + +#include <linux/vmalloc.h> + +/* + * Convert a virtual cached pointer to an uncached pointer + */ +#define xlate_dev_kmem_ptr(p) p + +static inline void +memset_io(volatile void __iomem *addr, unsigned char val, size_t count) +{ + memset((void __force *)addr, val, count); +} + +static inline void +memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count) +{ + memcpy(dst, (const void __force *)src, count); +} + +static inline void +memcpy_toio(volatile void __iomem *dst, const void *src, size_t count) +{ + memcpy((void __force *)dst, src, count); +} + +/* + * ISA space is 'always mapped' on a typical x86 system, no need to + * explicitly ioremap() it. The fact that the ISA IO space is mapped + * to PAGE_OFFSET is pure coincidence - it does not mean ISA values + * are physical addresses. The following constant pointer can be + * used as the IO-area pointer (it can be iounmapped as well, so the + * analogy with PCI is quite large): + */ +#define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET)) + +/* + * Cache management + * + * This needed for two cases + * 1. Out of order aware processors + * 2. Accidentally out of order processors (PPro errata #51) + */ + +static inline void flush_write_buffers(void) +{ +#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) + asm volatile("lock; addl $0,0(%%esp)": : :"memory"); +#endif +} + +#endif /* __KERNEL__ */ + +extern void native_io_delay(void); + +extern int io_delay_type; +extern void io_delay_init(void); + +#if defined(CONFIG_PARAVIRT) +#include <asm/paravirt.h> +#else + +static inline void slow_down_io(void) +{ + native_io_delay(); +#ifdef REALLY_SLOW_IO + native_io_delay(); + native_io_delay(); + native_io_delay(); +#endif +} + +#endif + +#define BUILDIO(bwl, bw, type) \ +static inline void out##bwl(unsigned type value, int port) \ +{ \ + asm volatile("out" #bwl " %" #bw "0, %w1" \ + : : "a"(value), "Nd"(port)); \ +} \ + \ +static inline unsigned type in##bwl(int port) \ +{ \ + unsigned type value; \ + asm volatile("in" #bwl " %w1, %" #bw "0" \ + : "=a"(value) : "Nd"(port)); \ + return value; \ +} \ + \ +static inline void out##bwl##_p(unsigned type value, int port) \ +{ \ + out##bwl(value, port); \ + slow_down_io(); \ +} \ + \ +static inline unsigned type in##bwl##_p(int port) \ +{ \ + unsigned type value = in##bwl(port); \ + slow_down_io(); \ + return value; \ +} \ + \ +static inline void outs##bwl(int port, const void *addr, unsigned long count) \ +{ \ + asm volatile("rep; outs" #bwl \ + : "+S"(addr), "+c"(count) : "d"(port)); \ +} \ + \ +static inline void ins##bwl(int port, void *addr, unsigned long count) \ +{ \ + asm volatile("rep; ins" #bwl \ + : "+D"(addr), "+c"(count) : "d"(port)); \ +} + +BUILDIO(b, b, char) +BUILDIO(w, w, short) +BUILDIO(l, , int) + +extern void *xlate_dev_mem_ptr(unsigned long phys); +extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr); + +extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, + unsigned long prot_val); +extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); + +/* + * early_ioremap() and early_iounmap() are for temporary early boot-time + * mappings, before the real ioremap() is functional. + * A boot-time mapping is currently limited to at most 16 pages. + */ +extern void early_ioremap_init(void); +extern void early_ioremap_reset(void); +extern void __iomem *early_ioremap(resource_size_t phys_addr, + unsigned long size); +extern void __iomem *early_memremap(resource_size_t phys_addr, + unsigned long size); +extern void early_iounmap(void __iomem *addr, unsigned long size); +extern void fixup_early_ioremap(void); +extern bool is_early_ioremap_ptep(pte_t *ptep); + +#ifdef CONFIG_XEN +#include <xen/xen.h> +struct bio_vec; + +extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, + const struct bio_vec *vec2); + +#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ + (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ + (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) +#endif /* CONFIG_XEN */ + +#define IO_SPACE_LIMIT 0xffff + +#endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h new file mode 100644 index 00000000000..ba1b11ac1cf --- /dev/null +++ b/arch/x86/include/asm/io_apic.h @@ -0,0 +1,235 @@ +#ifndef _ASM_X86_IO_APIC_H +#define _ASM_X86_IO_APIC_H + +#include <linux/types.h> +#include <asm/mpspec.h> +#include <asm/apicdef.h> +#include <asm/irq_vectors.h> +#include <asm/x86_init.h> + +/* + * Intel IO-APIC support for SMP and UP systems. + * + * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar + */ + +/* I/O Unit Redirection Table */ +#define IO_APIC_REDIR_VECTOR_MASK 0x000FF +#define IO_APIC_REDIR_DEST_LOGICAL 0x00800 +#define IO_APIC_REDIR_DEST_PHYSICAL 0x00000 +#define IO_APIC_REDIR_SEND_PENDING (1 << 12) +#define IO_APIC_REDIR_REMOTE_IRR (1 << 14) +#define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15) +#define IO_APIC_REDIR_MASKED (1 << 16) + +/* + * The structure of the IO-APIC: + */ +union IO_APIC_reg_00 { + u32 raw; + struct { + u32 __reserved_2 : 14, + LTS : 1, + delivery_type : 1, + __reserved_1 : 8, + ID : 8; + } __attribute__ ((packed)) bits; +}; + +union IO_APIC_reg_01 { + u32 raw; + struct { + u32 version : 8, + __reserved_2 : 7, + PRQ : 1, + entries : 8, + __reserved_1 : 8; + } __attribute__ ((packed)) bits; +}; + +union IO_APIC_reg_02 { + u32 raw; + struct { + u32 __reserved_2 : 24, + arbitration : 4, + __reserved_1 : 4; + } __attribute__ ((packed)) bits; +}; + +union IO_APIC_reg_03 { + u32 raw; + struct { + u32 boot_DT : 1, + __reserved_1 : 31; + } __attribute__ ((packed)) bits; +}; + +struct IO_APIC_route_entry { + __u32 vector : 8, + delivery_mode : 3, /* 000: FIXED + * 001: lowest prio + * 111: ExtINT + */ + dest_mode : 1, /* 0: physical, 1: logical */ + delivery_status : 1, + polarity : 1, + irr : 1, + trigger : 1, /* 0: edge, 1: level */ + mask : 1, /* 0: enabled, 1: disabled */ + __reserved_2 : 15; + + __u32 __reserved_3 : 24, + dest : 8; +} __attribute__ ((packed)); + +struct IR_IO_APIC_route_entry { + __u64 vector : 8, + zero : 3, + index2 : 1, + delivery_status : 1, + polarity : 1, + irr : 1, + trigger : 1, + mask : 1, + reserved : 31, + format : 1, + index : 15; +} __attribute__ ((packed)); + +#define IOAPIC_AUTO -1 +#define IOAPIC_EDGE 0 +#define IOAPIC_LEVEL 1 + +#ifdef CONFIG_X86_IO_APIC + +/* + * # of IO-APICs and # of IRQ routing registers + */ +extern int nr_ioapics; + +extern int mpc_ioapic_id(int ioapic); +extern unsigned int mpc_ioapic_addr(int ioapic); +extern struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic); + +#define MP_MAX_IOAPIC_PIN 127 + +/* # of MP IRQ source entries */ +extern int mp_irq_entries; + +/* MP IRQ source entries */ +extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; + +/* non-0 if default (table-less) MP configuration */ +extern int mpc_default_type; + +/* Older SiS APIC requires we rewrite the index register */ +extern int sis_apic_bug; + +/* 1 if "noapic" boot option passed */ +extern int skip_ioapic_setup; + +/* 1 if "noapic" boot option passed */ +extern int noioapicquirk; + +/* -1 if "noapic" boot option passed */ +extern int noioapicreroute; + +/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ +extern int timer_through_8259; + +/* + * If we use the IO-APIC for IRQ routing, disable automatic + * assignment of PCI IRQ's. + */ +#define io_apic_assign_pci_irqs \ + (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) + +struct io_apic_irq_attr; +extern int io_apic_set_pci_routing(struct device *dev, int irq, + struct io_apic_irq_attr *irq_attr); +void setup_IO_APIC_irq_extra(u32 gsi); +extern void ioapic_insert_resources(void); + +int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr); + +extern int save_ioapic_entries(void); +extern void mask_ioapic_entries(void); +extern int restore_ioapic_entries(void); + +extern int get_nr_irqs_gsi(void); + +extern void setup_ioapic_ids_from_mpc(void); +extern void setup_ioapic_ids_from_mpc_nocheck(void); + +struct mp_ioapic_gsi{ + u32 gsi_base; + u32 gsi_end; +}; +extern struct mp_ioapic_gsi mp_gsi_routing[]; +extern u32 gsi_top; +int mp_find_ioapic(u32 gsi); +int mp_find_ioapic_pin(int ioapic, u32 gsi); +void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); +extern void __init pre_init_apic_IRQ0(void); + +extern void mp_save_irq(struct mpc_intsrc *m); + +extern void disable_ioapic_support(void); + + +void __init native_ioapic_init_mappings(void); +unsigned int native_ioapic_read(unsigned int apic, unsigned int reg); +void native_ioapic_write(unsigned int apic, unsigned int reg, + unsigned int val); +void native_ioapic_modify(unsigned int apic, unsigned int reg, + unsigned int val); + +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) +{ + return x86_ioapic.read(apic, reg); +} + +static inline void io_apic_write(unsigned int apic, unsigned int reg, + unsigned int value) +{ + x86_ioapic.write(apic, reg, value); +} + +static inline void io_apic_modify(unsigned int apic, unsigned int reg, + unsigned int value) +{ + x86_ioapic.modify(apic, reg, value); +} +#else /* !CONFIG_X86_IO_APIC */ + +#define io_apic_assign_pci_irqs 0 +#define setup_ioapic_ids_from_mpc x86_init_noop +static const int timer_through_8259 = 0; +static inline void ioapic_insert_resources(void) { } +#define gsi_top (NR_IRQS_LEGACY) +static inline int mp_find_ioapic(u32 gsi) { return 0; } + +struct io_apic_irq_attr; +static inline int io_apic_set_pci_routing(struct device *dev, int irq, + struct io_apic_irq_attr *irq_attr) { return 0; } + +static inline int save_ioapic_entries(void) +{ + return -ENOMEM; +} + +static inline void mask_ioapic_entries(void) { } +static inline int restore_ioapic_entries(void) +{ + return -ENOMEM; +} + +static inline void mp_save_irq(struct mpc_intsrc *m) { }; +static inline void disable_ioapic_support(void) { } +#define native_ioapic_init_mappings NULL +#define native_ioapic_read NULL +#define native_ioapic_write NULL +#define native_ioapic_modify NULL +#endif + +#endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/ioctl.h b/arch/x86/include/asm/ioctl.h new file mode 100644 index 00000000000..b279fe06dfe --- /dev/null +++ b/arch/x86/include/asm/ioctl.h @@ -0,0 +1 @@ +#include <asm-generic/ioctl.h> diff --git a/arch/x86/include/asm/ioctls.h b/arch/x86/include/asm/ioctls.h new file mode 100644 index 00000000000..ec34c760665 --- /dev/null +++ b/arch/x86/include/asm/ioctls.h @@ -0,0 +1 @@ +#include <asm-generic/ioctls.h> diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h new file mode 100644 index 00000000000..363e33eb6ec --- /dev/null +++ b/arch/x86/include/asm/iomap.h @@ -0,0 +1,41 @@ +#ifndef _ASM_X86_IOMAP_H +#define _ASM_X86_IOMAP_H + +/* + * Copyright © 2008 Ingo Molnar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + */ + +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/uaccess.h> +#include <asm/cacheflush.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> + +void __iomem * +iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); + +void +iounmap_atomic(void __iomem *kvaddr); + +int +iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); + +void +iomap_free(resource_size_t base, unsigned long size); + +#endif /* _ASM_X86_IOMAP_H */ diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h new file mode 100644 index 00000000000..dffc38ee625 --- /dev/null +++ b/arch/x86/include/asm/iommu.h @@ -0,0 +1,13 @@ +#ifndef _ASM_X86_IOMMU_H +#define _ASM_X86_IOMMU_H + +extern struct dma_map_ops nommu_dma_ops; +extern int force_iommu, no_iommu; +extern int iommu_detected; +extern int iommu_pass_through; +extern int iommu_group_mf; + +/* 10 seconds */ +#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) + +#endif /* _ASM_X86_IOMMU_H */ diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h new file mode 100644 index 00000000000..f229b13a5f3 --- /dev/null +++ b/arch/x86/include/asm/iommu_table.h @@ -0,0 +1,100 @@ +#ifndef _ASM_X86_IOMMU_TABLE_H +#define _ASM_X86_IOMMU_TABLE_H + +#include <asm/swiotlb.h> + +/* + * History lesson: + * The execution chain of IOMMUs in 2.6.36 looks as so: + * + * [xen-swiotlb] + * | + * +----[swiotlb *]--+ + * / | \ + * / | \ + * [GART] [Calgary] [Intel VT-d] + * / + * / + * [AMD-Vi] + * + * *: if SWIOTLB detected 'iommu=soft'/'swiotlb=force' it would skip + * over the rest of IOMMUs and unconditionally initialize the SWIOTLB. + * Also it would surreptitiously initialize set the swiotlb=1 if there were + * more than 4GB and if the user did not pass in 'iommu=off'. The swiotlb + * flag would be turned off by all IOMMUs except the Calgary one. + * + * The IOMMU_INIT* macros allow a similar tree (or more complex if desired) + * to be built by defining who we depend on. + * + * And all that needs to be done is to use one of the macros in the IOMMU + * and the pci-dma.c will take care of the rest. + */ + +struct iommu_table_entry { + initcall_t detect; + initcall_t depend; + void (*early_init)(void); /* No memory allocate available. */ + void (*late_init)(void); /* Yes, can allocate memory. */ +#define IOMMU_FINISH_IF_DETECTED (1<<0) +#define IOMMU_DETECTED (1<<1) + int flags; +}; +/* + * Macro fills out an entry in the .iommu_table that is equivalent + * to the fields that 'struct iommu_table_entry' has. The entries + * that are put in the .iommu_table section are not put in any order + * hence during boot-time we will have to resort them based on + * dependency. */ + + +#define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\ + static const struct iommu_table_entry const \ + __iommu_entry_##_detect __used \ + __attribute__ ((unused, __section__(".iommu_table"), \ + aligned((sizeof(void *))))) \ + = {_detect, _depend, _early_init, _late_init, \ + _finish ? IOMMU_FINISH_IF_DETECTED : 0} +/* + * The simplest IOMMU definition. Provide the detection routine + * and it will be run after the SWIOTLB and the other IOMMUs + * that utilize this macro. If the IOMMU is detected (ie, the + * detect routine returns a positive value), the other IOMMUs + * are also checked. You can use IOMMU_INIT_POST_FINISH if you prefer + * to stop detecting the other IOMMUs after yours has been detected. + */ +#define IOMMU_INIT_POST(_detect) \ + __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 0) + +#define IOMMU_INIT_POST_FINISH(detect) \ + __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 1) + +/* + * A more sophisticated version of IOMMU_INIT. This variant requires: + * a). A detection routine function. + * b). The name of the detection routine we depend on to get called + * before us. + * c). The init routine which gets called if the detection routine + * returns a positive value from the pci_iommu_alloc. This means + * no presence of a memory allocator. + * d). Similar to the 'init', except that this gets called from pci_iommu_init + * where we do have a memory allocator. + * + * The standard vs the _FINISH differs in that the _FINISH variant will + * continue detecting other IOMMUs in the call list after the + * the detection routine returns a positive number. The _FINISH will + * stop the execution chain. Both will still call the 'init' and + * 'late_init' functions if they are set. + */ +#define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init) \ + __IOMMU_INIT(_detect, _depend, _init, _late_init, 1) + +#define IOMMU_INIT(_detect, _depend, _init, _late_init) \ + __IOMMU_INIT(_detect, _depend, _init, _late_init, 0) + +void sort_iommu_table(struct iommu_table_entry *start, + struct iommu_table_entry *finish); + +void check_iommu_entries(struct iommu_table_entry *start, + struct iommu_table_entry *finish); + +#endif /* _ASM_X86_IOMMU_TABLE_H */ diff --git a/arch/x86/include/asm/ipcbuf.h b/arch/x86/include/asm/ipcbuf.h new file mode 100644 index 00000000000..84c7e51cb6d --- /dev/null +++ b/arch/x86/include/asm/ipcbuf.h @@ -0,0 +1 @@ +#include <asm-generic/ipcbuf.h> diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h new file mode 100644 index 00000000000..615fa9061b5 --- /dev/null +++ b/arch/x86/include/asm/ipi.h @@ -0,0 +1,162 @@ +#ifndef _ASM_X86_IPI_H +#define _ASM_X86_IPI_H + +#ifdef CONFIG_X86_LOCAL_APIC + +/* + * Copyright 2004 James Cleverdon, IBM. + * Subject to the GNU Public License, v.2 + * + * Generic APIC InterProcessor Interrupt code. + * + * Moved to include file by James Cleverdon from + * arch/x86-64/kernel/smp.c + * + * Copyrights from kernel/smp.c: + * + * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> + * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> + * (c) 2002,2003 Andi Kleen, SuSE Labs. + * Subject to the GNU Public License, v.2 + */ + +#include <asm/hw_irq.h> +#include <asm/apic.h> +#include <asm/smp.h> + +/* + * the following functions deal with sending IPIs between CPUs. + * + * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. + */ + +static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector, + unsigned int dest) +{ + unsigned int icr = shortcut | dest; + + switch (vector) { + default: + icr |= APIC_DM_FIXED | vector; + break; + case NMI_VECTOR: + icr |= APIC_DM_NMI; + break; + } + return icr; +} + +static inline int __prepare_ICR2(unsigned int mask) +{ + return SET_APIC_DEST_FIELD(mask); +} + +static inline void __xapic_wait_icr_idle(void) +{ + while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY) + cpu_relax(); +} + +static inline void +__default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) +{ + /* + * Subtle. In the case of the 'never do double writes' workaround + * we have to lock out interrupts to be safe. As we don't care + * of the value read we use an atomic rmw access to avoid costly + * cli/sti. Otherwise we use an even cheaper single atomic write + * to the APIC. + */ + unsigned int cfg; + + /* + * Wait for idle. + */ + __xapic_wait_icr_idle(); + + /* + * No need to touch the target chip field + */ + cfg = __prepare_ICR(shortcut, vector, dest); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + native_apic_mem_write(APIC_ICR, cfg); +} + +/* + * This is used to send an IPI with no shorthand notation (the destination is + * specified in bits 56 to 63 of the ICR). + */ +static inline void + __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest) +{ + unsigned long cfg; + + /* + * Wait for idle. + */ + if (unlikely(vector == NMI_VECTOR)) + safe_apic_wait_icr_idle(); + else + __xapic_wait_icr_idle(); + + /* + * prepare target chip field + */ + cfg = __prepare_ICR2(mask); + native_apic_mem_write(APIC_ICR2, cfg); + + /* + * program the ICR + */ + cfg = __prepare_ICR(0, vector, dest); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + native_apic_mem_write(APIC_ICR, cfg); +} + +extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, + int vector); +extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, + int vector); + +/* Avoid include hell */ +#define NMI_VECTOR 0x02 + +extern int no_broadcast; + +static inline void __default_local_send_IPI_allbutself(int vector) +{ + if (no_broadcast || vector == NMI_VECTOR) + apic->send_IPI_mask_allbutself(cpu_online_mask, vector); + else + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical); +} + +static inline void __default_local_send_IPI_all(int vector) +{ + if (no_broadcast || vector == NMI_VECTOR) + apic->send_IPI_mask(cpu_online_mask, vector); + else + __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); +} + +#ifdef CONFIG_X86_32 +extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, + int vector); +extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, + int vector); +extern void default_send_IPI_mask_logical(const struct cpumask *mask, + int vector); +extern void default_send_IPI_allbutself(int vector); +extern void default_send_IPI_all(int vector); +extern void default_send_IPI_self(int vector); +#endif + +#endif + +#endif /* _ASM_X86_IPI_H */ diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h new file mode 100644 index 00000000000..ba870bb6dd8 --- /dev/null +++ b/arch/x86/include/asm/irq.h @@ -0,0 +1,44 @@ +#ifndef _ASM_X86_IRQ_H +#define _ASM_X86_IRQ_H +/* + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + * + * IRQ/IPI changes taken from work by Thomas Radke + * <tomsoft@informatik.tu-chemnitz.de> + */ + +#include <asm/apicdef.h> +#include <asm/irq_vectors.h> + +static inline int irq_canonicalize(int irq) +{ + return ((irq == 2) ? 9 : irq); +} + +#ifdef CONFIG_X86_32 +extern void irq_ctx_init(int cpu); +#else +# define irq_ctx_init(cpu) do { } while (0) +#endif + +#define __ARCH_HAS_DO_SOFTIRQ + +#ifdef CONFIG_HOTPLUG_CPU +#include <linux/cpumask.h> +extern void fixup_irqs(void); +extern void irq_force_complete_move(int); +#endif + +extern void (*x86_platform_ipi_callback)(void); +extern void native_init_IRQ(void); +extern bool handle_irq(unsigned irq, struct pt_regs *regs); + +extern unsigned int do_IRQ(struct pt_regs *regs); + +/* Interrupt vector management */ +extern DECLARE_BITMAP(used_vectors, NR_VECTORS); +extern int vector_used_by_percpu_irq(unsigned int vector); + +extern void init_ISA_irqs(void); + +#endif /* _ASM_X86_IRQ_H */ diff --git a/arch/x86/include/asm/irq_controller.h b/arch/x86/include/asm/irq_controller.h new file mode 100644 index 00000000000..423bbbddf36 --- /dev/null +++ b/arch/x86/include/asm/irq_controller.h @@ -0,0 +1,12 @@ +#ifndef __IRQ_CONTROLLER__ +#define __IRQ_CONTROLLER__ + +struct irq_domain { + int (*xlate)(struct irq_domain *h, const u32 *intspec, u32 intsize, + u32 *out_hwirq, u32 *out_type); + void *priv; + struct device_node *controller; + struct list_head l; +}; + +#endif diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h new file mode 100644 index 00000000000..77843225b7e --- /dev/null +++ b/arch/x86/include/asm/irq_regs.h @@ -0,0 +1,31 @@ +/* + * Per-cpu current frame pointer - the location of the last exception frame on + * the stack, stored in the per-cpu area. + * + * Jeremy Fitzhardinge <jeremy@goop.org> + */ +#ifndef _ASM_X86_IRQ_REGS_H +#define _ASM_X86_IRQ_REGS_H + +#include <asm/percpu.h> + +#define ARCH_HAS_OWN_IRQ_REGS + +DECLARE_PER_CPU(struct pt_regs *, irq_regs); + +static inline struct pt_regs *get_irq_regs(void) +{ + return percpu_read(irq_regs); +} + +static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) +{ + struct pt_regs *old_regs; + + old_regs = get_irq_regs(); + percpu_write(irq_regs, new_regs); + + return old_regs; +} + +#endif /* _ASM_X86_IRQ_REGS_32_H */ diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h new file mode 100644 index 00000000000..47d99934580 --- /dev/null +++ b/arch/x86/include/asm/irq_remapping.h @@ -0,0 +1,45 @@ +#ifndef _ASM_X86_IRQ_REMAPPING_H +#define _ASM_X86_IRQ_REMAPPING_H + +#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8) + +#ifdef CONFIG_IRQ_REMAP +static void irq_remap_modify_chip_defaults(struct irq_chip *chip); +static inline void prepare_irte(struct irte *irte, int vector, + unsigned int dest) +{ + memset(irte, 0, sizeof(*irte)); + + irte->present = 1; + irte->dst_mode = apic->irq_dest_mode; + /* + * Trigger mode in the IRTE will always be edge, and for IO-APIC, the + * actual level or edge trigger will be setup in the IO-APIC + * RTE. This will help simplify level triggered irq migration. + * For more details, see the comments (in io_apic.c) explainig IO-APIC + * irq migration in the presence of interrupt-remapping. + */ + irte->trigger_mode = 0; + irte->dlvry_mode = apic->irq_delivery_mode; + irte->vector = vector; + irte->dest_id = IRTE_DEST(dest); + irte->redir_hint = 1; +} +static inline bool irq_remapped(struct irq_cfg *cfg) +{ + return cfg->irq_2_iommu.iommu != NULL; +} +#else +static void prepare_irte(struct irte *irte, int vector, unsigned int dest) +{ +} +static inline bool irq_remapped(struct irq_cfg *cfg) +{ + return false; +} +static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) +{ +} +#endif + +#endif /* _ASM_X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h new file mode 100644 index 00000000000..4b4448761e8 --- /dev/null +++ b/arch/x86/include/asm/irq_vectors.h @@ -0,0 +1,172 @@ +#ifndef _ASM_X86_IRQ_VECTORS_H +#define _ASM_X86_IRQ_VECTORS_H + +#include <linux/threads.h> +/* + * Linux IRQ vector layout. + * + * There are 256 IDT entries (per CPU - each entry is 8 bytes) which can + * be defined by Linux. They are used as a jump table by the CPU when a + * given vector is triggered - by a CPU-external, CPU-internal or + * software-triggered event. + * + * Linux sets the kernel code address each entry jumps to early during + * bootup, and never changes them. This is the general layout of the + * IDT entries: + * + * Vectors 0 ... 31 : system traps and exceptions - hardcoded events + * Vectors 32 ... 127 : device interrupts + * Vector 128 : legacy int80 syscall interface + * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts + * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts + * + * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. + * + * This file enumerates the exact layout of them: + */ + +#define NMI_VECTOR 0x02 +#define MCE_VECTOR 0x12 + +/* + * IDT vectors usable for external interrupt sources start at 0x20. + * (0x80 is the syscall vector, 0x30-0x3f are for ISA) + */ +#define FIRST_EXTERNAL_VECTOR 0x20 +/* + * We start allocating at 0x21 to spread out vectors evenly between + * priority levels. (0x80 is the syscall vector) + */ +#define VECTOR_OFFSET_START 1 + +/* + * Reserve the lowest usable vector (and hence lowest priority) 0x20 for + * triggering cleanup after irq migration. 0x21-0x2f will still be used + * for device interrupts. + */ +#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR + +#define IA32_SYSCALL_VECTOR 0x80 +#ifdef CONFIG_X86_32 +# define SYSCALL_VECTOR 0x80 +#endif + +/* + * Vectors 0x30-0x3f are used for ISA interrupts. + * round up to the next 16-vector boundary + */ +#define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15) + +#define IRQ1_VECTOR (IRQ0_VECTOR + 1) +#define IRQ2_VECTOR (IRQ0_VECTOR + 2) +#define IRQ3_VECTOR (IRQ0_VECTOR + 3) +#define IRQ4_VECTOR (IRQ0_VECTOR + 4) +#define IRQ5_VECTOR (IRQ0_VECTOR + 5) +#define IRQ6_VECTOR (IRQ0_VECTOR + 6) +#define IRQ7_VECTOR (IRQ0_VECTOR + 7) +#define IRQ8_VECTOR (IRQ0_VECTOR + 8) +#define IRQ9_VECTOR (IRQ0_VECTOR + 9) +#define IRQ10_VECTOR (IRQ0_VECTOR + 10) +#define IRQ11_VECTOR (IRQ0_VECTOR + 11) +#define IRQ12_VECTOR (IRQ0_VECTOR + 12) +#define IRQ13_VECTOR (IRQ0_VECTOR + 13) +#define IRQ14_VECTOR (IRQ0_VECTOR + 14) +#define IRQ15_VECTOR (IRQ0_VECTOR + 15) + +/* + * Special IRQ vectors used by the SMP architecture, 0xf0-0xff + * + * some of the following vectors are 'rare', they are merged + * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. + * TLB, reschedule and local APIC vectors are performance-critical. + */ + +#define SPURIOUS_APIC_VECTOR 0xff +/* + * Sanity check + */ +#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) +# error SPURIOUS_APIC_VECTOR definition error +#endif + +#define ERROR_APIC_VECTOR 0xfe +#define RESCHEDULE_VECTOR 0xfd +#define CALL_FUNCTION_VECTOR 0xfc +#define CALL_FUNCTION_SINGLE_VECTOR 0xfb +#define THERMAL_APIC_VECTOR 0xfa +#define THRESHOLD_APIC_VECTOR 0xf9 +#define REBOOT_VECTOR 0xf8 + +/* + * Generic system vector for platform specific use + */ +#define X86_PLATFORM_IPI_VECTOR 0xf7 + +/* + * IRQ work vector: + */ +#define IRQ_WORK_VECTOR 0xf6 + +#define UV_BAU_MESSAGE 0xf5 + +/* Xen vector callback to receive events in a HVM domain */ +#define XEN_HVM_EVTCHN_CALLBACK 0xf3 + +/* + * Local APIC timer IRQ vector is on a different priority level, + * to work around the 'lost local interrupt if more than 2 IRQ + * sources per level' errata. + */ +#define LOCAL_TIMER_VECTOR 0xef + +/* up to 32 vectors used for spreading out TLB flushes: */ +#if NR_CPUS <= 32 +# define NUM_INVALIDATE_TLB_VECTORS (NR_CPUS) +#else +# define NUM_INVALIDATE_TLB_VECTORS (32) +#endif + +#define INVALIDATE_TLB_VECTOR_END (0xee) +#define INVALIDATE_TLB_VECTOR_START \ + (INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1) + +#define NR_VECTORS 256 + +#define FPU_IRQ 13 + +#define FIRST_VM86_IRQ 3 +#define LAST_VM86_IRQ 15 + +#ifndef __ASSEMBLY__ +static inline int invalid_vm86_irq(int irq) +{ + return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ; +} +#endif + +/* + * Size the maximum number of interrupts. + * + * If the irq_desc[] array has a sparse layout, we can size things + * generously - it scales up linearly with the maximum number of CPUs, + * and the maximum number of IO-APICs, whichever is higher. + * + * In other cases we size more conservatively, to not create too large + * static arrays. + */ + +#define NR_IRQS_LEGACY 16 + +#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) + +#ifdef CONFIG_X86_IO_APIC +# define CPU_VECTOR_LIMIT (64 * NR_CPUS) +# define NR_IRQS \ + (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ + (NR_VECTORS + CPU_VECTOR_LIMIT) : \ + (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) +#else /* !CONFIG_X86_IO_APIC: */ +# define NR_IRQS NR_IRQS_LEGACY +#endif + +#endif /* _ASM_X86_IRQ_VECTORS_H */ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h new file mode 100644 index 00000000000..bba3cf88e62 --- /dev/null +++ b/arch/x86/include/asm/irqflags.h @@ -0,0 +1,209 @@ +#ifndef _X86_IRQFLAGS_H_ +#define _X86_IRQFLAGS_H_ + +#include <asm/processor-flags.h> + +#ifndef __ASSEMBLY__ +/* + * Interrupt control: + */ + +static inline unsigned long native_save_fl(void) +{ + unsigned long flags; + + /* + * "=rm" is safe here, because "pop" adjusts the stack before + * it evaluates its effective address -- this is part of the + * documented behavior of the "pop" instruction. + */ + asm volatile("# __raw_save_flags\n\t" + "pushf ; pop %0" + : "=rm" (flags) + : /* no input */ + : "memory"); + + return flags; +} + +static inline void native_restore_fl(unsigned long flags) +{ + asm volatile("push %0 ; popf" + : /* no output */ + :"g" (flags) + :"memory", "cc"); +} + +static inline void native_irq_disable(void) +{ + asm volatile("cli": : :"memory"); +} + +static inline void native_irq_enable(void) +{ + asm volatile("sti": : :"memory"); +} + +static inline void native_safe_halt(void) +{ + asm volatile("sti; hlt": : :"memory"); +} + +static inline void native_halt(void) +{ + asm volatile("hlt": : :"memory"); +} + +#endif + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#ifndef __ASSEMBLY__ +#include <linux/types.h> + +static inline notrace unsigned long arch_local_save_flags(void) +{ + return native_save_fl(); +} + +static inline notrace void arch_local_irq_restore(unsigned long flags) +{ + native_restore_fl(flags); +} + +static inline notrace void arch_local_irq_disable(void) +{ + native_irq_disable(); +} + +static inline notrace void arch_local_irq_enable(void) +{ + native_irq_enable(); +} + +/* + * Used in the idle loop; sti takes one instruction cycle + * to complete: + */ +static inline void arch_safe_halt(void) +{ + native_safe_halt(); +} + +/* + * Used when interrupts are already enabled or to + * shutdown the processor: + */ +static inline void halt(void) +{ + native_halt(); +} + +/* + * For spinlocks, etc: + */ +static inline notrace unsigned long arch_local_irq_save(void) +{ + unsigned long flags = arch_local_save_flags(); + arch_local_irq_disable(); + return flags; +} +#else + +#define ENABLE_INTERRUPTS(x) sti +#define DISABLE_INTERRUPTS(x) cli + +#ifdef CONFIG_X86_64 +#define SWAPGS swapgs +/* + * Currently paravirt can't handle swapgs nicely when we + * don't have a stack we can rely on (such as a user space + * stack). So we either find a way around these or just fault + * and emulate if a guest tries to call swapgs directly. + * + * Either way, this is a good way to document that we don't + * have a reliable stack. x86_64 only. + */ +#define SWAPGS_UNSAFE_STACK swapgs + +#define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */ + +#define INTERRUPT_RETURN iretq +#define USERGS_SYSRET64 \ + swapgs; \ + sysretq; +#define USERGS_SYSRET32 \ + swapgs; \ + sysretl +#define ENABLE_INTERRUPTS_SYSEXIT32 \ + swapgs; \ + sti; \ + sysexit + +#else +#define INTERRUPT_RETURN iret +#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit +#define GET_CR0_INTO_EAX movl %cr0, %eax +#endif + + +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT */ + +#ifndef __ASSEMBLY__ +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & X86_EFLAGS_IF); +} + +static inline int arch_irqs_disabled(void) +{ + unsigned long flags = arch_local_save_flags(); + + return arch_irqs_disabled_flags(flags); +} + +#else + +#ifdef CONFIG_X86_64 +#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk +#define ARCH_LOCKDEP_SYS_EXIT_IRQ \ + TRACE_IRQS_ON; \ + sti; \ + SAVE_REST; \ + LOCKDEP_SYS_EXIT; \ + RESTORE_REST; \ + cli; \ + TRACE_IRQS_OFF; + +#else +#define ARCH_LOCKDEP_SYS_EXIT \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call lockdep_sys_exit; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +#define ARCH_LOCKDEP_SYS_EXIT_IRQ +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS +# define TRACE_IRQS_ON call trace_hardirqs_on_thunk; +# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; +#else +# define TRACE_IRQS_ON +# define TRACE_IRQS_OFF +#endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define LOCKDEP_SYS_EXIT ARCH_LOCKDEP_SYS_EXIT +# define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ +# else +# define LOCKDEP_SYS_EXIT +# define LOCKDEP_SYS_EXIT_IRQ +# endif + +#endif /* __ASSEMBLY__ */ +#endif diff --git a/arch/x86/include/asm/ist.h b/arch/x86/include/asm/ist.h new file mode 100644 index 00000000000..7e5dff1de0e --- /dev/null +++ b/arch/x86/include/asm/ist.h @@ -0,0 +1,34 @@ +#ifndef _ASM_X86_IST_H +#define _ASM_X86_IST_H + +/* + * Include file for the interface to IST BIOS + * Copyright 2002 Andy Grover <andrew.grover@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + + +#include <linux/types.h> + +struct ist_info { + __u32 signature; + __u32 command; + __u32 event; + __u32 perf_level; +}; + +#ifdef __KERNEL__ + +extern struct ist_info ist_info; + +#endif /* __KERNEL__ */ +#endif /* _ASM_X86_IST_H */ diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h new file mode 100644 index 00000000000..a32b18ce6ea --- /dev/null +++ b/arch/x86/include/asm/jump_label.h @@ -0,0 +1,42 @@ +#ifndef _ASM_X86_JUMP_LABEL_H +#define _ASM_X86_JUMP_LABEL_H + +#ifdef __KERNEL__ + +#include <linux/types.h> +#include <asm/nops.h> +#include <asm/asm.h> + +#define JUMP_LABEL_NOP_SIZE 5 + +#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" + +static __always_inline bool arch_static_branch(struct jump_label_key *key) +{ + asm goto("1:" + JUMP_LABEL_INITIAL_NOP + ".pushsection __jump_table, \"aw\" \n\t" + _ASM_ALIGN "\n\t" + _ASM_PTR "1b, %l[l_yes], %c0 \n\t" + ".popsection \n\t" + : : "i" (key) : : l_yes); + return false; +l_yes: + return true; +} + +#endif /* __KERNEL__ */ + +#ifdef CONFIG_X86_64 +typedef u64 jump_label_t; +#else +typedef u32 jump_label_t; +#endif + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h new file mode 100644 index 00000000000..d73f1571bde --- /dev/null +++ b/arch/x86/include/asm/kdebug.h @@ -0,0 +1,40 @@ +#ifndef _ASM_X86_KDEBUG_H +#define _ASM_X86_KDEBUG_H + +#include <linux/notifier.h> + +struct pt_regs; + +/* Grossly misnamed. */ +enum die_val { + DIE_OOPS = 1, + DIE_INT3, + DIE_DEBUG, + DIE_PANIC, + DIE_NMI, + DIE_DIE, + DIE_KERNELDEBUG, + DIE_TRAP, + DIE_GPF, + DIE_CALL, + DIE_PAGE_FAULT, + DIE_NMIUNKNOWN, +}; + +extern void printk_address(unsigned long address, int reliable); +extern void die(const char *, struct pt_regs *,long); +extern int __must_check __die(const char *, struct pt_regs *, long); +extern void show_registers(struct pt_regs *regs); +extern void show_trace(struct task_struct *t, struct pt_regs *regs, + unsigned long *sp, unsigned long bp); +extern void __show_regs(struct pt_regs *regs, int all); +extern unsigned long oops_begin(void); +extern void oops_end(unsigned long, struct pt_regs *, int signr); +#ifdef CONFIG_KEXEC +extern int in_crash_kexec; +#else +/* no crash dump is ever in progress if no crash kernel can be kexec'd */ +#define in_crash_kexec 0 +#endif + +#endif /* _ASM_X86_KDEBUG_H */ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h new file mode 100644 index 00000000000..317ff1703d0 --- /dev/null +++ b/arch/x86/include/asm/kexec.h @@ -0,0 +1,168 @@ +#ifndef _ASM_X86_KEXEC_H +#define _ASM_X86_KEXEC_H + +#ifdef CONFIG_X86_32 +# define PA_CONTROL_PAGE 0 +# define VA_CONTROL_PAGE 1 +# define PA_PGD 2 +# define PA_SWAP_PAGE 3 +# define PAGES_NR 4 +#else +# define PA_CONTROL_PAGE 0 +# define VA_CONTROL_PAGE 1 +# define PA_TABLE_PAGE 2 +# define PA_SWAP_PAGE 3 +# define PAGES_NR 4 +#endif + +# define KEXEC_CONTROL_CODE_MAX_SIZE 2048 + +#ifndef __ASSEMBLY__ + +#include <linux/string.h> + +#include <asm/page.h> +#include <asm/ptrace.h> + +/* + * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. + * I.e. Maximum page that is mapped directly into kernel memory, + * and kmap is not required. + * + * So far x86_64 is limited to 40 physical address bits. + */ +#ifdef CONFIG_X86_32 +/* Maximum physical address we can use pages from */ +# define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) +/* Maximum address we can reach in physical address mode */ +# define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) +/* Maximum address we can use for the control code buffer */ +# define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE + +# define KEXEC_CONTROL_PAGE_SIZE 4096 + +/* The native architecture */ +# define KEXEC_ARCH KEXEC_ARCH_386 + +/* We can also handle crash dumps from 64 bit kernel. */ +# define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) +#else +/* Maximum physical address we can use pages from */ +# define KEXEC_SOURCE_MEMORY_LIMIT (0xFFFFFFFFFFUL) +/* Maximum address we can reach in physical address mode */ +# define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL) +/* Maximum address we can use for the control pages */ +# define KEXEC_CONTROL_MEMORY_LIMIT (0xFFFFFFFFFFUL) + +/* Allocate one page for the pdp and the second for the code */ +# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL) + +/* The native architecture */ +# define KEXEC_ARCH KEXEC_ARCH_X86_64 +#endif + +/* + * CPU does not save ss and sp on stack if execution is already + * running in kernel mode at the time of NMI occurrence. This code + * fixes it. + */ +static inline void crash_fixup_ss_esp(struct pt_regs *newregs, + struct pt_regs *oldregs) +{ +#ifdef CONFIG_X86_32 + newregs->sp = (unsigned long)&(oldregs->sp); + asm volatile("xorl %%eax, %%eax\n\t" + "movw %%ss, %%ax\n\t" + :"=a"(newregs->ss)); +#endif +} + +/* + * This function is responsible for capturing register states if coming + * via panic otherwise just fix up the ss and sp if coming via kernel + * mode exception. + */ +static inline void crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) +{ + if (oldregs) { + memcpy(newregs, oldregs, sizeof(*newregs)); + crash_fixup_ss_esp(newregs, oldregs); + } else { +#ifdef CONFIG_X86_32 + asm volatile("movl %%ebx,%0" : "=m"(newregs->bx)); + asm volatile("movl %%ecx,%0" : "=m"(newregs->cx)); + asm volatile("movl %%edx,%0" : "=m"(newregs->dx)); + asm volatile("movl %%esi,%0" : "=m"(newregs->si)); + asm volatile("movl %%edi,%0" : "=m"(newregs->di)); + asm volatile("movl %%ebp,%0" : "=m"(newregs->bp)); + asm volatile("movl %%eax,%0" : "=m"(newregs->ax)); + asm volatile("movl %%esp,%0" : "=m"(newregs->sp)); + asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss)); + asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs)); + asm volatile("movl %%ds, %%eax;" :"=a"(newregs->ds)); + asm volatile("movl %%es, %%eax;" :"=a"(newregs->es)); + asm volatile("pushfl; popl %0" :"=m"(newregs->flags)); +#else + asm volatile("movq %%rbx,%0" : "=m"(newregs->bx)); + asm volatile("movq %%rcx,%0" : "=m"(newregs->cx)); + asm volatile("movq %%rdx,%0" : "=m"(newregs->dx)); + asm volatile("movq %%rsi,%0" : "=m"(newregs->si)); + asm volatile("movq %%rdi,%0" : "=m"(newregs->di)); + asm volatile("movq %%rbp,%0" : "=m"(newregs->bp)); + asm volatile("movq %%rax,%0" : "=m"(newregs->ax)); + asm volatile("movq %%rsp,%0" : "=m"(newregs->sp)); + asm volatile("movq %%r8,%0" : "=m"(newregs->r8)); + asm volatile("movq %%r9,%0" : "=m"(newregs->r9)); + asm volatile("movq %%r10,%0" : "=m"(newregs->r10)); + asm volatile("movq %%r11,%0" : "=m"(newregs->r11)); + asm volatile("movq %%r12,%0" : "=m"(newregs->r12)); + asm volatile("movq %%r13,%0" : "=m"(newregs->r13)); + asm volatile("movq %%r14,%0" : "=m"(newregs->r14)); + asm volatile("movq %%r15,%0" : "=m"(newregs->r15)); + asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss)); + asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs)); + asm volatile("pushfq; popq %0" :"=m"(newregs->flags)); +#endif + newregs->ip = (unsigned long)current_text_addr(); + } +} + +#ifdef CONFIG_X86_32 +asmlinkage unsigned long +relocate_kernel(unsigned long indirection_page, + unsigned long control_page, + unsigned long start_address, + unsigned int has_pae, + unsigned int preserve_context); +#else +unsigned long +relocate_kernel(unsigned long indirection_page, + unsigned long page_list, + unsigned long start_address, + unsigned int preserve_context); +#endif + +#define ARCH_HAS_KIMAGE_ARCH + +#ifdef CONFIG_X86_32 +struct kimage_arch { + pgd_t *pgd; +#ifdef CONFIG_X86_PAE + pmd_t *pmd0; + pmd_t *pmd1; +#endif + pte_t *pte0; + pte_t *pte1; +}; +#else +struct kimage_arch { + pud_t *pud; + pmd_t *pmd; + pte_t *pte; +}; +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_KEXEC_H */ diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h new file mode 100644 index 00000000000..77e95f54570 --- /dev/null +++ b/arch/x86/include/asm/kgdb.h @@ -0,0 +1,85 @@ +#ifndef _ASM_X86_KGDB_H +#define _ASM_X86_KGDB_H + +/* + * Copyright (C) 2001-2004 Amit S. Kale + * Copyright (C) 2008 Wind River Systems, Inc. + */ + +/* + * BUFMAX defines the maximum number of characters in inbound/outbound + * buffers at least NUMREGBYTES*2 are needed for register packets + * Longer buffer is needed to list all threads + */ +#define BUFMAX 1024 + +/* + * Note that this register image is in a different order than + * the register image that Linux produces at interrupt time. + * + * Linux's register image is defined by struct pt_regs in ptrace.h. + * Just why GDB uses a different order is a historical mystery. + */ +#ifdef CONFIG_X86_32 +enum regnames { + GDB_AX, /* 0 */ + GDB_CX, /* 1 */ + GDB_DX, /* 2 */ + GDB_BX, /* 3 */ + GDB_SP, /* 4 */ + GDB_BP, /* 5 */ + GDB_SI, /* 6 */ + GDB_DI, /* 7 */ + GDB_PC, /* 8 also known as eip */ + GDB_PS, /* 9 also known as eflags */ + GDB_CS, /* 10 */ + GDB_SS, /* 11 */ + GDB_DS, /* 12 */ + GDB_ES, /* 13 */ + GDB_FS, /* 14 */ + GDB_GS, /* 15 */ +}; +#define GDB_ORIG_AX 41 +#define DBG_MAX_REG_NUM 16 +#define NUMREGBYTES ((GDB_GS+1)*4) +#else /* ! CONFIG_X86_32 */ +enum regnames { + GDB_AX, /* 0 */ + GDB_BX, /* 1 */ + GDB_CX, /* 2 */ + GDB_DX, /* 3 */ + GDB_SI, /* 4 */ + GDB_DI, /* 5 */ + GDB_BP, /* 6 */ + GDB_SP, /* 7 */ + GDB_R8, /* 8 */ + GDB_R9, /* 9 */ + GDB_R10, /* 10 */ + GDB_R11, /* 11 */ + GDB_R12, /* 12 */ + GDB_R13, /* 13 */ + GDB_R14, /* 14 */ + GDB_R15, /* 15 */ + GDB_PC, /* 16 */ + GDB_PS, /* 17 */ + GDB_CS, /* 18 */ + GDB_SS, /* 19 */ +}; +#define GDB_ORIG_AX 57 +#define DBG_MAX_REG_NUM 20 +/* 17 64 bit regs and 3 32 bit regs */ +#define NUMREGBYTES ((17 * 8) + (3 * 4)) +#endif /* ! CONFIG_X86_32 */ + +static inline void arch_kgdb_breakpoint(void) +{ + asm(" int $3"); +} +#define BREAK_INSTR_SIZE 1 +#define CACHE_FLUSH_IS_SAFE 1 +#define GDB_ADJUSTS_BREAK_OFFSET + +extern int kgdb_ll_trap(int cmd, const char *str, + struct pt_regs *regs, long err, int trap, int sig); + +#endif /* _ASM_X86_KGDB_H */ diff --git a/arch/x86/include/asm/kmap_types.h b/arch/x86/include/asm/kmap_types.h new file mode 100644 index 00000000000..9e00a731a7f --- /dev/null +++ b/arch/x86/include/asm/kmap_types.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_KMAP_TYPES_H +#define _ASM_X86_KMAP_TYPES_H + +#if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM) +#define __WITH_KM_FENCE +#endif + +#include <asm-generic/kmap_types.h> + +#undef __WITH_KM_FENCE + +#endif /* _ASM_X86_KMAP_TYPES_H */ diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h new file mode 100644 index 00000000000..ed01518f297 --- /dev/null +++ b/arch/x86/include/asm/kmemcheck.h @@ -0,0 +1,42 @@ +#ifndef ASM_X86_KMEMCHECK_H +#define ASM_X86_KMEMCHECK_H + +#include <linux/types.h> +#include <asm/ptrace.h> + +#ifdef CONFIG_KMEMCHECK +bool kmemcheck_active(struct pt_regs *regs); + +void kmemcheck_show(struct pt_regs *regs); +void kmemcheck_hide(struct pt_regs *regs); + +bool kmemcheck_fault(struct pt_regs *regs, + unsigned long address, unsigned long error_code); +bool kmemcheck_trap(struct pt_regs *regs); +#else +static inline bool kmemcheck_active(struct pt_regs *regs) +{ + return false; +} + +static inline void kmemcheck_show(struct pt_regs *regs) +{ +} + +static inline void kmemcheck_hide(struct pt_regs *regs) +{ +} + +static inline bool kmemcheck_fault(struct pt_regs *regs, + unsigned long address, unsigned long error_code) +{ + return false; +} + +static inline bool kmemcheck_trap(struct pt_regs *regs) +{ + return false; +} +#endif /* CONFIG_KMEMCHECK */ + +#endif diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h new file mode 100644 index 00000000000..54788253915 --- /dev/null +++ b/arch/x86/include/asm/kprobes.h @@ -0,0 +1,117 @@ +#ifndef _ASM_X86_KPROBES_H +#define _ASM_X86_KPROBES_H +/* + * Kernel Probes (KProbes) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004 + * + * See arch/x86/kernel/kprobes.c for x86 kprobes history. + */ +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/percpu.h> +#include <asm/insn.h> + +#define __ARCH_WANT_KPROBES_INSN_SLOT + +struct pt_regs; +struct kprobe; + +typedef u8 kprobe_opcode_t; +#define BREAKPOINT_INSTRUCTION 0xcc +#define RELATIVEJUMP_OPCODE 0xe9 +#define RELATIVEJUMP_SIZE 5 +#define RELATIVECALL_OPCODE 0xe8 +#define RELATIVE_ADDR_SIZE 4 +#define MAX_STACK_SIZE 64 +#define MIN_STACK_SIZE(ADDR) \ + (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ + THREAD_SIZE - (unsigned long)(ADDR))) \ + ? (MAX_STACK_SIZE) \ + : (((unsigned long)current_thread_info()) + \ + THREAD_SIZE - (unsigned long)(ADDR))) + +#define flush_insn_slot(p) do { } while (0) + +/* optinsn template addresses */ +extern kprobe_opcode_t optprobe_template_entry; +extern kprobe_opcode_t optprobe_template_val; +extern kprobe_opcode_t optprobe_template_call; +extern kprobe_opcode_t optprobe_template_end; +#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE) +#define MAX_OPTINSN_SIZE \ + (((unsigned long)&optprobe_template_end - \ + (unsigned long)&optprobe_template_entry) + \ + MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE) + +extern const int kretprobe_blacklist_size; + +void arch_remove_kprobe(struct kprobe *p); +void kretprobe_trampoline(void); + +/* Architecture specific copy of original instruction*/ +struct arch_specific_insn { + /* copy of the original instruction */ + kprobe_opcode_t *insn; + /* + * boostable = -1: This instruction type is not boostable. + * boostable = 0: This instruction type is boostable. + * boostable = 1: This instruction has been boosted: we have + * added a relative jump after the instruction copy in insn, + * so no single-step and fixup are needed (unless there's + * a post_handler or break_handler). + */ + int boostable; +}; + +struct arch_optimized_insn { + /* copy of the original instructions */ + kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE]; + /* detour code buffer */ + kprobe_opcode_t *insn; + /* the size of instructions copied to detour code buffer */ + size_t size; +}; + +/* Return true (!0) if optinsn is prepared for optimization. */ +static inline int arch_prepared_optinsn(struct arch_optimized_insn *optinsn) +{ + return optinsn->size; +} + +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; + unsigned long old_flags; + unsigned long saved_flags; +}; + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned long kprobe_status; + unsigned long kprobe_old_flags; + unsigned long kprobe_saved_flags; + unsigned long *jprobe_saved_sp; + struct pt_regs jprobe_saved_regs; + kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; + struct prev_kprobe prev_kprobe; +}; + +extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); +extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); +#endif /* _ASM_X86_KPROBES_H */ diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h new file mode 100644 index 00000000000..4d8dcbdfc12 --- /dev/null +++ b/arch/x86/include/asm/kvm.h @@ -0,0 +1,324 @@ +#ifndef _ASM_X86_KVM_H +#define _ASM_X86_KVM_H + +/* + * KVM x86 specific structures and definitions + * + */ + +#include <linux/types.h> +#include <linux/ioctl.h> + +/* Select x86 specific features in <linux/kvm.h> */ +#define __KVM_HAVE_PIT +#define __KVM_HAVE_IOAPIC +#define __KVM_HAVE_DEVICE_ASSIGNMENT +#define __KVM_HAVE_MSI +#define __KVM_HAVE_USER_NMI +#define __KVM_HAVE_GUEST_DEBUG +#define __KVM_HAVE_MSIX +#define __KVM_HAVE_MCE +#define __KVM_HAVE_PIT_STATE2 +#define __KVM_HAVE_XEN_HVM +#define __KVM_HAVE_VCPU_EVENTS +#define __KVM_HAVE_DEBUGREGS +#define __KVM_HAVE_XSAVE +#define __KVM_HAVE_XCRS + +/* Architectural interrupt line count. */ +#define KVM_NR_INTERRUPTS 256 + +struct kvm_memory_alias { + __u32 slot; /* this has a different namespace than memory slots */ + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; + __u64 target_phys_addr; +}; + +/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */ +struct kvm_pic_state { + __u8 last_irr; /* edge detection */ + __u8 irr; /* interrupt request register */ + __u8 imr; /* interrupt mask register */ + __u8 isr; /* interrupt service register */ + __u8 priority_add; /* highest irq priority */ + __u8 irq_base; + __u8 read_reg_select; + __u8 poll; + __u8 special_mask; + __u8 init_state; + __u8 auto_eoi; + __u8 rotate_on_auto_eoi; + __u8 special_fully_nested_mode; + __u8 init4; /* true if 4 byte init */ + __u8 elcr; /* PIIX edge/trigger selection */ + __u8 elcr_mask; +}; + +#define KVM_IOAPIC_NUM_PINS 24 +struct kvm_ioapic_state { + __u64 base_address; + __u32 ioregsel; + __u32 id; + __u32 irr; + __u32 pad; + union { + __u64 bits; + struct { + __u8 vector; + __u8 delivery_mode:3; + __u8 dest_mode:1; + __u8 delivery_status:1; + __u8 polarity:1; + __u8 remote_irr:1; + __u8 trig_mode:1; + __u8 mask:1; + __u8 reserve:7; + __u8 reserved[4]; + __u8 dest_id; + } fields; + } redirtbl[KVM_IOAPIC_NUM_PINS]; +}; + +#define KVM_IRQCHIP_PIC_MASTER 0 +#define KVM_IRQCHIP_PIC_SLAVE 1 +#define KVM_IRQCHIP_IOAPIC 2 +#define KVM_NR_IRQCHIPS 3 + +/* for KVM_GET_REGS and KVM_SET_REGS */ +struct kvm_regs { + /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ + __u64 rax, rbx, rcx, rdx; + __u64 rsi, rdi, rsp, rbp; + __u64 r8, r9, r10, r11; + __u64 r12, r13, r14, r15; + __u64 rip, rflags; +}; + +/* for KVM_GET_LAPIC and KVM_SET_LAPIC */ +#define KVM_APIC_REG_SIZE 0x400 +struct kvm_lapic_state { + char regs[KVM_APIC_REG_SIZE]; +}; + +struct kvm_segment { + __u64 base; + __u32 limit; + __u16 selector; + __u8 type; + __u8 present, dpl, db, s, l, g, avl; + __u8 unusable; + __u8 padding; +}; + +struct kvm_dtable { + __u64 base; + __u16 limit; + __u16 padding[3]; +}; + + +/* for KVM_GET_SREGS and KVM_SET_SREGS */ +struct kvm_sregs { + /* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */ + struct kvm_segment cs, ds, es, fs, gs, ss; + struct kvm_segment tr, ldt; + struct kvm_dtable gdt, idt; + __u64 cr0, cr2, cr3, cr4, cr8; + __u64 efer; + __u64 apic_base; + __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; +}; + +/* for KVM_GET_FPU and KVM_SET_FPU */ +struct kvm_fpu { + __u8 fpr[8][16]; + __u16 fcw; + __u16 fsw; + __u8 ftwx; /* in fxsave format */ + __u8 pad1; + __u16 last_opcode; + __u64 last_ip; + __u64 last_dp; + __u8 xmm[16][16]; + __u32 mxcsr; + __u32 pad2; +}; + +struct kvm_msr_entry { + __u32 index; + __u32 reserved; + __u64 data; +}; + +/* for KVM_GET_MSRS and KVM_SET_MSRS */ +struct kvm_msrs { + __u32 nmsrs; /* number of msrs in entries */ + __u32 pad; + + struct kvm_msr_entry entries[0]; +}; + +/* for KVM_GET_MSR_INDEX_LIST */ +struct kvm_msr_list { + __u32 nmsrs; /* number of msrs in entries */ + __u32 indices[0]; +}; + + +struct kvm_cpuid_entry { + __u32 function; + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; + __u32 padding; +}; + +/* for KVM_SET_CPUID */ +struct kvm_cpuid { + __u32 nent; + __u32 padding; + struct kvm_cpuid_entry entries[0]; +}; + +struct kvm_cpuid_entry2 { + __u32 function; + __u32 index; + __u32 flags; + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; + __u32 padding[3]; +}; + +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 +#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 +#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 + +/* for KVM_SET_CPUID2 */ +struct kvm_cpuid2 { + __u32 nent; + __u32 padding; + struct kvm_cpuid_entry2 entries[0]; +}; + +/* for KVM_GET_PIT and KVM_SET_PIT */ +struct kvm_pit_channel_state { + __u32 count; /* can be 65536 */ + __u16 latched_count; + __u8 count_latched; + __u8 status_latched; + __u8 status; + __u8 read_state; + __u8 write_state; + __u8 write_latch; + __u8 rw_mode; + __u8 mode; + __u8 bcd; + __u8 gate; + __s64 count_load_time; +}; + +struct kvm_debug_exit_arch { + __u32 exception; + __u32 pad; + __u64 pc; + __u64 dr6; + __u64 dr7; +}; + +#define KVM_GUESTDBG_USE_SW_BP 0x00010000 +#define KVM_GUESTDBG_USE_HW_BP 0x00020000 +#define KVM_GUESTDBG_INJECT_DB 0x00040000 +#define KVM_GUESTDBG_INJECT_BP 0x00080000 + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { + __u64 debugreg[8]; +}; + +struct kvm_pit_state { + struct kvm_pit_channel_state channels[3]; +}; + +#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001 + +struct kvm_pit_state2 { + struct kvm_pit_channel_state channels[3]; + __u32 flags; + __u32 reserved[9]; +}; + +struct kvm_reinject_control { + __u8 pit_reinject; + __u8 reserved[31]; +}; + +/* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ +#define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 +#define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 +#define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 + +/* Interrupt shadow states */ +#define KVM_X86_SHADOW_INT_MOV_SS 0x01 +#define KVM_X86_SHADOW_INT_STI 0x02 + +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 injected; + __u8 nr; + __u8 has_error_code; + __u8 pad; + __u32 error_code; + } exception; + struct { + __u8 injected; + __u8 nr; + __u8 soft; + __u8 shadow; + } interrupt; + struct { + __u8 injected; + __u8 pending; + __u8 masked; + __u8 pad; + } nmi; + __u32 sipi_vector; + __u32 flags; + __u32 reserved[10]; +}; + +/* for KVM_GET/SET_DEBUGREGS */ +struct kvm_debugregs { + __u64 db[4]; + __u64 dr6; + __u64 dr7; + __u64 flags; + __u64 reserved[9]; +}; + +/* for KVM_CAP_XSAVE */ +struct kvm_xsave { + __u32 region[1024]; +}; + +#define KVM_MAX_XCRS 16 + +struct kvm_xcr { + __u32 xcr; + __u32 reserved; + __u64 value; +}; + +struct kvm_xcrs { + __u32 nr_xcrs; + __u32 flags; + struct kvm_xcr xcrs[KVM_MAX_XCRS]; + __u64 padding[16]; +}; + +#endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h new file mode 100644 index 00000000000..7b9cfc4878a --- /dev/null +++ b/arch/x86/include/asm/kvm_emulate.h @@ -0,0 +1,394 @@ +/****************************************************************************** + * x86_emulate.h + * + * Generic x86 (32-bit and 64-bit) instruction decoder and emulator. + * + * Copyright (c) 2005 Keir Fraser + * + * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 + */ + +#ifndef _ASM_X86_KVM_X86_EMULATE_H +#define _ASM_X86_KVM_X86_EMULATE_H + +#include <asm/desc_defs.h> + +struct x86_emulate_ctxt; +enum x86_intercept; +enum x86_intercept_stage; + +struct x86_exception { + u8 vector; + bool error_code_valid; + u16 error_code; + bool nested_page_fault; + u64 address; /* cr2 or nested page fault gpa */ +}; + +/* + * This struct is used to carry enough information from the instruction + * decoder to main KVM so that a decision can be made whether the + * instruction needs to be intercepted or not. + */ +struct x86_instruction_info { + u8 intercept; /* which intercept */ + u8 rep_prefix; /* rep prefix? */ + u8 modrm_mod; /* mod part of modrm */ + u8 modrm_reg; /* index of register used */ + u8 modrm_rm; /* rm part of modrm */ + u64 src_val; /* value of source operand */ + u8 src_bytes; /* size of source operand */ + u8 dst_bytes; /* size of destination operand */ + u8 ad_bytes; /* size of src/dst address */ + u64 next_rip; /* rip following the instruction */ +}; + +/* + * x86_emulate_ops: + * + * These operations represent the instruction emulator's interface to memory. + * There are two categories of operation: those that act on ordinary memory + * regions (*_std), and those that act on memory regions known to require + * special treatment or emulation (*_emulated). + * + * The emulator assumes that an instruction accesses only one 'emulated memory' + * location, that this location is the given linear faulting address (cr2), and + * that this is one of the instruction's data operands. Instruction fetches and + * stack operations are assumed never to access emulated memory. The emulator + * automatically deduces which operand of a string-move operation is accessing + * emulated memory, and assumes that the other operand accesses normal memory. + * + * NOTES: + * 1. The emulator isn't very smart about emulated vs. standard memory. + * 'Emulated memory' access addresses should be checked for sanity. + * 'Normal memory' accesses may fault, and the caller must arrange to + * detect and handle reentrancy into the emulator via recursive faults. + * Accesses may be unaligned and may cross page boundaries. + * 2. If the access fails (cannot emulate, or a standard access faults) then + * it is up to the memop to propagate the fault to the guest VM via + * some out-of-band mechanism, unknown to the emulator. The memop signals + * failure by returning X86EMUL_PROPAGATE_FAULT to the emulator, which will + * then immediately bail. + * 3. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only + * cmpxchg8b_emulated need support 8-byte accesses. + * 4. The emulator cannot handle 64-bit mode emulation on an x86/32 system. + */ +/* Access completed successfully: continue emulation as normal. */ +#define X86EMUL_CONTINUE 0 +/* Access is unhandleable: bail from emulation and return error to caller. */ +#define X86EMUL_UNHANDLEABLE 1 +/* Terminate emulation but return success to the caller. */ +#define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ +#define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ +#define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ +#define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ +#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */ + +struct x86_emulate_ops { + /* + * read_std: Read bytes of standard (non-emulated/special) memory. + * Used for descriptor reading. + * @addr: [IN ] Linear address from which to read. + * @val: [OUT] Value read from memory, zero-extended to 'u_long'. + * @bytes: [IN ] Number of bytes to read from memory. + */ + int (*read_std)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, + unsigned int bytes, + struct x86_exception *fault); + + /* + * write_std: Write bytes of standard (non-emulated/special) memory. + * Used for descriptor writing. + * @addr: [IN ] Linear address to which to write. + * @val: [OUT] Value write to memory, zero-extended to 'u_long'. + * @bytes: [IN ] Number of bytes to write to memory. + */ + int (*write_std)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, + struct x86_exception *fault); + /* + * fetch: Read bytes of standard (non-emulated/special) memory. + * Used for instruction fetch. + * @addr: [IN ] Linear address from which to read. + * @val: [OUT] Value read from memory, zero-extended to 'u_long'. + * @bytes: [IN ] Number of bytes to read from memory. + */ + int (*fetch)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, + struct x86_exception *fault); + + /* + * read_emulated: Read bytes from emulated/special memory area. + * @addr: [IN ] Linear address from which to read. + * @val: [OUT] Value read from memory, zero-extended to 'u_long'. + * @bytes: [IN ] Number of bytes to read from memory. + */ + int (*read_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, + struct x86_exception *fault); + + /* + * write_emulated: Write bytes to emulated/special memory area. + * @addr: [IN ] Linear address to which to write. + * @val: [IN ] Value to write to memory (low-order bytes used as + * required). + * @bytes: [IN ] Number of bytes to write to memory. + */ + int (*write_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, const void *val, + unsigned int bytes, + struct x86_exception *fault); + + /* + * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an + * emulated/special memory area. + * @addr: [IN ] Linear address to access. + * @old: [IN ] Value expected to be current at @addr. + * @new: [IN ] Value to write to @addr. + * @bytes: [IN ] Number of bytes to access using CMPXCHG. + */ + int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt, + unsigned long addr, + const void *old, + const void *new, + unsigned int bytes, + struct x86_exception *fault); + void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr); + + int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, void *val, + unsigned int count); + + int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt, + int size, unsigned short port, const void *val, + unsigned int count); + + bool (*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector, + struct desc_struct *desc, u32 *base3, int seg); + void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector, + struct desc_struct *desc, u32 base3, int seg); + unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt, + int seg); + void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); + ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); + int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); + int (*cpl)(struct x86_emulate_ctxt *ctxt); + int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); + int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); + int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); + int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); + int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata); + void (*halt)(struct x86_emulate_ctxt *ctxt); + void (*wbinvd)(struct x86_emulate_ctxt *ctxt); + int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); + void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ + void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ + int (*intercept)(struct x86_emulate_ctxt *ctxt, + struct x86_instruction_info *info, + enum x86_intercept_stage stage); + + bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, + u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); +}; + +typedef u32 __attribute__((vector_size(16))) sse128_t; + +/* Type, address-of, and value of an instruction's operand. */ +struct operand { + enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type; + unsigned int bytes; + union { + unsigned long orig_val; + u64 orig_val64; + }; + union { + unsigned long *reg; + struct segmented_address { + ulong ea; + unsigned seg; + } mem; + unsigned xmm; + } addr; + union { + unsigned long val; + u64 val64; + char valptr[sizeof(unsigned long) + 2]; + sse128_t vec_val; + }; +}; + +struct fetch_cache { + u8 data[15]; + unsigned long start; + unsigned long end; +}; + +struct read_cache { + u8 data[1024]; + unsigned long pos; + unsigned long end; +}; + +struct x86_emulate_ctxt { + struct x86_emulate_ops *ops; + + /* Register state before/after emulation. */ + unsigned long eflags; + unsigned long eip; /* eip before instruction emulation */ + /* Emulated execution mode, represented by an X86EMUL_MODE value. */ + int mode; + + /* interruptibility state, as a result of execution of STI or MOV SS */ + int interruptibility; + + bool guest_mode; /* guest running a nested guest */ + bool perm_ok; /* do not check permissions if true */ + bool only_vendor_specific_insn; + + bool have_exception; + struct x86_exception exception; + + /* decode cache */ + u8 twobyte; + u8 b; + u8 intercept; + u8 lock_prefix; + u8 rep_prefix; + u8 op_bytes; + u8 ad_bytes; + u8 rex_prefix; + struct operand src; + struct operand src2; + struct operand dst; + bool has_seg_override; + u8 seg_override; + u64 d; + int (*execute)(struct x86_emulate_ctxt *ctxt); + int (*check_perm)(struct x86_emulate_ctxt *ctxt); + /* modrm */ + u8 modrm; + u8 modrm_mod; + u8 modrm_reg; + u8 modrm_rm; + u8 modrm_seg; + bool rip_relative; + unsigned long _eip; + /* Fields above regs are cleared together. */ + unsigned long regs[NR_VCPU_REGS]; + struct operand memop; + struct operand *memopp; + struct fetch_cache fetch; + struct read_cache io_read; + struct read_cache mem_read; +}; + +/* Repeat String Operation Prefix */ +#define REPE_PREFIX 0xf3 +#define REPNE_PREFIX 0xf2 + +/* Execution mode, passed to the emulator. */ +#define X86EMUL_MODE_REAL 0 /* Real mode. */ +#define X86EMUL_MODE_VM86 1 /* Virtual 8086 mode. */ +#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */ +#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ +#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ + +/* any protected mode */ +#define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \ + X86EMUL_MODE_PROT64) + +/* CPUID vendors */ +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541 +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163 +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65 + +#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx 0x69444d41 +#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx 0x21726574 +#define X86EMUL_CPUID_VENDOR_AMDisbetterI_edx 0x74656273 + +#define X86EMUL_CPUID_VENDOR_GenuineIntel_ebx 0x756e6547 +#define X86EMUL_CPUID_VENDOR_GenuineIntel_ecx 0x6c65746e +#define X86EMUL_CPUID_VENDOR_GenuineIntel_edx 0x49656e69 + +enum x86_intercept_stage { + X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */ + X86_ICPT_PRE_EXCEPT, + X86_ICPT_POST_EXCEPT, + X86_ICPT_POST_MEMACCESS, +}; + +enum x86_intercept { + x86_intercept_none, + x86_intercept_cr_read, + x86_intercept_cr_write, + x86_intercept_clts, + x86_intercept_lmsw, + x86_intercept_smsw, + x86_intercept_dr_read, + x86_intercept_dr_write, + x86_intercept_lidt, + x86_intercept_sidt, + x86_intercept_lgdt, + x86_intercept_sgdt, + x86_intercept_lldt, + x86_intercept_sldt, + x86_intercept_ltr, + x86_intercept_str, + x86_intercept_rdtsc, + x86_intercept_rdpmc, + x86_intercept_pushf, + x86_intercept_popf, + x86_intercept_cpuid, + x86_intercept_rsm, + x86_intercept_iret, + x86_intercept_intn, + x86_intercept_invd, + x86_intercept_pause, + x86_intercept_hlt, + x86_intercept_invlpg, + x86_intercept_invlpga, + x86_intercept_vmrun, + x86_intercept_vmload, + x86_intercept_vmsave, + x86_intercept_vmmcall, + x86_intercept_stgi, + x86_intercept_clgi, + x86_intercept_skinit, + x86_intercept_rdtscp, + x86_intercept_icebp, + x86_intercept_wbinvd, + x86_intercept_monitor, + x86_intercept_mwait, + x86_intercept_rdmsr, + x86_intercept_wrmsr, + x86_intercept_in, + x86_intercept_ins, + x86_intercept_out, + x86_intercept_outs, + + nr_x86_intercepts +}; + +/* Host execution mode. */ +#if defined(CONFIG_X86_32) +#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 +#elif defined(CONFIG_X86_64) +#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 +#endif + +int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); +bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); +#define EMULATION_FAILED -1 +#define EMULATION_OK 0 +#define EMULATION_RESTART 1 +#define EMULATION_INTERCEPTED 2 +int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); +int emulator_task_switch(struct x86_emulate_ctxt *ctxt, + u16 tss_selector, int reason, + bool has_error_code, u32 error_code); +int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); +#endif /* _ASM_X86_KVM_X86_EMULATE_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h new file mode 100644 index 00000000000..52d6640a5ca --- /dev/null +++ b/arch/x86/include/asm/kvm_host.h @@ -0,0 +1,943 @@ +/* + * Kernel-based Virtual Machine driver for Linux + * + * This header defines architecture specific interfaces, x86 version + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef _ASM_X86_KVM_HOST_H +#define _ASM_X86_KVM_HOST_H + +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/mmu_notifier.h> +#include <linux/tracepoint.h> +#include <linux/cpumask.h> +#include <linux/irq_work.h> + +#include <linux/kvm.h> +#include <linux/kvm_para.h> +#include <linux/kvm_types.h> +#include <linux/perf_event.h> + +#include <asm/pvclock-abi.h> +#include <asm/desc.h> +#include <asm/mtrr.h> +#include <asm/msr-index.h> + +#define KVM_MAX_VCPUS 254 +#define KVM_SOFT_MAX_VCPUS 64 +#define KVM_MEMORY_SLOTS 32 +/* memory slots that does not exposed to userspace */ +#define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) + +#define KVM_MMIO_SIZE 16 + +#define KVM_PIO_PAGE_OFFSET 1 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 2 + +#define CR0_RESERVED_BITS \ + (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ + | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ + | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) + +#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) +#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) +#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ + 0xFFFFFF0000000000ULL) +#define CR4_RESERVED_BITS \ + (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ + | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ + | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ + | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \ + | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) + +#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) + + + +#define INVALID_PAGE (~(hpa_t)0) +#define VALID_PAGE(x) ((x) != INVALID_PAGE) + +#define UNMAPPED_GVA (~(gpa_t)0) + +/* KVM Hugepage definitions for x86 */ +#define KVM_NR_PAGE_SIZES 3 +#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) +#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) +#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) +#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) +#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) + +#define DE_VECTOR 0 +#define DB_VECTOR 1 +#define BP_VECTOR 3 +#define OF_VECTOR 4 +#define BR_VECTOR 5 +#define UD_VECTOR 6 +#define NM_VECTOR 7 +#define DF_VECTOR 8 +#define TS_VECTOR 10 +#define NP_VECTOR 11 +#define SS_VECTOR 12 +#define GP_VECTOR 13 +#define PF_VECTOR 14 +#define MF_VECTOR 16 +#define MC_VECTOR 18 + +#define SELECTOR_TI_MASK (1 << 2) +#define SELECTOR_RPL_MASK 0x03 + +#define IOPL_SHIFT 12 + +#define KVM_PERMILLE_MMU_PAGES 20 +#define KVM_MIN_ALLOC_MMU_PAGES 64 +#define KVM_MMU_HASH_SHIFT 10 +#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) +#define KVM_MIN_FREE_MMU_PAGES 5 +#define KVM_REFILL_PAGES 25 +#define KVM_MAX_CPUID_ENTRIES 80 +#define KVM_NR_FIXED_MTRR_REGION 88 +#define KVM_NR_VAR_MTRR 8 + +#define ASYNC_PF_PER_VCPU 64 + +extern raw_spinlock_t kvm_lock; +extern struct list_head vm_list; + +struct kvm_vcpu; +struct kvm; +struct kvm_async_pf; + +enum kvm_reg { + VCPU_REGS_RAX = 0, + VCPU_REGS_RCX = 1, + VCPU_REGS_RDX = 2, + VCPU_REGS_RBX = 3, + VCPU_REGS_RSP = 4, + VCPU_REGS_RBP = 5, + VCPU_REGS_RSI = 6, + VCPU_REGS_RDI = 7, +#ifdef CONFIG_X86_64 + VCPU_REGS_R8 = 8, + VCPU_REGS_R9 = 9, + VCPU_REGS_R10 = 10, + VCPU_REGS_R11 = 11, + VCPU_REGS_R12 = 12, + VCPU_REGS_R13 = 13, + VCPU_REGS_R14 = 14, + VCPU_REGS_R15 = 15, +#endif + VCPU_REGS_RIP, + NR_VCPU_REGS +}; + +enum kvm_reg_ex { + VCPU_EXREG_PDPTR = NR_VCPU_REGS, + VCPU_EXREG_CR3, + VCPU_EXREG_RFLAGS, + VCPU_EXREG_CPL, + VCPU_EXREG_SEGMENTS, +}; + +enum { + VCPU_SREG_ES, + VCPU_SREG_CS, + VCPU_SREG_SS, + VCPU_SREG_DS, + VCPU_SREG_FS, + VCPU_SREG_GS, + VCPU_SREG_TR, + VCPU_SREG_LDTR, +}; + +#include <asm/kvm_emulate.h> + +#define KVM_NR_MEM_OBJS 40 + +#define KVM_NR_DB_REGS 4 + +#define DR6_BD (1 << 13) +#define DR6_BS (1 << 14) +#define DR6_FIXED_1 0xffff0ff0 +#define DR6_VOLATILE 0x0000e00f + +#define DR7_BP_EN_MASK 0x000000ff +#define DR7_GE (1 << 9) +#define DR7_GD (1 << 13) +#define DR7_FIXED_1 0x00000400 +#define DR7_VOLATILE 0xffff23ff + +/* + * We don't want allocation failures within the mmu code, so we preallocate + * enough memory for a single page fault in a cache. + */ +struct kvm_mmu_memory_cache { + int nobjs; + void *objects[KVM_NR_MEM_OBJS]; +}; + +#define NR_PTE_CHAIN_ENTRIES 5 + +struct kvm_pte_chain { + u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES]; + struct hlist_node link; +}; + +/* + * kvm_mmu_page_role, below, is defined as: + * + * bits 0:3 - total guest paging levels (2-4, or zero for real mode) + * bits 4:7 - page table level for this shadow (1-4) + * bits 8:9 - page table quadrant for 2-level guests + * bit 16 - direct mapping of virtual to physical mapping at gfn + * used for real mode and two-dimensional paging + * bits 17:19 - common access permissions for all ptes in this shadow page + */ +union kvm_mmu_page_role { + unsigned word; + struct { + unsigned level:4; + unsigned cr4_pae:1; + unsigned quadrant:2; + unsigned pad_for_nice_hex_output:6; + unsigned direct:1; + unsigned access:3; + unsigned invalid:1; + unsigned nxe:1; + unsigned cr0_wp:1; + unsigned smep_andnot_wp:1; + }; +}; + +struct kvm_mmu_page { + struct list_head link; + struct hlist_node hash_link; + + /* + * The following two entries are used to key the shadow page in the + * hash table. + */ + gfn_t gfn; + union kvm_mmu_page_role role; + + u64 *spt; + /* hold the gfn of each spte inside spt */ + gfn_t *gfns; + /* + * One bit set per slot which has memory + * in this shadow page. + */ + DECLARE_BITMAP(slot_bitmap, KVM_MEM_SLOTS_NUM); + bool unsync; + int root_count; /* Currently serving as active root */ + unsigned int unsync_children; + unsigned long parent_ptes; /* Reverse mapping for parent_pte */ + DECLARE_BITMAP(unsync_child_bitmap, 512); + +#ifdef CONFIG_X86_32 + int clear_spte_count; +#endif + + int write_flooding_count; + + struct rcu_head rcu; +}; + +struct kvm_pio_request { + unsigned long count; + int in; + int port; + int size; +}; + +/* + * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level + * 32-bit). The kvm_mmu structure abstracts the details of the current mmu + * mode. + */ +struct kvm_mmu { + void (*new_cr3)(struct kvm_vcpu *vcpu); + void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); + unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); + u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); + int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, + bool prefault); + void (*inject_page_fault)(struct kvm_vcpu *vcpu, + struct x86_exception *fault); + void (*free)(struct kvm_vcpu *vcpu); + gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, + struct x86_exception *exception); + gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); + int (*sync_page)(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp); + void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); + void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, + u64 *spte, const void *pte); + hpa_t root_hpa; + int root_level; + int shadow_root_level; + union kvm_mmu_page_role base_role; + bool direct_map; + + u64 *pae_root; + u64 *lm_root; + u64 rsvd_bits_mask[2][4]; + + bool nx; + + u64 pdptrs[4]; /* pae */ +}; + +enum pmc_type { + KVM_PMC_GP = 0, + KVM_PMC_FIXED, +}; + +struct kvm_pmc { + enum pmc_type type; + u8 idx; + u64 counter; + u64 eventsel; + struct perf_event *perf_event; + struct kvm_vcpu *vcpu; +}; + +struct kvm_pmu { + unsigned nr_arch_gp_counters; + unsigned nr_arch_fixed_counters; + unsigned available_event_types; + u64 fixed_ctr_ctrl; + u64 global_ctrl; + u64 global_status; + u64 global_ovf_ctrl; + u64 counter_bitmask[2]; + u64 global_ctrl_mask; + u8 version; + struct kvm_pmc gp_counters[X86_PMC_MAX_GENERIC]; + struct kvm_pmc fixed_counters[X86_PMC_MAX_FIXED]; + struct irq_work irq_work; + u64 reprogram_pmi; +}; + +struct kvm_vcpu_arch { + /* + * rip and regs accesses must go through + * kvm_{register,rip}_{read,write} functions. + */ + unsigned long regs[NR_VCPU_REGS]; + u32 regs_avail; + u32 regs_dirty; + + unsigned long cr0; + unsigned long cr0_guest_owned_bits; + unsigned long cr2; + unsigned long cr3; + unsigned long cr4; + unsigned long cr4_guest_owned_bits; + unsigned long cr8; + u32 hflags; + u64 efer; + u64 apic_base; + struct kvm_lapic *apic; /* kernel irqchip context */ + int32_t apic_arb_prio; + int mp_state; + int sipi_vector; + u64 ia32_misc_enable_msr; + bool tpr_access_reporting; + + /* + * Paging state of the vcpu + * + * If the vcpu runs in guest mode with two level paging this still saves + * the paging mode of the l1 guest. This context is always used to + * handle faults. + */ + struct kvm_mmu mmu; + + /* + * Paging state of an L2 guest (used for nested npt) + * + * This context will save all necessary information to walk page tables + * of the an L2 guest. This context is only initialized for page table + * walking and not for faulting since we never handle l2 page faults on + * the host. + */ + struct kvm_mmu nested_mmu; + + /* + * Pointer to the mmu context currently used for + * gva_to_gpa translations. + */ + struct kvm_mmu *walk_mmu; + + struct kvm_mmu_memory_cache mmu_pte_list_desc_cache; + struct kvm_mmu_memory_cache mmu_page_cache; + struct kvm_mmu_memory_cache mmu_page_header_cache; + + struct fpu guest_fpu; + u64 xcr0; + + struct kvm_pio_request pio; + void *pio_data; + + u8 event_exit_inst_len; + + struct kvm_queued_exception { + bool pending; + bool has_error_code; + bool reinject; + u8 nr; + u32 error_code; + } exception; + + struct kvm_queued_interrupt { + bool pending; + bool soft; + u8 nr; + } interrupt; + + int halt_request; /* real mode on Intel only */ + + int cpuid_nent; + struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES]; + /* emulate context */ + + struct x86_emulate_ctxt emulate_ctxt; + bool emulate_regs_need_sync_to_vcpu; + bool emulate_regs_need_sync_from_vcpu; + + gpa_t time; + struct pvclock_vcpu_time_info hv_clock; + unsigned int hw_tsc_khz; + unsigned int time_offset; + struct page *time_page; + + struct { + u64 msr_val; + u64 last_steal; + u64 accum_steal; + struct gfn_to_hva_cache stime; + struct kvm_steal_time steal; + } st; + + u64 last_guest_tsc; + u64 last_kernel_ns; + u64 last_tsc_nsec; + u64 last_tsc_write; + u32 virtual_tsc_khz; + bool tsc_catchup; + u32 tsc_catchup_mult; + s8 tsc_catchup_shift; + + atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ + unsigned nmi_pending; /* NMI queued after currently running handler */ + bool nmi_injected; /* Trying to inject an NMI this entry */ + + struct mtrr_state_type mtrr_state; + u32 pat; + + int switch_db_regs; + unsigned long db[KVM_NR_DB_REGS]; + unsigned long dr6; + unsigned long dr7; + unsigned long eff_db[KVM_NR_DB_REGS]; + + u64 mcg_cap; + u64 mcg_status; + u64 mcg_ctl; + u64 *mce_banks; + + /* Cache MMIO info */ + u64 mmio_gva; + unsigned access; + gfn_t mmio_gfn; + + struct kvm_pmu pmu; + + /* used for guest single stepping over the given code position */ + unsigned long singlestep_rip; + + /* fields used by HYPER-V emulation */ + u64 hv_vapic; + + cpumask_var_t wbinvd_dirty_mask; + + unsigned long last_retry_eip; + unsigned long last_retry_addr; + + struct { + bool halted; + gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; + struct gfn_to_hva_cache data; + u64 msr_val; + u32 id; + bool send_user_only; + } apf; +}; + +struct kvm_arch { + unsigned int n_used_mmu_pages; + unsigned int n_requested_mmu_pages; + unsigned int n_max_mmu_pages; + unsigned int indirect_shadow_pages; + struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; + /* + * Hash table of struct kvm_mmu_page. + */ + struct list_head active_mmu_pages; + struct list_head assigned_dev_head; + struct iommu_domain *iommu_domain; + int iommu_flags; + struct kvm_pic *vpic; + struct kvm_ioapic *vioapic; + struct kvm_pit *vpit; + int vapics_in_nmi_mode; + + unsigned int tss_addr; + struct page *apic_access_page; + + gpa_t wall_clock; + + struct page *ept_identity_pagetable; + bool ept_identity_pagetable_done; + gpa_t ept_identity_map_addr; + + unsigned long irq_sources_bitmap; + s64 kvmclock_offset; + raw_spinlock_t tsc_write_lock; + u64 last_tsc_nsec; + u64 last_tsc_offset; + u64 last_tsc_write; + + struct kvm_xen_hvm_config xen_hvm_config; + + /* fields used by HYPER-V emulation */ + u64 hv_guest_os_id; + u64 hv_hypercall; + + atomic_t reader_counter; + + #ifdef CONFIG_KVM_MMU_AUDIT + int audit_point; + #endif +}; + +struct kvm_vm_stat { + u32 mmu_shadow_zapped; + u32 mmu_pte_write; + u32 mmu_pte_updated; + u32 mmu_pde_zapped; + u32 mmu_flooded; + u32 mmu_recycled; + u32 mmu_cache_miss; + u32 mmu_unsync; + u32 remote_tlb_flush; + u32 lpages; +}; + +struct kvm_vcpu_stat { + u32 pf_fixed; + u32 pf_guest; + u32 tlb_flush; + u32 invlpg; + + u32 exits; + u32 io_exits; + u32 mmio_exits; + u32 signal_exits; + u32 irq_window_exits; + u32 nmi_window_exits; + u32 halt_exits; + u32 halt_wakeup; + u32 request_irq_exits; + u32 irq_exits; + u32 host_state_reload; + u32 efer_reload; + u32 fpu_reload; + u32 insn_emulation; + u32 insn_emulation_fail; + u32 hypercalls; + u32 irq_injections; + u32 nmi_injections; +}; + +struct x86_instruction_info; + +struct kvm_x86_ops { + int (*cpu_has_kvm_support)(void); /* __init */ + int (*disabled_by_bios)(void); /* __init */ + int (*hardware_enable)(void *dummy); + void (*hardware_disable)(void *dummy); + void (*check_processor_compatibility)(void *rtn); + int (*hardware_setup)(void); /* __init */ + void (*hardware_unsetup)(void); /* __exit */ + bool (*cpu_has_accelerated_tpr)(void); + void (*cpuid_update)(struct kvm_vcpu *vcpu); + + /* Create, but do not attach this VCPU */ + struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); + void (*vcpu_free)(struct kvm_vcpu *vcpu); + int (*vcpu_reset)(struct kvm_vcpu *vcpu); + + void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); + void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); + void (*vcpu_put)(struct kvm_vcpu *vcpu); + + void (*set_guest_debug)(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg); + int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); + int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); + u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); + void (*get_segment)(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg); + int (*get_cpl)(struct kvm_vcpu *vcpu); + void (*set_segment)(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg); + void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); + void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); + void (*decache_cr3)(struct kvm_vcpu *vcpu); + void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); + void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); + void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); + int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); + void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); + void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); + void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); + void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); + void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); + void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); + void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); + unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); + void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); + void (*fpu_activate)(struct kvm_vcpu *vcpu); + void (*fpu_deactivate)(struct kvm_vcpu *vcpu); + + void (*tlb_flush)(struct kvm_vcpu *vcpu); + + void (*run)(struct kvm_vcpu *vcpu); + int (*handle_exit)(struct kvm_vcpu *vcpu); + void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); + void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); + u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); + void (*patch_hypercall)(struct kvm_vcpu *vcpu, + unsigned char *hypercall_addr); + void (*set_irq)(struct kvm_vcpu *vcpu); + void (*set_nmi)(struct kvm_vcpu *vcpu); + void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, + bool has_error_code, u32 error_code, + bool reinject); + void (*cancel_injection)(struct kvm_vcpu *vcpu); + int (*interrupt_allowed)(struct kvm_vcpu *vcpu); + int (*nmi_allowed)(struct kvm_vcpu *vcpu); + bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); + void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); + void (*enable_nmi_window)(struct kvm_vcpu *vcpu); + void (*enable_irq_window)(struct kvm_vcpu *vcpu); + void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); + int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); + int (*get_tdp_level)(void); + u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); + int (*get_lpage_level)(void); + bool (*rdtscp_supported)(void); + void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment); + + void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); + + void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); + + bool (*has_wbinvd_exit)(void); + + void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz); + void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); + + u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); + u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu); + + void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); + + int (*check_intercept)(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage); +}; + +struct kvm_arch_async_pf { + u32 token; + gfn_t gfn; + unsigned long cr3; + bool direct_map; +}; + +extern struct kvm_x86_ops *kvm_x86_ops; + +int kvm_mmu_module_init(void); +void kvm_mmu_module_exit(void); + +void kvm_mmu_destroy(struct kvm_vcpu *vcpu); +int kvm_mmu_create(struct kvm_vcpu *vcpu); +int kvm_mmu_setup(struct kvm_vcpu *vcpu); +void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, + u64 dirty_mask, u64 nx_mask, u64 x_mask); + +int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); +void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); +int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, + struct kvm_memory_slot *slot); +void kvm_mmu_zap_all(struct kvm *kvm); +unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); + +int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); + +int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, + const void *val, int bytes); +u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); + +extern bool tdp_enabled; + +u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); + +/* control of guest tsc rate supported? */ +extern bool kvm_has_tsc_control; +/* minimum supported tsc_khz for guests */ +extern u32 kvm_min_guest_tsc_khz; +/* maximum supported tsc_khz for guests */ +extern u32 kvm_max_guest_tsc_khz; + +enum emulation_result { + EMULATE_DONE, /* no further processing */ + EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ + EMULATE_FAIL, /* can't emulate this instruction */ +}; + +#define EMULTYPE_NO_DECODE (1 << 0) +#define EMULTYPE_TRAP_UD (1 << 1) +#define EMULTYPE_SKIP (1 << 2) +#define EMULTYPE_RETRY (1 << 3) +int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, + int emulation_type, void *insn, int insn_len); + +static inline int emulate_instruction(struct kvm_vcpu *vcpu, + int emulation_type) +{ + return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); +} + +void kvm_enable_efer_bits(u64); +int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); +int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); + +struct x86_emulate_ctxt; + +int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); +void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); +int kvm_emulate_halt(struct kvm_vcpu *vcpu); +int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); + +void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); +int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); + +int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, + bool has_error_code, u32 error_code); + +int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); +int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); +int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); +int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); +int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); +unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); +void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); +void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); +int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr); + +int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); +int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); + +unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); +void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); +bool kvm_rdpmc(struct kvm_vcpu *vcpu); + +void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); +void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); +void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); +void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); +void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); +int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + gfn_t gfn, void *data, int offset, int len, + u32 access); +void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); +bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); + +int kvm_pic_set_irq(void *opaque, int irq, int level); + +void kvm_inject_nmi(struct kvm_vcpu *vcpu); + +int fx_init(struct kvm_vcpu *vcpu); + +void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); +void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, + const u8 *new, int bytes); +int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); +int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); +void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); +int kvm_mmu_load(struct kvm_vcpu *vcpu); +void kvm_mmu_unload(struct kvm_vcpu *vcpu); +void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); +gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); +gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, + struct x86_exception *exception); +gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, + struct x86_exception *exception); +gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, + struct x86_exception *exception); +gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, + struct x86_exception *exception); + +int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); + +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, + void *insn, int insn_len); +void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); + +void kvm_enable_tdp(void); +void kvm_disable_tdp(void); + +int complete_pio(struct kvm_vcpu *vcpu); +bool kvm_check_iopl(struct kvm_vcpu *vcpu); + +static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) +{ + return gpa; +} + +static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) +{ + struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); + + return (struct kvm_mmu_page *)page_private(page); +} + +static inline u16 kvm_read_ldt(void) +{ + u16 ldt; + asm("sldt %0" : "=g"(ldt)); + return ldt; +} + +static inline void kvm_load_ldt(u16 sel) +{ + asm("lldt %0" : : "rm"(sel)); +} + +#ifdef CONFIG_X86_64 +static inline unsigned long read_msr(unsigned long msr) +{ + u64 value; + + rdmsrl(msr, value); + return value; +} +#endif + +static inline u32 get_rdx_init_val(void) +{ + return 0x600; /* P6 family */ +} + +static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) +{ + kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); +} + +#define TSS_IOPB_BASE_OFFSET 0x66 +#define TSS_BASE_SIZE 0x68 +#define TSS_IOPB_SIZE (65536 / 8) +#define TSS_REDIRECTION_SIZE (256 / 8) +#define RMODE_TSS_SIZE \ + (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1) + +enum { + TASK_SWITCH_CALL = 0, + TASK_SWITCH_IRET = 1, + TASK_SWITCH_JMP = 2, + TASK_SWITCH_GATE = 3, +}; + +#define HF_GIF_MASK (1 << 0) +#define HF_HIF_MASK (1 << 1) +#define HF_VINTR_MASK (1 << 2) +#define HF_NMI_MASK (1 << 3) +#define HF_IRET_MASK (1 << 4) +#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ + +/* + * Hardware virtualization extension instructions may fault if a + * reboot turns off virtualization while processes are running. + * Trap the fault and ignore the instruction if that happens. + */ +asmlinkage void kvm_spurious_fault(void); +extern bool kvm_rebooting; + +#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ + "666: " insn "\n\t" \ + "668: \n\t" \ + ".pushsection .fixup, \"ax\" \n" \ + "667: \n\t" \ + cleanup_insn "\n\t" \ + "cmpb $0, kvm_rebooting \n\t" \ + "jne 668b \n\t" \ + __ASM_SIZE(push) " $666b \n\t" \ + "call kvm_spurious_fault \n\t" \ + ".popsection \n\t" \ + ".pushsection __ex_table, \"a\" \n\t" \ + _ASM_PTR " 666b, 667b \n\t" \ + ".popsection" + +#define __kvm_handle_fault_on_reboot(insn) \ + ____kvm_handle_fault_on_reboot(insn, "") + +#define KVM_ARCH_WANT_MMU_NOTIFIER +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +int kvm_age_hva(struct kvm *kvm, unsigned long hva); +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); +int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); +int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); +int kvm_cpu_get_interrupt(struct kvm_vcpu *v); + +void kvm_define_shared_msr(unsigned index, u32 msr); +void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); + +bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); + +void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); +void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); +void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); +bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); +extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); + +void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); + +int kvm_is_in_guest(void); + +void kvm_pmu_init(struct kvm_vcpu *vcpu); +void kvm_pmu_destroy(struct kvm_vcpu *vcpu); +void kvm_pmu_reset(struct kvm_vcpu *vcpu); +void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); +bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); +int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); +int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); +int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); +void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); +void kvm_deliver_pmi(struct kvm_vcpu *vcpu); + +#endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h new file mode 100644 index 00000000000..734c3767cfa --- /dev/null +++ b/arch/x86/include/asm/kvm_para.h @@ -0,0 +1,213 @@ +#ifndef _ASM_X86_KVM_PARA_H +#define _ASM_X86_KVM_PARA_H + +#include <linux/types.h> +#include <asm/hyperv.h> + +/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It + * should be used to determine that a VM is running under KVM. + */ +#define KVM_CPUID_SIGNATURE 0x40000000 + +/* This CPUID returns a feature bitmap in eax. Before enabling a particular + * paravirtualization, the appropriate feature bit should be checked. + */ +#define KVM_CPUID_FEATURES 0x40000001 +#define KVM_FEATURE_CLOCKSOURCE 0 +#define KVM_FEATURE_NOP_IO_DELAY 1 +#define KVM_FEATURE_MMU_OP 2 +/* This indicates that the new set of kvmclock msrs + * are available. The use of 0x11 and 0x12 is deprecated + */ +#define KVM_FEATURE_CLOCKSOURCE2 3 +#define KVM_FEATURE_ASYNC_PF 4 +#define KVM_FEATURE_STEAL_TIME 5 + +/* The last 8 bits are used to indicate how to interpret the flags field + * in pvclock structure. If no bits are set, all flags are ignored. + */ +#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24 + +#define MSR_KVM_WALL_CLOCK 0x11 +#define MSR_KVM_SYSTEM_TIME 0x12 + +#define KVM_MSR_ENABLED 1 +/* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */ +#define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 +#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 +#define MSR_KVM_ASYNC_PF_EN 0x4b564d02 +#define MSR_KVM_STEAL_TIME 0x4b564d03 + +struct kvm_steal_time { + __u64 steal; + __u32 version; + __u32 flags; + __u32 pad[12]; +}; + +#define KVM_STEAL_ALIGNMENT_BITS 5 +#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1))) +#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1) + +#define KVM_MAX_MMU_OP_BATCH 32 + +#define KVM_ASYNC_PF_ENABLED (1 << 0) +#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1) + +/* Operations for KVM_HC_MMU_OP */ +#define KVM_MMU_OP_WRITE_PTE 1 +#define KVM_MMU_OP_FLUSH_TLB 2 +#define KVM_MMU_OP_RELEASE_PT 3 + +/* Payload for KVM_HC_MMU_OP */ +struct kvm_mmu_op_header { + __u32 op; + __u32 pad; +}; + +struct kvm_mmu_op_write_pte { + struct kvm_mmu_op_header header; + __u64 pte_phys; + __u64 pte_val; +}; + +struct kvm_mmu_op_flush_tlb { + struct kvm_mmu_op_header header; +}; + +struct kvm_mmu_op_release_pt { + struct kvm_mmu_op_header header; + __u64 pt_phys; +}; + +#define KVM_PV_REASON_PAGE_NOT_PRESENT 1 +#define KVM_PV_REASON_PAGE_READY 2 + +struct kvm_vcpu_pv_apf_data { + __u32 reason; + __u8 pad[60]; + __u32 enabled; +}; + +#ifdef __KERNEL__ +#include <asm/processor.h> + +extern void kvmclock_init(void); +extern int kvm_register_clock(char *txt); + + +/* This instruction is vmcall. On non-VT architectures, it will generate a + * trap that we will then rewrite to the appropriate instruction. + */ +#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" + +/* For KVM hypercalls, a three-byte sequence of either the vmrun or the vmmrun + * instruction. The hypervisor may replace it with something else but only the + * instructions are guaranteed to be supported. + * + * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively. + * The hypercall number should be placed in rax and the return value will be + * placed in rax. No other registers will be clobbered unless explicited + * noted by the particular hypercall. + */ + +static inline long kvm_hypercall0(unsigned int nr) +{ + long ret; + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr) + : "memory"); + return ret; +} + +static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) +{ + long ret; + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr), "b"(p1) + : "memory"); + return ret; +} + +static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, + unsigned long p2) +{ + long ret; + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr), "b"(p1), "c"(p2) + : "memory"); + return ret; +} + +static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3) +{ + long ret; + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr), "b"(p1), "c"(p2), "d"(p3) + : "memory"); + return ret; +} + +static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4) +{ + long ret; + asm volatile(KVM_HYPERCALL + : "=a"(ret) + : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4) + : "memory"); + return ret; +} + +static inline int kvm_para_available(void) +{ + unsigned int eax, ebx, ecx, edx; + char signature[13]; + + cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); + memcpy(signature + 0, &ebx, 4); + memcpy(signature + 4, &ecx, 4); + memcpy(signature + 8, &edx, 4); + signature[12] = 0; + + if (strcmp(signature, "KVMKVMKVM") == 0) + return 1; + + return 0; +} + +static inline unsigned int kvm_arch_para_features(void) +{ + return cpuid_eax(KVM_CPUID_FEATURES); +} + +#ifdef CONFIG_KVM_GUEST +void __init kvm_guest_init(void); +void kvm_async_pf_task_wait(u32 token); +void kvm_async_pf_task_wake(u32 token); +u32 kvm_read_and_reset_pf_reason(void); +extern void kvm_disable_steal_time(void); +#else +#define kvm_guest_init() do { } while (0) +#define kvm_async_pf_task_wait(T) do {} while(0) +#define kvm_async_pf_task_wake(T) do {} while(0) +static inline u32 kvm_read_and_reset_pf_reason(void) +{ + return 0; +} + +static inline void kvm_disable_steal_time(void) +{ + return; +} +#endif + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_KVM_PARA_H */ diff --git a/arch/x86/include/asm/ldt.h b/arch/x86/include/asm/ldt.h new file mode 100644 index 00000000000..46727eb37bf --- /dev/null +++ b/arch/x86/include/asm/ldt.h @@ -0,0 +1,40 @@ +/* + * ldt.h + * + * Definitions of structures used with the modify_ldt system call. + */ +#ifndef _ASM_X86_LDT_H +#define _ASM_X86_LDT_H + +/* Maximum number of LDT entries supported. */ +#define LDT_ENTRIES 8192 +/* The size of each LDT entry. */ +#define LDT_ENTRY_SIZE 8 + +#ifndef __ASSEMBLY__ +/* + * Note on 64bit base and limit is ignored and you cannot set DS/ES/CS + * not to the default values if you still want to do syscalls. This + * call is more for 32bit mode therefore. + */ +struct user_desc { + unsigned int entry_number; + unsigned int base_addr; + unsigned int limit; + unsigned int seg_32bit:1; + unsigned int contents:2; + unsigned int read_exec_only:1; + unsigned int limit_in_pages:1; + unsigned int seg_not_present:1; + unsigned int useable:1; +#ifdef __x86_64__ + unsigned int lm:1; +#endif +}; + +#define MODIFY_LDT_CONTENTS_DATA 0 +#define MODIFY_LDT_CONTENTS_STACK 1 +#define MODIFY_LDT_CONTENTS_CODE 2 + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_X86_LDT_H */ diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h new file mode 100644 index 00000000000..0d97deba1e3 --- /dev/null +++ b/arch/x86/include/asm/lguest.h @@ -0,0 +1,99 @@ +#ifndef _ASM_X86_LGUEST_H +#define _ASM_X86_LGUEST_H + +#define GDT_ENTRY_LGUEST_CS 10 +#define GDT_ENTRY_LGUEST_DS 11 +#define LGUEST_CS (GDT_ENTRY_LGUEST_CS * 8) +#define LGUEST_DS (GDT_ENTRY_LGUEST_DS * 8) + +#ifndef __ASSEMBLY__ +#include <asm/desc.h> + +#define GUEST_PL 1 + +/* Every guest maps the core switcher code. */ +#define SHARED_SWITCHER_PAGES \ + DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE) +/* Pages for switcher itself, then two pages per cpu */ +#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids) + +/* We map at -4M (-2M for PAE) for ease of mapping (one PTE page). */ +#ifdef CONFIG_X86_PAE +#define SWITCHER_ADDR 0xFFE00000 +#else +#define SWITCHER_ADDR 0xFFC00000 +#endif + +/* Found in switcher.S */ +extern unsigned long default_idt_entries[]; + +/* Declarations for definitions in lguest_guest.S */ +extern char lguest_noirq_start[], lguest_noirq_end[]; +extern const char lgstart_cli[], lgend_cli[]; +extern const char lgstart_sti[], lgend_sti[]; +extern const char lgstart_popf[], lgend_popf[]; +extern const char lgstart_pushf[], lgend_pushf[]; +extern const char lgstart_iret[], lgend_iret[]; + +extern void lguest_iret(void); +extern void lguest_init(void); + +struct lguest_regs { + /* Manually saved part. */ + unsigned long eax, ebx, ecx, edx; + unsigned long esi, edi, ebp; + unsigned long gs; + unsigned long fs, ds, es; + unsigned long trapnum, errcode; + /* Trap pushed part */ + unsigned long eip; + unsigned long cs; + unsigned long eflags; + unsigned long esp; + unsigned long ss; +}; + +/* This is a guest-specific page (mapped ro) into the guest. */ +struct lguest_ro_state { + /* Host information we need to restore when we switch back. */ + u32 host_cr3; + struct desc_ptr host_idt_desc; + struct desc_ptr host_gdt_desc; + u32 host_sp; + + /* Fields which are used when guest is running. */ + struct desc_ptr guest_idt_desc; + struct desc_ptr guest_gdt_desc; + struct x86_hw_tss guest_tss; + struct desc_struct guest_idt[IDT_ENTRIES]; + struct desc_struct guest_gdt[GDT_ENTRIES]; +}; + +struct lg_cpu_arch { + /* The GDT entries copied into lguest_ro_state when running. */ + struct desc_struct gdt[GDT_ENTRIES]; + + /* The IDT entries: some copied into lguest_ro_state when running. */ + struct desc_struct idt[IDT_ENTRIES]; + + /* The address of the last guest-visible pagefault (ie. cr2). */ + unsigned long last_pagefault; +}; + +static inline void lguest_set_ts(void) +{ + u32 cr0; + + cr0 = read_cr0(); + if (!(cr0 & 8)) + write_cr0(cr0 | 8); +} + +/* Full 4G segment descriptors, suitable for CS and DS. */ +#define FULL_EXEC_SEGMENT \ + ((struct desc_struct)GDT_ENTRY_INIT(0xc09b, 0, 0xfffff)) +#define FULL_SEGMENT ((struct desc_struct)GDT_ENTRY_INIT(0xc093, 0, 0xfffff)) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_LGUEST_H */ diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h new file mode 100644 index 00000000000..879fd7d3387 --- /dev/null +++ b/arch/x86/include/asm/lguest_hcall.h @@ -0,0 +1,76 @@ +/* Architecture specific portion of the lguest hypercalls */ +#ifndef _ASM_X86_LGUEST_HCALL_H +#define _ASM_X86_LGUEST_HCALL_H + +#define LHCALL_FLUSH_ASYNC 0 +#define LHCALL_LGUEST_INIT 1 +#define LHCALL_SHUTDOWN 2 +#define LHCALL_NEW_PGTABLE 4 +#define LHCALL_FLUSH_TLB 5 +#define LHCALL_LOAD_IDT_ENTRY 6 +#define LHCALL_SET_STACK 7 +#define LHCALL_TS 8 +#define LHCALL_SET_CLOCKEVENT 9 +#define LHCALL_HALT 10 +#define LHCALL_SET_PMD 13 +#define LHCALL_SET_PTE 14 +#define LHCALL_SET_PGD 15 +#define LHCALL_LOAD_TLS 16 +#define LHCALL_NOTIFY 17 +#define LHCALL_LOAD_GDT_ENTRY 18 +#define LHCALL_SEND_INTERRUPTS 19 + +#define LGUEST_TRAP_ENTRY 0x1F + +/* Argument number 3 to LHCALL_LGUEST_SHUTDOWN */ +#define LGUEST_SHUTDOWN_POWEROFF 1 +#define LGUEST_SHUTDOWN_RESTART 2 + +#ifndef __ASSEMBLY__ +#include <asm/hw_irq.h> + +/*G:030 + * But first, how does our Guest contact the Host to ask for privileged + * operations? There are two ways: the direct way is to make a "hypercall", + * to make requests of the Host Itself. + * + * Our hypercall mechanism uses the highest unused trap code (traps 32 and + * above are used by real hardware interrupts). Seventeen hypercalls are + * available: the hypercall number is put in the %eax register, and the + * arguments (when required) are placed in %ebx, %ecx, %edx and %esi. + * If a return value makes sense, it's returned in %eax. + * + * Grossly invalid calls result in Sudden Death at the hands of the vengeful + * Host, rather than returning failure. This reflects Winston Churchill's + * definition of a gentleman: "someone who is only rude intentionally". + */ +static inline unsigned long +hcall(unsigned long call, + unsigned long arg1, unsigned long arg2, unsigned long arg3, + unsigned long arg4) +{ + /* "int" is the Intel instruction to trigger a trap. */ + asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY) + /* The call in %eax (aka "a") might be overwritten */ + : "=a"(call) + /* The arguments are in %eax, %ebx, %ecx, %edx & %esi */ + : "a"(call), "b"(arg1), "c"(arg2), "d"(arg3), "S"(arg4) + /* "memory" means this might write somewhere in memory. + * This isn't true for all calls, but it's safe to tell + * gcc that it might happen so it doesn't get clever. */ + : "memory"); + return call; +} +/*:*/ + +/* Can't use our min() macro here: needs to be a constant */ +#define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) + +#define LHCALL_RING_SIZE 64 +struct hcall_args { + /* These map directly onto eax/ebx/ecx/edx/esi in struct lguest_regs */ + unsigned long arg0, arg1, arg2, arg3, arg4; +}; + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_X86_LGUEST_HCALL_H */ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h new file mode 100644 index 00000000000..48142971b25 --- /dev/null +++ b/arch/x86/include/asm/linkage.h @@ -0,0 +1,61 @@ +#ifndef _ASM_X86_LINKAGE_H +#define _ASM_X86_LINKAGE_H + +#include <linux/stringify.h> + +#undef notrace +#define notrace __attribute__((no_instrument_function)) + +#ifdef CONFIG_X86_32 +#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) + +/* + * Make sure the compiler doesn't do anything stupid with the + * arguments on the stack - they are owned by the *caller*, not + * the callee. This just fools gcc into not spilling into them, + * and keeps it from doing tailcall recursion and/or using the + * stack slots for temporaries, since they are live and "used" + * all the way to the end of the function. + * + * NOTE! On x86-64, all the arguments are in registers, so this + * only matters on a 32-bit kernel. + */ +#define asmlinkage_protect(n, ret, args...) \ + __asmlinkage_protect##n(ret, ##args) +#define __asmlinkage_protect_n(ret, args...) \ + __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args) +#define __asmlinkage_protect0(ret) \ + __asmlinkage_protect_n(ret) +#define __asmlinkage_protect1(ret, arg1) \ + __asmlinkage_protect_n(ret, "g" (arg1)) +#define __asmlinkage_protect2(ret, arg1, arg2) \ + __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2)) +#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ + __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3)) +#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ + __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ + "g" (arg4)) +#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ + __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ + "g" (arg4), "g" (arg5)) +#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ + __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ + "g" (arg4), "g" (arg5), "g" (arg6)) + +#endif /* CONFIG_X86_32 */ + +#ifdef __ASSEMBLY__ + +#define GLOBAL(name) \ + .globl name; \ + name: + +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16) +#define __ALIGN .p2align 4, 0x90 +#define __ALIGN_STR __stringify(__ALIGN) +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_LINKAGE_H */ + diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h new file mode 100644 index 00000000000..9cdae5d47e8 --- /dev/null +++ b/arch/x86/include/asm/local.h @@ -0,0 +1,198 @@ +#ifndef _ASM_X86_LOCAL_H +#define _ASM_X86_LOCAL_H + +#include <linux/percpu.h> + +#include <asm/system.h> +#include <linux/atomic.h> +#include <asm/asm.h> + +typedef struct { + atomic_long_t a; +} local_t; + +#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } + +#define local_read(l) atomic_long_read(&(l)->a) +#define local_set(l, i) atomic_long_set(&(l)->a, (i)) + +static inline void local_inc(local_t *l) +{ + asm volatile(_ASM_INC "%0" + : "+m" (l->a.counter)); +} + +static inline void local_dec(local_t *l) +{ + asm volatile(_ASM_DEC "%0" + : "+m" (l->a.counter)); +} + +static inline void local_add(long i, local_t *l) +{ + asm volatile(_ASM_ADD "%1,%0" + : "+m" (l->a.counter) + : "ir" (i)); +} + +static inline void local_sub(long i, local_t *l) +{ + asm volatile(_ASM_SUB "%1,%0" + : "+m" (l->a.counter) + : "ir" (i)); +} + +/** + * local_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @l: pointer to type local_t + * + * Atomically subtracts @i from @l and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline int local_sub_and_test(long i, local_t *l) +{ + unsigned char c; + + asm volatile(_ASM_SUB "%2,%0; sete %1" + : "+m" (l->a.counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +} + +/** + * local_dec_and_test - decrement and test + * @l: pointer to type local_t + * + * Atomically decrements @l by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline int local_dec_and_test(local_t *l) +{ + unsigned char c; + + asm volatile(_ASM_DEC "%0; sete %1" + : "+m" (l->a.counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * local_inc_and_test - increment and test + * @l: pointer to type local_t + * + * Atomically increments @l by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline int local_inc_and_test(local_t *l) +{ + unsigned char c; + + asm volatile(_ASM_INC "%0; sete %1" + : "+m" (l->a.counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * local_add_negative - add and test if negative + * @i: integer value to add + * @l: pointer to type local_t + * + * Atomically adds @i to @l and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline int local_add_negative(long i, local_t *l) +{ + unsigned char c; + + asm volatile(_ASM_ADD "%2,%0; sets %1" + : "+m" (l->a.counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +} + +/** + * local_add_return - add and return + * @i: integer value to add + * @l: pointer to type local_t + * + * Atomically adds @i to @l and returns @i + @l + */ +static inline long local_add_return(long i, local_t *l) +{ + long __i; +#ifdef CONFIG_M386 + unsigned long flags; + if (unlikely(boot_cpu_data.x86 <= 3)) + goto no_xadd; +#endif + /* Modern 486+ processor */ + __i = i; + asm volatile(_ASM_XADD "%0, %1;" + : "+r" (i), "+m" (l->a.counter) + : : "memory"); + return i + __i; + +#ifdef CONFIG_M386 +no_xadd: /* Legacy 386 processor */ + local_irq_save(flags); + __i = local_read(l); + local_set(l, i + __i); + local_irq_restore(flags); + return i + __i; +#endif +} + +static inline long local_sub_return(long i, local_t *l) +{ + return local_add_return(-i, l); +} + +#define local_inc_return(l) (local_add_return(1, l)) +#define local_dec_return(l) (local_sub_return(1, l)) + +#define local_cmpxchg(l, o, n) \ + (cmpxchg_local(&((l)->a.counter), (o), (n))) +/* Always has a lock prefix */ +#define local_xchg(l, n) (xchg(&((l)->a.counter), (n))) + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to l... + * @u: ...unless l is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. + */ +#define local_add_unless(l, a, u) \ +({ \ + long c, old; \ + c = local_read((l)); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = local_cmpxchg((l), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + +/* On x86_32, these are no better than the atomic variants. + * On x86-64 these are better than the atomic variants on SMP kernels + * because they dont use a lock prefix. + */ +#define __local_inc(l) local_inc(l) +#define __local_dec(l) local_dec(l) +#define __local_add(i, l) local_add((i), (l)) +#define __local_sub(i, l) local_sub((i), (l)) + +#endif /* _ASM_X86_LOCAL_H */ diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h new file mode 100644 index 00000000000..36c93b5cc23 --- /dev/null +++ b/arch/x86/include/asm/local64.h @@ -0,0 +1 @@ +#include <asm-generic/local64.h> diff --git a/arch/x86/include/asm/mach_timer.h b/arch/x86/include/asm/mach_timer.h new file mode 100644 index 00000000000..88d0c3c74c1 --- /dev/null +++ b/arch/x86/include/asm/mach_timer.h @@ -0,0 +1,48 @@ +/* + * Machine specific calibrate_tsc() for generic. + * Split out from timer_tsc.c by Osamu Tomita <tomita@cinet.co.jp> + */ +/* ------ Calibrate the TSC ------- + * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). + * Too much 64-bit arithmetic here to do this cleanly in C, and for + * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2) + * output busy loop as low as possible. We avoid reading the CTC registers + * directly because of the awkward 8-bit access mechanism of the 82C54 + * device. + */ +#ifndef _ASM_X86_MACH_DEFAULT_MACH_TIMER_H +#define _ASM_X86_MACH_DEFAULT_MACH_TIMER_H + +#define CALIBRATE_TIME_MSEC 30 /* 30 msecs */ +#define CALIBRATE_LATCH \ + ((PIT_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000) + +static inline void mach_prepare_counter(void) +{ + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Now let's take care of CTC channel 2 + * + * Set the Gate high, program CTC channel 2 for mode 0, + * (interrupt on terminal count mode), binary count, + * load 5 * LATCH count, (LSB and MSB) to begin countdown. + * + * Some devices need a delay here. + */ + outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */ + outb_p(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */ + outb_p(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */ +} + +static inline void mach_countup(unsigned long *count_p) +{ + unsigned long count = 0; + do { + count++; + } while ((inb_p(0x61) & 0x20) == 0); + *count_p = count; +} + +#endif /* _ASM_X86_MACH_DEFAULT_MACH_TIMER_H */ diff --git a/arch/x86/include/asm/mach_traps.h b/arch/x86/include/asm/mach_traps.h new file mode 100644 index 00000000000..a01e7ec7d23 --- /dev/null +++ b/arch/x86/include/asm/mach_traps.h @@ -0,0 +1,43 @@ +/* + * Machine specific NMI handling for generic. + * Split out from traps.c by Osamu Tomita <tomita@cinet.co.jp> + */ +#ifndef _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H +#define _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H + +#include <asm/mc146818rtc.h> + +#define NMI_REASON_PORT 0x61 + +#define NMI_REASON_SERR 0x80 +#define NMI_REASON_IOCHK 0x40 +#define NMI_REASON_MASK (NMI_REASON_SERR | NMI_REASON_IOCHK) + +#define NMI_REASON_CLEAR_SERR 0x04 +#define NMI_REASON_CLEAR_IOCHK 0x08 +#define NMI_REASON_CLEAR_MASK 0x0f + +static inline unsigned char default_get_nmi_reason(void) +{ + return inb(NMI_REASON_PORT); +} + +static inline void reassert_nmi(void) +{ + int old_reg = -1; + + if (do_i_have_lock_cmos()) + old_reg = current_lock_cmos_reg(); + else + lock_cmos(0); /* register doesn't matter here */ + outb(0x8f, 0x70); + inb(0x71); /* dummy */ + outb(0x0f, 0x70); + inb(0x71); /* dummy */ + if (old_reg >= 0) + outb(old_reg, 0x70); + else + unlock_cmos(); +} + +#endif /* _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H */ diff --git a/arch/x86/include/asm/math_emu.h b/arch/x86/include/asm/math_emu.h new file mode 100644 index 00000000000..031f6266f42 --- /dev/null +++ b/arch/x86/include/asm/math_emu.h @@ -0,0 +1,18 @@ +#ifndef _ASM_X86_MATH_EMU_H +#define _ASM_X86_MATH_EMU_H + +#include <asm/ptrace.h> +#include <asm/vm86.h> + +/* This structure matches the layout of the data saved to the stack + following a device-not-present interrupt, part of it saved + automatically by the 80386/80486. + */ +struct math_emu_info { + long ___orig_eip; + union { + struct pt_regs *regs; + struct kernel_vm86_regs *vm86; + }; +}; +#endif /* _ASM_X86_MATH_EMU_H */ diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h new file mode 100644 index 00000000000..0e8e85bb7c5 --- /dev/null +++ b/arch/x86/include/asm/mc146818rtc.h @@ -0,0 +1,104 @@ +/* + * Machine dependent access functions for RTC registers. + */ +#ifndef _ASM_X86_MC146818RTC_H +#define _ASM_X86_MC146818RTC_H + +#include <asm/io.h> +#include <asm/system.h> +#include <asm/processor.h> +#include <linux/mc146818rtc.h> + +#ifndef RTC_PORT +#define RTC_PORT(x) (0x70 + (x)) +#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ +#endif + +#if defined(CONFIG_X86_32) && defined(__HAVE_ARCH_CMPXCHG) +/* + * This lock provides nmi access to the CMOS/RTC registers. It has some + * special properties. It is owned by a CPU and stores the index register + * currently being accessed (if owned). The idea here is that it works + * like a normal lock (normally). However, in an NMI, the NMI code will + * first check to see if its CPU owns the lock, meaning that the NMI + * interrupted during the read/write of the device. If it does, it goes ahead + * and performs the access and then restores the index register. If it does + * not, it locks normally. + * + * Note that since we are working with NMIs, we need this lock even in + * a non-SMP machine just to mark that the lock is owned. + * + * This only works with compare-and-swap. There is no other way to + * atomically claim the lock and set the owner. + */ +#include <linux/smp.h> +extern volatile unsigned long cmos_lock; + +/* + * All of these below must be called with interrupts off, preempt + * disabled, etc. + */ + +static inline void lock_cmos(unsigned char reg) +{ + unsigned long new; + new = ((smp_processor_id() + 1) << 8) | reg; + for (;;) { + if (cmos_lock) { + cpu_relax(); + continue; + } + if (__cmpxchg(&cmos_lock, 0, new, sizeof(cmos_lock)) == 0) + return; + } +} + +static inline void unlock_cmos(void) +{ + cmos_lock = 0; +} + +static inline int do_i_have_lock_cmos(void) +{ + return (cmos_lock >> 8) == (smp_processor_id() + 1); +} + +static inline unsigned char current_lock_cmos_reg(void) +{ + return cmos_lock & 0xff; +} + +#define lock_cmos_prefix(reg) \ + do { \ + unsigned long cmos_flags; \ + local_irq_save(cmos_flags); \ + lock_cmos(reg) + +#define lock_cmos_suffix(reg) \ + unlock_cmos(); \ + local_irq_restore(cmos_flags); \ + } while (0) +#else +#define lock_cmos_prefix(reg) do {} while (0) +#define lock_cmos_suffix(reg) do {} while (0) +#define lock_cmos(reg) do { } while (0) +#define unlock_cmos() do { } while (0) +#define do_i_have_lock_cmos() 0 +#define current_lock_cmos_reg() 0 +#endif + +/* + * The yet supported machines all access the RTC index register via + * an ISA port access but the way to access the date register differs ... + */ +#define CMOS_READ(addr) rtc_cmos_read(addr) +#define CMOS_WRITE(val, addr) rtc_cmos_write(val, addr) +unsigned char rtc_cmos_read(unsigned char addr); +void rtc_cmos_write(unsigned char val, unsigned char addr); + +extern int mach_set_rtc_mmss(unsigned long nowtime); +extern unsigned long mach_get_cmos_time(void); + +#define RTC_IRQ 8 + +#endif /* _ASM_X86_MC146818RTC_H */ diff --git a/arch/x86/include/asm/mca.h b/arch/x86/include/asm/mca.h new file mode 100644 index 00000000000..eedbb6cc1ef --- /dev/null +++ b/arch/x86/include/asm/mca.h @@ -0,0 +1,43 @@ +/* -*- mode: c; c-basic-offset: 8 -*- */ + +/* Platform specific MCA defines */ +#ifndef _ASM_X86_MCA_H +#define _ASM_X86_MCA_H + +/* Maximal number of MCA slots - actually, some machines have less, but + * they all have sufficient number of POS registers to cover 8. + */ +#define MCA_MAX_SLOT_NR 8 + +/* Most machines have only one MCA bus. The only multiple bus machines + * I know have at most two */ +#define MAX_MCA_BUSSES 2 + +#define MCA_PRIMARY_BUS 0 +#define MCA_SECONDARY_BUS 1 + +/* Dummy slot numbers on primary MCA for integrated functions */ +#define MCA_INTEGSCSI (MCA_MAX_SLOT_NR) +#define MCA_INTEGVIDEO (MCA_MAX_SLOT_NR+1) +#define MCA_MOTHERBOARD (MCA_MAX_SLOT_NR+2) + +/* Dummy POS values for integrated functions */ +#define MCA_DUMMY_POS_START 0x10000 +#define MCA_INTEGSCSI_POS (MCA_DUMMY_POS_START+1) +#define MCA_INTEGVIDEO_POS (MCA_DUMMY_POS_START+2) +#define MCA_MOTHERBOARD_POS (MCA_DUMMY_POS_START+3) + +/* MCA registers */ + +#define MCA_MOTHERBOARD_SETUP_REG 0x94 +#define MCA_ADAPTER_SETUP_REG 0x96 +#define MCA_POS_REG(n) (0x100+(n)) + +#define MCA_ENABLED 0x01 /* POS 2, set if adapter enabled */ + +/* Max number of adapters, including both slots and various integrated + * things. + */ +#define MCA_NUMADAPTERS (MCA_MAX_SLOT_NR+3) + +#endif /* _ASM_X86_MCA_H */ diff --git a/arch/x86/include/asm/mca_dma.h b/arch/x86/include/asm/mca_dma.h new file mode 100644 index 00000000000..45271aef82d --- /dev/null +++ b/arch/x86/include/asm/mca_dma.h @@ -0,0 +1,201 @@ +#ifndef _ASM_X86_MCA_DMA_H +#define _ASM_X86_MCA_DMA_H + +#include <asm/io.h> +#include <linux/ioport.h> + +/* + * Microchannel specific DMA stuff. DMA on an MCA machine is fairly similar to + * standard PC dma, but it certainly has its quirks. DMA register addresses + * are in a different place and there are some added functions. Most of this + * should be pretty obvious on inspection. Note that the user must divide + * count by 2 when using 16-bit dma; that is not handled by these functions. + * + * Ramen Noodles are yummy. + * + * 1998 Tymm Twillman <tymm@computer.org> + */ + +/* + * Registers that are used by the DMA controller; FN is the function register + * (tell the controller what to do) and EXE is the execution register (how + * to do it) + */ + +#define MCA_DMA_REG_FN 0x18 +#define MCA_DMA_REG_EXE 0x1A + +/* + * Functions that the DMA controller can do + */ + +#define MCA_DMA_FN_SET_IO 0x00 +#define MCA_DMA_FN_SET_ADDR 0x20 +#define MCA_DMA_FN_GET_ADDR 0x30 +#define MCA_DMA_FN_SET_COUNT 0x40 +#define MCA_DMA_FN_GET_COUNT 0x50 +#define MCA_DMA_FN_GET_STATUS 0x60 +#define MCA_DMA_FN_SET_MODE 0x70 +#define MCA_DMA_FN_SET_ARBUS 0x80 +#define MCA_DMA_FN_MASK 0x90 +#define MCA_DMA_FN_RESET_MASK 0xA0 +#define MCA_DMA_FN_MASTER_CLEAR 0xD0 + +/* + * Modes (used by setting MCA_DMA_FN_MODE in the function register) + * + * Note that the MODE_READ is read from memory (write to device), and + * MODE_WRITE is vice-versa. + */ + +#define MCA_DMA_MODE_XFER 0x04 /* read by default */ +#define MCA_DMA_MODE_READ 0x04 /* same as XFER */ +#define MCA_DMA_MODE_WRITE 0x08 /* OR with MODE_XFER to use */ +#define MCA_DMA_MODE_IO 0x01 /* DMA from IO register */ +#define MCA_DMA_MODE_16 0x40 /* 16 bit xfers */ + + +/** + * mca_enable_dma - channel to enable DMA on + * @dmanr: DMA channel + * + * Enable the MCA bus DMA on a channel. This can be called from + * IRQ context. + */ + +static inline void mca_enable_dma(unsigned int dmanr) +{ + outb(MCA_DMA_FN_RESET_MASK | dmanr, MCA_DMA_REG_FN); +} + +/** + * mca_disble_dma - channel to disable DMA on + * @dmanr: DMA channel + * + * Enable the MCA bus DMA on a channel. This can be called from + * IRQ context. + */ + +static inline void mca_disable_dma(unsigned int dmanr) +{ + outb(MCA_DMA_FN_MASK | dmanr, MCA_DMA_REG_FN); +} + +/** + * mca_set_dma_addr - load a 24bit DMA address + * @dmanr: DMA channel + * @a: 24bit bus address + * + * Load the address register in the DMA controller. This has a 24bit + * limitation (16Mb). + */ + +static inline void mca_set_dma_addr(unsigned int dmanr, unsigned int a) +{ + outb(MCA_DMA_FN_SET_ADDR | dmanr, MCA_DMA_REG_FN); + outb(a & 0xff, MCA_DMA_REG_EXE); + outb((a >> 8) & 0xff, MCA_DMA_REG_EXE); + outb((a >> 16) & 0xff, MCA_DMA_REG_EXE); +} + +/** + * mca_get_dma_addr - load a 24bit DMA address + * @dmanr: DMA channel + * + * Read the address register in the DMA controller. This has a 24bit + * limitation (16Mb). The return is a bus address. + */ + +static inline unsigned int mca_get_dma_addr(unsigned int dmanr) +{ + unsigned int addr; + + outb(MCA_DMA_FN_GET_ADDR | dmanr, MCA_DMA_REG_FN); + addr = inb(MCA_DMA_REG_EXE); + addr |= inb(MCA_DMA_REG_EXE) << 8; + addr |= inb(MCA_DMA_REG_EXE) << 16; + + return addr; +} + +/** + * mca_set_dma_count - load a 16bit transfer count + * @dmanr: DMA channel + * @count: count + * + * Set the DMA count for this channel. This can be up to 64Kbytes. + * Setting a count of zero will not do what you expect. + */ + +static inline void mca_set_dma_count(unsigned int dmanr, unsigned int count) +{ + count--; /* transfers one more than count -- correct for this */ + + outb(MCA_DMA_FN_SET_COUNT | dmanr, MCA_DMA_REG_FN); + outb(count & 0xff, MCA_DMA_REG_EXE); + outb((count >> 8) & 0xff, MCA_DMA_REG_EXE); +} + +/** + * mca_get_dma_residue - get the remaining bytes to transfer + * @dmanr: DMA channel + * + * This function returns the number of bytes left to transfer + * on this DMA channel. + */ + +static inline unsigned int mca_get_dma_residue(unsigned int dmanr) +{ + unsigned short count; + + outb(MCA_DMA_FN_GET_COUNT | dmanr, MCA_DMA_REG_FN); + count = 1 + inb(MCA_DMA_REG_EXE); + count += inb(MCA_DMA_REG_EXE) << 8; + + return count; +} + +/** + * mca_set_dma_io - set the port for an I/O transfer + * @dmanr: DMA channel + * @io_addr: an I/O port number + * + * Unlike the ISA bus DMA controllers the DMA on MCA bus can transfer + * with an I/O port target. + */ + +static inline void mca_set_dma_io(unsigned int dmanr, unsigned int io_addr) +{ + /* + * DMA from a port address -- set the io address + */ + + outb(MCA_DMA_FN_SET_IO | dmanr, MCA_DMA_REG_FN); + outb(io_addr & 0xff, MCA_DMA_REG_EXE); + outb((io_addr >> 8) & 0xff, MCA_DMA_REG_EXE); +} + +/** + * mca_set_dma_mode - set the DMA mode + * @dmanr: DMA channel + * @mode: mode to set + * + * The DMA controller supports several modes. The mode values you can + * set are- + * + * %MCA_DMA_MODE_READ when reading from the DMA device. + * + * %MCA_DMA_MODE_WRITE to writing to the DMA device. + * + * %MCA_DMA_MODE_IO to do DMA to or from an I/O port. + * + * %MCA_DMA_MODE_16 to do 16bit transfers. + */ + +static inline void mca_set_dma_mode(unsigned int dmanr, unsigned int mode) +{ + outb(MCA_DMA_FN_SET_MODE | dmanr, MCA_DMA_REG_FN); + outb(mode, MCA_DMA_REG_EXE); +} + +#endif /* _ASM_X86_MCA_DMA_H */ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h new file mode 100644 index 00000000000..6aefb14cbbc --- /dev/null +++ b/arch/x86/include/asm/mce.h @@ -0,0 +1,252 @@ +#ifndef _ASM_X86_MCE_H +#define _ASM_X86_MCE_H + +#include <linux/types.h> +#include <asm/ioctls.h> + +/* + * Machine Check support for x86 + */ + +/* MCG_CAP register defines */ +#define MCG_BANKCNT_MASK 0xff /* Number of Banks */ +#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */ +#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ +#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ +#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ +#define MCG_EXT_CNT_SHIFT 16 +#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) +#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ + +/* MCG_STATUS register defines */ +#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ +#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ +#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ + +/* MCi_STATUS register defines */ +#define MCI_STATUS_VAL (1ULL<<63) /* valid error */ +#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ +#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ +#define MCI_STATUS_EN (1ULL<<60) /* error enabled */ +#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ +#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ +#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ +#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ +#define MCI_STATUS_AR (1ULL<<55) /* Action required */ + +/* MCi_MISC register defines */ +#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f) +#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7) +#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */ +#define MCI_MISC_ADDR_LINEAR 1 /* linear address */ +#define MCI_MISC_ADDR_PHYS 2 /* physical address */ +#define MCI_MISC_ADDR_MEM 3 /* memory address */ +#define MCI_MISC_ADDR_GENERIC 7 /* generic */ + +/* CTL2 register defines */ +#define MCI_CTL2_CMCI_EN (1ULL << 30) +#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL + +#define MCJ_CTX_MASK 3 +#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) +#define MCJ_CTX_RANDOM 0 /* inject context: random */ +#define MCJ_CTX_PROCESS 0x1 /* inject context: process */ +#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */ +#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */ +#define MCJ_EXCEPTION 0x8 /* raise as exception */ +#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */ + +/* Fields are zero when not available */ +struct mce { + __u64 status; + __u64 misc; + __u64 addr; + __u64 mcgstatus; + __u64 ip; + __u64 tsc; /* cpu time stamp counter */ + __u64 time; /* wall time_t when error was detected */ + __u8 cpuvendor; /* cpu vendor as encoded in system.h */ + __u8 inject_flags; /* software inject flags */ + __u16 pad; + __u32 cpuid; /* CPUID 1 EAX */ + __u8 cs; /* code segment */ + __u8 bank; /* machine check bank */ + __u8 cpu; /* cpu number; obsolete; use extcpu now */ + __u8 finished; /* entry is valid */ + __u32 extcpu; /* linux cpu number that detected the error */ + __u32 socketid; /* CPU socket ID */ + __u32 apicid; /* CPU initial apic ID */ + __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ +}; + +/* + * This structure contains all data related to the MCE log. Also + * carries a signature to make it easier to find from external + * debugging tools. Each entry is only valid when its finished flag + * is set. + */ + +#define MCE_LOG_LEN 32 + +struct mce_log { + char signature[12]; /* "MACHINECHECK" */ + unsigned len; /* = MCE_LOG_LEN */ + unsigned next; + unsigned flags; + unsigned recordlen; /* length of struct mce */ + struct mce entry[MCE_LOG_LEN]; +}; + +#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ + +#define MCE_LOG_SIGNATURE "MACHINECHECK" + +#define MCE_GET_RECORD_LEN _IOR('M', 1, int) +#define MCE_GET_LOG_LEN _IOR('M', 2, int) +#define MCE_GETCLEAR_FLAGS _IOR('M', 3, int) + +/* Software defined banks */ +#define MCE_EXTENDED_BANK 128 +#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0 + +#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) /* MCE_AMD */ +#define K8_MCE_THRESHOLD_BANK_0 (MCE_THRESHOLD_BASE + 0 * 9) +#define K8_MCE_THRESHOLD_BANK_1 (MCE_THRESHOLD_BASE + 1 * 9) +#define K8_MCE_THRESHOLD_BANK_2 (MCE_THRESHOLD_BASE + 2 * 9) +#define K8_MCE_THRESHOLD_BANK_3 (MCE_THRESHOLD_BASE + 3 * 9) +#define K8_MCE_THRESHOLD_BANK_4 (MCE_THRESHOLD_BASE + 4 * 9) +#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) +#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) + + +#ifdef __KERNEL__ + +extern void mce_register_decode_chain(struct notifier_block *nb); +extern void mce_unregister_decode_chain(struct notifier_block *nb); + +#include <linux/percpu.h> +#include <linux/init.h> +#include <linux/atomic.h> + +extern int mce_disabled; +extern int mce_p5_enabled; + +#ifdef CONFIG_X86_MCE +int mcheck_init(void); +void mcheck_cpu_init(struct cpuinfo_x86 *c); +#else +static inline int mcheck_init(void) { return 0; } +static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} +#endif + +#ifdef CONFIG_X86_ANCIENT_MCE +void intel_p5_mcheck_init(struct cpuinfo_x86 *c); +void winchip_mcheck_init(struct cpuinfo_x86 *c); +static inline void enable_p5_mce(void) { mce_p5_enabled = 1; } +#else +static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} +static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} +static inline void enable_p5_mce(void) {} +#endif + +void mce_setup(struct mce *m); +void mce_log(struct mce *m); +extern struct device *mce_device[CONFIG_NR_CPUS]; + +/* + * Maximum banks number. + * This is the limit of the current register layout on + * Intel CPUs. + */ +#define MAX_NR_BANKS 32 + +#ifdef CONFIG_X86_MCE_INTEL +extern int mce_cmci_disabled; +extern int mce_ignore_ce; +void mce_intel_feature_init(struct cpuinfo_x86 *c); +void cmci_clear(void); +void cmci_reenable(void); +void cmci_rediscover(int dying); +void cmci_recheck(void); +#else +static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { } +static inline void cmci_clear(void) {} +static inline void cmci_reenable(void) {} +static inline void cmci_rediscover(int dying) {} +static inline void cmci_recheck(void) {} +#endif + +#ifdef CONFIG_X86_MCE_AMD +void mce_amd_feature_init(struct cpuinfo_x86 *c); +#else +static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } +#endif + +int mce_available(struct cpuinfo_x86 *c); + +DECLARE_PER_CPU(unsigned, mce_exception_count); +DECLARE_PER_CPU(unsigned, mce_poll_count); + +extern atomic_t mce_entry; + +typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); +DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); + +enum mcp_flags { + MCP_TIMESTAMP = (1 << 0), /* log time stamp */ + MCP_UC = (1 << 1), /* log uncorrected errors */ + MCP_DONTLOG = (1 << 2), /* only clear, don't log */ +}; +void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); + +int mce_notify_irq(void); +void mce_notify_process(void); + +DECLARE_PER_CPU(struct mce, injectm); + +extern void register_mce_write_callback(ssize_t (*)(struct file *filp, + const char __user *ubuf, + size_t usize, loff_t *off)); + +/* + * Exception handler + */ + +/* Call the installed machine check handler for this CPU setup. */ +extern void (*machine_check_vector)(struct pt_regs *, long error_code); +void do_machine_check(struct pt_regs *, long); + +/* + * Threshold handler + */ + +extern void (*mce_threshold_vector)(void); +extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); + +/* + * Thermal handler + */ + +void intel_init_thermal(struct cpuinfo_x86 *c); + +void mce_log_therm_throt_event(__u64 status); + +/* Interrupt Handler for core thermal thresholds */ +extern int (*platform_thermal_notify)(__u64 msr_val); + +#ifdef CONFIG_X86_THERMAL_VECTOR +extern void mcheck_intel_therm_init(void); +#else |