Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Makefile                      |   1
-rw-r--r--  arch/x86/include/asm/dma-mapping.h     |  26
-rw-r--r--  arch/x86/include/asm/processor.h       |  10
-rw-r--r--  arch/x86/include/asm/word-at-a-time.h  |  46
-rw-r--r--  arch/x86/kernel/amd_gart_64.c          |  11
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c    |  13
-rw-r--r--  arch/x86/kernel/irq.c                  |   7
-rw-r--r--  arch/x86/kernel/kdebugfs.c             |   9
-rw-r--r--  arch/x86/kernel/kgdb.c                 |  60
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c       |   9
-rw-r--r--  arch/x86/kernel/pci-dma.c              |   3
-rw-r--r--  arch/x86/kernel/pci-nommu.c            |   6
-rw-r--r--  arch/x86/kernel/pci-swiotlb.c          |  17
-rw-r--r--  arch/x86/kernel/process.c              |  24
-rw-r--r--  arch/x86/net/bpf_jit.S                 | 122
-rw-r--r--  arch/x86/net/bpf_jit_comp.c            |  41
-rw-r--r--  arch/x86/power/cpu.c                   |   2
-rw-r--r--  arch/x86/xen/mmu.c                     |   4
-rw-r--r--  arch/x86/xen/pci-swiotlb-xen.c         |   4
-rw-r--r--  arch/x86/xen/smp.c                     |   2
20 files changed, 294 insertions, 123 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 968dbe24a255..41a7237606a3 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -129,6 +129,7 @@ KBUILD_CFLAGS += -Wno-sign-compare
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# prevent gcc from generating any FP code by mistake
KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
+KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
KBUILD_CFLAGS += $(mflags-y)
KBUILD_AFLAGS += $(mflags-y)
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index ed3065fd6314..4b4331d71935 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -59,7 +59,8 @@ extern int dma_supported(struct device *hwdev, u64 mask);
extern int dma_set_mask(struct device *dev, u64 mask);
extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addr, gfp_t flag);
+ dma_addr_t *dma_addr, gfp_t flag,
+ struct dma_attrs *attrs);
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
@@ -111,9 +112,11 @@ static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
return gfp;
}
+#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
+
static inline void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp)
+dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ gfp_t gfp, struct dma_attrs *attrs)
{
struct dma_map_ops *ops = get_dma_ops(dev);
void *memory;
@@ -129,18 +132,21 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
if (!is_device_dma_capable(dev))
return NULL;
- if (!ops->alloc_coherent)
+ if (!ops->alloc)
return NULL;
- memory = ops->alloc_coherent(dev, size, dma_handle,
- dma_alloc_coherent_gfp_flags(dev, gfp));
+ memory = ops->alloc(dev, size, dma_handle,
+ dma_alloc_coherent_gfp_flags(dev, gfp), attrs);
debug_dma_alloc_coherent(dev, size, *dma_handle, memory);
return memory;
}
-static inline void dma_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t bus)
+#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t bus,
+ struct dma_attrs *attrs)
{
struct dma_map_ops *ops = get_dma_ops(dev);
@@ -150,8 +156,8 @@ static inline void dma_free_coherent(struct device *dev, size_t size,
return;
debug_dma_free_coherent(dev, size, vaddr, bus);
- if (ops->free_coherent)
- ops->free_coherent(dev, size, vaddr, bus);
+ if (ops->free)
+ ops->free(dev, size, vaddr, bus, attrs);
}
#endif
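
[Note: the hunks above wire x86 into the generic dma_alloc_attrs()/dma_free_attrs() interface: the old entry points become macros that forward attrs == NULL, so existing callers compile unchanged, while struct dma_map_ops now carries .alloc/.free hooks taking a struct dma_attrs * argument. A minimal caller-side sketch of what stays source-compatible; the foo_* names are hypothetical, not part of this patch:

	#include <linux/dma-mapping.h>

	static void *foo_buf;
	static dma_addr_t foo_dma;

	static int foo_setup(struct device *dev)
	{
		/* Expands to dma_alloc_attrs(dev, PAGE_SIZE, &foo_dma,
		 * GFP_KERNEL, NULL) and lands in get_dma_ops(dev)->alloc()
		 * with attrs == NULL. */
		foo_buf = dma_alloc_coherent(dev, PAGE_SIZE, &foo_dma,
					     GFP_KERNEL);
		return foo_buf ? 0 : -ENOMEM;
	}

	static void foo_teardown(struct device *dev)
	{
		/* Likewise forwards to dma_free_attrs(..., NULL). */
		dma_free_coherent(dev, PAGE_SIZE, foo_buf, foo_dma);
	}
]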
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 7284c9a6a0b5..4fa7dcceb6c0 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -974,16 +974,6 @@ extern bool cpu_has_amd_erratum(const int *);
#define cpu_has_amd_erratum(x) (false)
#endif /* CONFIG_CPU_SUP_AMD */
-#ifdef CONFIG_X86_32
-/*
- * disable hlt during certain critical i/o operations
- */
-#define HAVE_DISABLE_HLT
-#endif
-
-void disable_hlt(void);
-void enable_hlt(void);
-
void cpu_idle_wait(void);
extern unsigned long arch_align_stack(unsigned long sp);
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
new file mode 100644
index 000000000000..6fe6767b7124
--- /dev/null
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -0,0 +1,46 @@
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+/*
+ * This is largely generic for little-endian machines, but the
+ * optimal byte mask counting is probably going to be something
+ * that is architecture-specific. If you have a reliably fast
+ * bit count instruction, that might be better than the multiply
+ * and shift, for example.
+ */
+
+#ifdef CONFIG_64BIT
+
+/*
+ * Jan Achrenius on G+: microoptimized version of
+ * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
+ * that works for the bytemasks without having to
+ * mask them first.
+ */
+static inline long count_masked_bytes(unsigned long mask)
+{
+ return mask*0x0001020304050608ul >> 56;
+}
+
+#else /* 32-bit case */
+
+/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
+static inline long count_masked_bytes(long mask)
+{
+ /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+ long a = (0x0ff0001+mask) >> 23;
+ /* Fix the 1 for 00 case */
+ return a & mask;
+}
+
+#endif
+
+#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
+
+/* Return the high bit set in the first byte that is a zero */
+static inline unsigned long has_zero(unsigned long a)
+{
+ return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
+}
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
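
[Note: a standalone user-space sketch of how these primitives find the first zero byte eight bytes at a time on little-endian x86-64. The mask post-processing step ((mask - 1) & ~mask, then >> 7) mirrors how in-kernel callers turn the has_zero() result into the byte mask count_masked_bytes() expects; that caller-side step is an assumption here, not part of this hunk:

	#include <stdio.h>
	#include <string.h>

	#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))

	/* 0x80 in every byte of 'a' that is zero */
	static unsigned long has_zero(unsigned long a)
	{
		return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
	}

	/* index of the first zero byte, given 0xff in every byte below it */
	static long count_masked_bytes(unsigned long mask)
	{
		return mask * 0x0001020304050608ul >> 56;
	}

	int main(void)
	{
		unsigned long word, zbits, bytemask;
		char buf[sizeof(word)] = "hello";	/* NUL-padded */

		memcpy(&word, buf, sizeof(word));	/* one 8-byte load */
		zbits = has_zero(word);		/* 0x8080800000000000 */

		/* 0xff in every byte *below* the first zero byte:
		 * 0x000000ffffffffff for "hello". */
		bytemask = ((zbits - 1) & ~zbits) >> 7;

		printf("first zero byte at %ld\n",
		       count_masked_bytes(bytemask));	/* prints 5 */
		return 0;
	}
]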
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index b1e7c7f7a0af..e66311200cbd 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -477,7 +477,7 @@ error:
/* allocate and map a coherent mapping */
static void *
gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
- gfp_t flag)
+ gfp_t flag, struct dma_attrs *attrs)
{
dma_addr_t paddr;
unsigned long align_mask;
@@ -500,7 +500,8 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
}
__free_pages(page, get_order(size));
} else
- return dma_generic_alloc_coherent(dev, size, dma_addr, flag);
+ return dma_generic_alloc_coherent(dev, size, dma_addr, flag,
+ attrs);
return NULL;
}
@@ -508,7 +509,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
/* free a coherent mapping */
static void
gart_free_coherent(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_addr)
+ dma_addr_t dma_addr, struct dma_attrs *attrs)
{
gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
free_pages((unsigned long)vaddr, get_order(size));
@@ -700,8 +701,8 @@ static struct dma_map_ops gart_dma_ops = {
.unmap_sg = gart_unmap_sg,
.map_page = gart_map_page,
.unmap_page = gart_unmap_page,
- .alloc_coherent = gart_alloc_coherent,
- .free_coherent = gart_free_coherent,
+ .alloc = gart_alloc_coherent,
+ .free = gart_free_coherent,
.mapping_error = gart_mapping_error,
};
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ef484d9d0a25..a2dfacfd7103 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1271,6 +1271,17 @@ done:
return num ? -EINVAL : 0;
}
+PMU_FORMAT_ATTR(cccr, "config:0-31" );
+PMU_FORMAT_ATTR(escr, "config:32-62");
+PMU_FORMAT_ATTR(ht, "config:63" );
+
+static struct attribute *intel_p4_formats_attr[] = {
+ &format_attr_cccr.attr,
+ &format_attr_escr.attr,
+ &format_attr_ht.attr,
+ NULL,
+};
+
static __initconst const struct x86_pmu p4_pmu = {
.name = "Netburst P4/Xeon",
.handle_irq = p4_pmu_handle_irq,
@@ -1305,6 +1316,8 @@ static __initconst const struct x86_pmu p4_pmu = {
* the former idea is taken from OProfile code
*/
.perfctr_second_write = 1,
+
+ .format_attrs = intel_p4_formats_attr,
};
__init int p4_pmu_init(void)
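
[Note: once the core perf code publishes .format_attrs, the three PMU_FORMAT_ATTR() entries appear as sysfs files whose contents are exactly the bit-range strings above. A hypothetical user-space peek; it assumes a Netburst box running a kernel with this patch and is not part of the change itself:

	#include <stdio.h>

	int main(void)
	{
		static const char *names[] = { "cccr", "escr", "ht" };
		char path[128], buf[64];
		int i;

		for (i = 0; i < 3; i++) {
			snprintf(path, sizeof(path),
				 "/sys/bus/event_source/devices/cpu/format/%s",
				 names[i]);
			FILE *f = fopen(path, "r");
			/* e.g. "cccr: config:0-31" */
			if (f && fgets(buf, sizeof(buf), f))
				printf("%s: %s", names[i], buf);
			if (f)
				fclose(f);
		}
		return 0;
	}
]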
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 7943e0c21bde..3dafc6003b7c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -282,8 +282,13 @@ void fixup_irqs(void)
else if (!(warned++))
set_affinity = 0;
+ /*
+ * We unmask if the irq was not marked masked by the
+ * core code. That respects the lazy irq disable
+ * behaviour.
+ */
if (!irqd_can_move_in_process_context(data) &&
- !irqd_irq_disabled(data) && chip->irq_unmask)
+ !irqd_irq_masked(data) && chip->irq_unmask)
chip->irq_unmask(data);
raw_spin_unlock(&desc->lock);
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 90fcf62854bb..1d5d31ea686b 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -68,16 +68,9 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf,
return count;
}
-static int setup_data_open(struct inode *inode, struct file *file)
-{
- file->private_data = inode->i_private;
-
- return 0;
-}
-
static const struct file_operations fops_setup_data = {
.read = setup_data_read,
- .open = setup_data_open,
+ .open = simple_open,
.llseek = default_llseek,
};
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index db6720edfdd0..8bfb6146f753 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -43,6 +43,8 @@
#include <linux/smp.h>
#include <linux/nmi.h>
#include <linux/hw_breakpoint.h>
+#include <linux/uaccess.h>
+#include <linux/memory.h>
#include <asm/debugreg.h>
#include <asm/apicdef.h>
@@ -741,6 +743,64 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
regs->ip = ip;
}
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+ int err;
+ char opc[BREAK_INSTR_SIZE];
+
+ bpt->type = BP_BREAKPOINT;
+ err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+ BREAK_INSTR_SIZE);
+ if (err)
+ return err;
+ err = probe_kernel_write((char *)bpt->bpt_addr,
+ arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
+#ifdef CONFIG_DEBUG_RODATA
+ if (!err)
+ return err;
+ /*
+ * It is safe to call text_poke() because normal kernel execution
+ * is stopped on all cores, so long as the text_mutex is not locked.
+ */
+ if (mutex_is_locked(&text_mutex))
+ return -EBUSY;
+ text_poke((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
+ BREAK_INSTR_SIZE);
+ err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
+ if (err)
+ return err;
+ if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE))
+ return -EINVAL;
+ bpt->type = BP_POKE_BREAKPOINT;
+#endif /* CONFIG_DEBUG_RODATA */
+ return err;
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+#ifdef CONFIG_DEBUG_RODATA
+ int err;
+ char opc[BREAK_INSTR_SIZE];
+
+ if (bpt->type != BP_POKE_BREAKPOINT)
+ goto knl_write;
+ /*
+ * It is safe to call text_poke() because normal kernel execution
+ * is stopped on all cores, so long as the text_mutex is not locked.
+ */
+ if (mutex_is_locked(&text_mutex))
+ goto knl_write;
+ text_poke((void *)bpt->bpt_addr, bpt->saved_instr, BREAK_INSTR_SIZE);
+ err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
+ if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE))
+ goto knl_write;
+ return err;
+knl_write:
+#endif /* CONFIG_DEBUG_RODATA */
+ return probe_kernel_write((char *)bpt->bpt_addr,
+ (char *)bpt->saved_instr, BREAK_INSTR_SIZE);
+}
+
struct kgdb_arch arch_kgdb_ops = {
/* Breakpoint instruction: */
.gdb_bpt_instr = { 0xcc },
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 6ac5782f4d6b..d0b2fb9ccbb1 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -430,7 +430,7 @@ static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr,
}
static void* calgary_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag)
+ dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs)
{
void *ret = NULL;
dma_addr_t mapping;
@@ -463,7 +463,8 @@ error:
}
static void calgary_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle)
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
{
unsigned int npages;
struct iommu_table *tbl = find_iommu_table(dev);
@@ -476,8 +477,8 @@ static void calgary_free_coherent(struct device *dev, size_t size,
}
static struct dma_map_ops calgary_dma_ops = {
- .alloc_coherent = calgary_alloc_coherent,
- .free_coherent = calgary_free_coherent,
+ .alloc = calgary_alloc_coherent,
+ .free = calgary_free_coherent,
.map_sg = calgary_map_sg,
.unmap_sg = calgary_unmap_sg,
.map_page = calgary_map_page,
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 28e5e06fcba4..3003250ac51d 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -96,7 +96,8 @@ void __init pci_iommu_alloc(void)
}
}
void *dma_generic_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addr, gfp_t flag)
+ dma_addr_t *dma_addr, gfp_t flag,
+ struct dma_attrs *attrs)
{
unsigned long dma_mask;
struct page *page;
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 3af4af810c07..f96050685b46 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -75,7 +75,7 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
}
static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_addr)
+ dma_addr_t dma_addr, struct dma_attrs *attrs)
{
free_pages((unsigned long)vaddr, get_order(size));
}
@@ -96,8 +96,8 @@ static void nommu_sync_sg_for_device(struct device *dev,
}
struct dma_map_ops nommu_dma_ops = {
- .alloc_coherent = dma_generic_alloc_coherent,
- .free_coherent = nommu_free_coherent,
+ .alloc = dma_generic_alloc_coherent,
+ .free = nommu_free_coherent,
.map_sg = nommu_map_sg,
.map_page = nommu_map_page,
.sync_single_for_device = nommu_sync_single_for_device,
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 8f972cbddef0..6c483ba98b9c 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -15,21 +15,30 @@
int swiotlb __read_mostly;
static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags)
+ dma_addr_t *dma_handle, gfp_t flags,
+ struct dma_attrs *attrs)
{
void *vaddr;
- vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags);
+ vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags,
+ attrs);
if (vaddr)
return vaddr;
return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
}
+static void x86_swiotlb_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_addr,
+ struct dma_attrs *attrs)
+{
+ swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+}
+
static struct dma_map_ops swiotlb_dma_ops = {
.mapping_error = swiotlb_dma_mapping_error,
- .alloc_coherent = x86_swiotlb_alloc_coherent,
- .free_coherent = swiotlb_free_coherent,
+ .alloc = x86_swiotlb_alloc_coherent,
+ .free = x86_swiotlb_free_coherent,
.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
.sync_single_for_device = swiotlb_sync_single_for_device,
.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index a33afaa5ddb7..1d92a5ab6e8b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -362,34 +362,10 @@ void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
#endif
-#ifdef CONFIG_X86_32
-/*
- * This halt magic was a workaround for ancient floppy DMA
- * wreckage. It should be safe to remove.
- */
-static int hlt_counter;
-void disable_hlt(void)
-{
- hlt_counter++;
-}
-EXPORT_SYMBOL(disable_hlt);
-
-void enable_hlt(void)
-{
- hlt_counter--;
-}
-EXPORT_SYMBOL(enable_hlt);
-
-static inline int hlt_use_halt(void)
-{
- return (!hlt_counter && boot_cpu_data.hlt_works_ok);
-}
-#else
static inline int hlt_use_halt(void)
{
return 1;
}
-#endif
#ifndef CONFIG_SMP
static inline void play_dead(void)
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 66870223f8c5..877b9a1b2152 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -18,17 +18,17 @@
* r9d : hlen = skb->len - skb->data_len
*/
#define SKBDATA %r8
-
-sk_load_word_ind:
- .globl sk_load_word_ind
-
- add %ebx,%esi /* offset += X */
-# test %esi,%esi /* if (offset < 0) goto bpf_error; */
- js bpf_error
+#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
sk_load_word:
.globl sk_load_word
+ test %esi,%esi
+ js bpf_slow_path_word_neg
+
+sk_load_word_positive_offset:
+ .globl sk_load_word_positive_offset
+
mov %r9d,%eax # hlen
sub %esi,%eax # hlen - offset
cmp $3,%eax
@@ -37,16 +37,15 @@ sk_load_word:
bswap %eax /* ntohl() */
ret
-
-sk_load_half_ind:
- .globl sk_load_half_ind
-
- add %ebx,%esi /* offset += X */
- js bpf_error
-
sk_load_half:
.globl sk_load_half
+ test %esi,%esi
+ js bpf_slow_path_half_neg
+
+sk_load_half_positive_offset:
+ .globl sk_load_half_positive_offset
+
mov %r9d,%eax
sub %esi,%eax # hlen - offset
cmp $1,%eax
@@ -55,14 +54,15 @@ sk_load_half:
rol $8,%ax # ntohs()
ret
-sk_load_byte_ind:
- .globl sk_load_byte_ind
- add %ebx,%esi /* offset += X */
- js bpf_error
-
sk_load_byte:
.globl sk_load_byte
+ test %esi,%esi
+ js bpf_slow_path_byte_neg
+
+sk_load_byte_positive_offset:
+ .globl sk_load_byte_positive_offset
+
cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */
jle bpf_slow_path_byte
movzbl (SKBDATA,%rsi),%eax
@@ -73,25 +73,21 @@ sk_load_byte:
*
* Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
* Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value, already known positive
+ * Inputs : %esi is the offset value
*/
-ENTRY(sk_load_byte_msh)
- CFI_STARTPROC
+sk_load_byte_msh:
+ .globl sk_load_byte_msh
+ test %esi,%esi
+ js bpf_slow_path_byte_msh_neg
+
+sk_load_byte_msh_positive_offset:
+ .globl sk_load_byte_msh_positive_offset
cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
jle bpf_slow_path_byte_msh
movzbl (SKBDATA,%rsi),%ebx
and $15,%bl
shl $2,%bl
ret
- CFI_ENDPROC
-ENDPROC(sk_load_byte_msh)
-
-bpf_error:
-# force a return 0 from jit handler
- xor %eax,%eax
- mov -8(%rbp),%rbx
- leaveq
- ret
/* rsi contains offset and can be scratched */
#define bpf_slow_path_common(LEN) \
@@ -138,3 +134,67 @@ bpf_slow_path_byte_msh:
shl $2,%al
xchg %eax,%ebx
ret
+
+#define sk_negative_common(SIZE) \
+ push %rdi; /* save skb */ \
+ push %r9; \
+ push SKBDATA; \
+/* rsi already has offset */ \
+ mov $SIZE,%ecx; /* size */ \
+ call bpf_internal_load_pointer_neg_helper; \
+ test %rax,%rax; \
+ pop SKBDATA; \
+ pop %r9; \
+ pop %rdi; \
+ jz bpf_error
+
+
+bpf_slow_path_word_neg:
+ cmp SKF_MAX_NEG_OFF, %esi /* test range */
+ jl bpf_error /* offset lower -> error */
+sk_load_word_negative_offset:
+ .globl sk_load_word_negative_offset
+ sk_negative_common(4)
+ mov (%rax), %eax
+ bswap %eax
+ ret
+
+bpf_slow_path_half_neg:
+ cmp SKF_MAX_NEG_OFF, %esi
+ jl bpf_error
+sk_load_half_negative_offset:
+ .globl sk_load_half_negative_offset
+ sk_negative_common(2)
+ mov (%rax),%ax
+ rol $8,%ax
+ movzwl %ax,%eax
+ ret
+
+bpf_slow_path_byte_neg:
+ cmp SKF_MAX_NEG_OFF, %esi
+ jl bpf_error
+sk_load_byte_negative_offset:
+ .globl sk_load_byte_negative_offset
+ sk_negative_common(1)
+ movzbl (%rax), %eax
+ ret
+
+bpf_slow_path_byte_msh_neg:
+ cmp SKF_MAX_NEG_OFF, %esi
+ jl bpf_error
+sk_load_byte_msh_negative_offset:
+ .globl sk_load_byte_msh_negative_offset
+ xchg %eax,%ebx /* dont lose A , X is about to be scratched */
+ sk_negative_common(1)
+ movzbl (%rax),%eax
+ and $15,%al
+ shl $2,%al
+ xchg %eax,%ebx
+ ret
+
+bpf_error:
+# force a return 0 from jit handler
+ xor %eax,%eax
+ mov -8(%rbp),%rbx
+ leaveq
+ ret
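
[Note: sk_negative_common() preserves the JIT's live registers (skb in %rdi, hlen in %r9, packet data in SKBDATA) around a call into the C helper, then jumps to bpf_error on a NULL result. The helper itself lives in net/core/filter.c and is not part of this diff; the sketch below paraphrases its era-appropriate semantics from memory, so treat the exact body as an assumption:

	#include <linux/filter.h>
	#include <linux/skbuff.h>

	/* Negative offsets address the link-layer or network header,
	 * anchored at SKF_LL_OFF / SKF_NET_OFF. Only linear skb data may
	 * be returned; anything else yields NULL, which the stubs above
	 * turn into bpf_error. */
	void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
						   int k, unsigned int size)
	{
		u8 *ptr = NULL;

		if (k >= SKF_NET_OFF)
			ptr = skb_network_header(skb) + k - SKF_NET_OFF;
		else if (k >= SKF_LL_OFF)
			ptr = skb_mac_header(skb) + k - SKF_LL_OFF;

		if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
			return ptr;
		return NULL;
	}
]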
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5a5b6e4dd738..0597f95b6da6 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -30,7 +30,10 @@ int bpf_jit_enable __read_mostly;
* assembly code in arch/x86/net/bpf_jit.S
*/
extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
-extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
+extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
+extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
+extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
+extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
@@ -117,6 +120,8 @@ static inline void bpf_flush_icache(void *start, void *end)
set_fs(old_fs);
}
+#define CHOOSE_LOAD_FUNC(K, func) \
+ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
void bpf_jit_compile(struct sk_filter *fp)
{
@@ -473,44 +478,46 @@ void bpf_jit_compile(struct sk_filter *fp)
#endif
break;
case BPF_S_LD_W_ABS:
- func = sk_load_word;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_word);
common_load: seen |= SEEN_DATAREF;
- if ((int)K < 0) {
- /* Abort the JIT because __load_pointer() is needed. */
- goto out;
- }
t_offset = func - (image + addrs[i]);
EMIT1_off32(0xbe, K); /* mov imm32,%esi */
EMIT1_off32(0xe8, t_offset); /* call */
break;
case BPF_S_LD_H_ABS:
- func = sk_load_half;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_half);
goto common_load;
case BPF_S_LD_B_ABS:
- func = sk_load_byte;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
goto common_load;
case BPF_S_LDX_B_MSH:
- if ((int)K < 0) {
- /* Abort the JIT because __load_pointer() is needed. */
- goto out;
- }
+ func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
seen |= SEEN_DATAREF | SEEN_XREG;
- t_offset = sk_load_byte_msh - (image + addrs[i]);
+ t_offset = func - (image + addrs[i]);
EMIT1_off32(0xbe, K); /* mov imm32,%esi */
EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
break;
case BPF_S_LD_W_IND:
- func = sk_load_word_ind;
+ func = sk_load_word;
common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG;
t_offset = func - (image + addrs[i]);
- EMIT1_off32(0xbe, K); /* mov imm32,%esi */
+ if (K) {
+ if (is_imm8(K)) {
+ EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */
+ } else {
+ EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
+ EMIT(K, 4);
+ }
+ } else {
+ EMIT2(0x89,0xde); /* mov %ebx,%esi */
+ }
EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */
break;
case BPF_S_LD_H_IND:
- func = sk_load_half_ind;
+ func = sk_load_half;
goto common_load_ind;
case BPF_S_LD_B_IND:
- func = sk_load_byte_ind;
+ func = sk_load_byte;
goto common_load_ind;
case BPF_S_JMP_JA:
t_offset = addrs[i + K] - addrs[i];
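
[Note: CHOOSE_LOAD_FUNC() resolves the helper at JIT time whenever the offset K is an immediate, so only indirect loads (offset in X) still need the run-time sign test at the generic sk_load_* entry points. A small user-space model of the dispatch; choose_load_word() is illustrative only, and SKF_LL_OFF matches the SKF_MAX_NEG_OFF constant in bpf_jit.S:

	#include <stdio.h>

	#define SKF_LL_OFF (-0x200000)	/* most negative valid offset */

	/* Models CHOOSE_LOAD_FUNC(K, sk_load_word) from the hunk above. */
	static const char *choose_load_word(int k)
	{
		if (k < 0)
			return k >= SKF_LL_OFF
				? "sk_load_word_negative_offset"
				: "sk_load_word"; /* out of range: run-time check fails */
		return "sk_load_word_positive_offset";
	}

	int main(void)
	{
		printf("%8d -> %s\n", 14, choose_load_word(14));
		printf("%8d -> %s\n", -0x1000, choose_load_word(-0x1000));
		printf("%8d -> %s\n", -0x300000, choose_load_word(-0x300000));
		return 0;
	}
]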
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 47936830968c..218cdb16163c 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -225,13 +225,13 @@ static void __restore_processor_state(struct saved_context *ctxt)
fix_processor_context();
do_fpu_end();
+ x86_platform.restore_sched_clock_state();
mtrr_bp_restore();
}
/* Needed by apm.c */
void restore_processor_state(void)
{
- x86_platform.restore_sched_clock_state();
__restore_processor_state(&saved_context);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 988828b479ed..b8e279479a6b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1859,6 +1859,7 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
#endif /* CONFIG_X86_64 */
static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
+static unsigned char fake_ioapic_mapping[PAGE_SIZE] __page_aligned_bss;
static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
{
@@ -1899,7 +1900,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
* We just don't map the IO APIC - all access is via
* hypercalls. Keep the address in the pte for reference.
*/
- pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
+ pte = pfn_pte(PFN_DOWN(__pa(fake_ioapic_mapping)), PAGE_KERNEL);
break;
#endif
@@ -2064,6 +2065,7 @@ void __init xen_init_mmu_ops(void)
pv_mmu_ops = xen_mmu_ops;
memset(dummy_mapping, 0xff, PAGE_SIZE);
+ memset(fake_ioapic_mapping, 0xfd, PAGE_SIZE);
}
/* Protected by xen_reservation_lock. */
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index b480d4207a4c..967633ad98c4 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -12,8 +12,8 @@ int xen_swiotlb __read_mostly;
static struct dma_map_ops xen_swiotlb_dma_ops = {
.mapping_error = xen_swiotlb_dma_mapping_error,
- .alloc_coherent = xen_swiotlb_alloc_coherent,
- .free_coherent = xen_swiotlb_free_coherent,
+ .alloc = xen_swiotlb_alloc_coherent,
+ .free = xen_swiotlb_free_coherent,
.sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
.sync_single_for_device = xen_swiotlb_sync_single_for_device,
.sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 02900e8ce26c..5fac6919b957 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -59,7 +59,7 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
static void __cpuinit cpu_bringup(void)
{
- int cpu = smp_processor_id();
+ int cpu;
cpu_init();
touch_softlockup_watchdog();