diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 61007663ab3f7b..6c3d97821b0376 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1696,6 +1696,32 @@ config ARM64_TAGGED_ADDR_ABI to system calls as pointer arguments. For details, see Documentation/arch/arm64/tagged-address-abi.rst. +config ARM64_FORCE_PCIE_MMIO_DEVICE_MAPPINGS + bool "Force Device memory mappings for PCIe MMIO space" + default y + help + PCIe device drivers may map MMIO space as Normal non-cacheable, + for the purpose of enabling write combining or unaligned accesses. + + On many platforms (e.g. Ampere Altra, RK35xx), the PCIe interface + cannot support unaligned outbound transactions. This may lead to + data corruption, for instance, when a regular memcpy is performed by + an application on a GPU's VRAM BAR. + + This option forces all software that maps PCIe MMIO space as Normal + non-cacheable memory to use Device-nGnRE instead. If the strict alignment + is not met, the CPU will raise alignment faults that can be further + handled by the kernel by enabling CONFIG_ARM64_ALIGNMENT_FIXUPS. + +config ARM64_ALIGNMENT_FIXUPS + bool "Fix up misaligned multi-word loads and stores in 64-bit kernel/user space" + default y + help + This option enables kernel/user space code to perform unaligned accesses + to memory regions that do not normally support them (e.g. device mappings) + by trapping alignment faults on common load/store instructions and breaking + up the offending accesses into properly aligned ones. 
+ menuconfig COMPAT bool "Kernel support for 32-bit EL0" depends on ARM64_4K_PAGES || EXPERT diff --git a/arch/arm64/configs/bcm2711_defconfig b/arch/arm64/configs/bcm2711_defconfig index f7081ebff6bd2f..ca814b94ac6070 100644 --- a/arch/arm64/configs/bcm2711_defconfig +++ b/arch/arm64/configs/bcm2711_defconfig @@ -1070,6 +1070,15 @@ CONFIG_AUXDISPLAY=y CONFIG_HD44780=m CONFIG_DRM=m CONFIG_DRM_LOAD_EDID_FIRMWARE=y +CONFIG_DRM_RADEON=m +CONFIG_DRM_AMDGPU=m +CONFIG_DRM_AMDGPU_SI=y +CONFIG_DRM_AMDGPU_CIK=y +CONFIG_DRM_NOUVEAU=m +CONFIG_NOUVEAU_DEBUG_MMU=y +CONFIG_NOUVEAU_DEBUG_PUSH=y +CONFIG_DRM_XE=m +CONFIG_DRM_XE_FORCE_PROBE="*" CONFIG_DRM_UDL=m CONFIG_DRM_PANEL_LVDS=m CONFIG_DRM_PANEL_ILITEK_IL79600A=m @@ -1133,6 +1142,9 @@ CONFIG_SND_SERIAL_U16550=m CONFIG_SND_MPU401=m CONFIG_SND_PIMIDI=m CONFIG_SND_PISOUND_MICRO=m +CONFIG_SND_HDA_INTEL=m +CONFIG_SND_HDA_GENERIC=m +CONFIG_SND_HDA_CODEC_HDMI=m CONFIG_SND_USB_AUDIO=m CONFIG_SND_USB_UA101=m CONFIG_SND_USB_CAIAQ=m diff --git a/arch/arm64/configs/bcm2712_defconfig b/arch/arm64/configs/bcm2712_defconfig index f873143ea5ffb5..3e9ddac8a700f9 100644 --- a/arch/arm64/configs/bcm2712_defconfig +++ b/arch/arm64/configs/bcm2712_defconfig @@ -1072,6 +1072,12 @@ CONFIG_AUXDISPLAY=y CONFIG_HD44780=m CONFIG_DRM=m CONFIG_DRM_LOAD_EDID_FIRMWARE=y +CONFIG_DRM_RADEON=m +CONFIG_DRM_AMDGPU=m +CONFIG_DRM_AMDGPU_SI=y +CONFIG_DRM_AMDGPU_CIK=y +CONFIG_DRM_XE=m +CONFIG_DRM_XE_FORCE_PROBE="*" CONFIG_DRM_UDL=m CONFIG_DRM_PANEL_LVDS=m CONFIG_DRM_PANEL_ILITEK_IL79600A=m @@ -1135,6 +1141,9 @@ CONFIG_SND_SERIAL_U16550=m CONFIG_SND_MPU401=m CONFIG_SND_PIMIDI=m CONFIG_SND_PISOUND_MICRO=m +CONFIG_SND_HDA_INTEL=m +CONFIG_SND_HDA_GENERIC=m +CONFIG_SND_HDA_CODEC_HDMI=m CONFIG_SND_USB_AUDIO=m CONFIG_SND_USB_UA101=m CONFIG_SND_USB_CAIAQ=m diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index a2da3cb21c244a..9113ed1150ac12 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -82,6 
+82,7 @@ void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs) void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr); void do_el0_cp15(unsigned long esr, struct pt_regs *regs); int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs); +int do_alignment_fixup(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_el0_svc(struct pt_regs *regs); void do_el0_svc_compat(struct pt_regs *regs); void do_el0_fpac(struct pt_regs *regs, unsigned long esr); diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 9016ae8de5c9e2..7771b466c4f6d9 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -338,11 +338,6 @@ static inline pte_t pte_mkyoung(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_AF)); } -static inline pte_t pte_mkspecial(pte_t pte) -{ - return set_pte_bit(pte, __pgprot(PTE_SPECIAL)); -} - static inline pte_t pte_mkcont(pte_t pte) { return set_pte_bit(pte, __pgprot(PTE_CONT)); @@ -802,6 +797,21 @@ static inline void __set_puds(struct mm_struct *mm, __pgprot_modify(prot, PTE_ATTRINDX_MASK, \ PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) +extern bool range_is_pci(phys_addr_t phys_addr, size_t size); + +static inline pte_t pte_mkspecial(pte_t pte) +{ +#ifdef CONFIG_ARM64_FORCE_PCIE_MMIO_DEVICE_MAPPINGS + phys_addr_t phys = __pte_to_phys(pte); + pgprot_t prot = __pgprot(pte_val(pte) & ~__phys_to_pte_val(__pte_to_phys(__pte(~0ull)))); + + if ((pgprot_val(prot) != pgprot_val(pgprot_device(prot))) && + range_is_pci(phys, PAGE_SIZE)) + pte = __pte(__phys_to_pte_val(phys) | pgprot_val(pgprot_device(prot))); +#endif + return set_pte_bit(pte, __pgprot(PTE_SPECIAL)); +} + #define __HAVE_PHYS_MEM_ACCESS_PROT struct file; extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 76f32e424065e5..3329d008a6de95 100644 --- a/arch/arm64/kernel/Makefile 
+++ b/arch/arm64/kernel/Makefile @@ -36,6 +36,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ syscall.o proton-pack.o idle.o patching.o pi/ \ rsi.o jump_label.o +obj-$(CONFIG_ARM64_ALIGNMENT_FIXUPS) += alignment.o obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ sys_compat.o obj-$(CONFIG_COMPAT) += sigreturn32.o diff --git a/arch/arm64/kernel/alignment.c b/arch/arm64/kernel/alignment.c new file mode 100644 index 00000000000000..0b737102bd20b3 --- /dev/null +++ b/arch/arm64/kernel/alignment.c @@ -0,0 +1,1031 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 Ampere Computing LLC + * Copyright (C) 2025 Mario Bălănică + */ + +#include <linux/bitfield.h> /* NOTE(review): include targets were stripped during extraction; reconstructed from usage — verify against upstream patch */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/uaccess.h> +#include <linux/version.h> + +#include <asm/cpufeature.h> +#include <asm/exception.h> +#include <asm/insn.h> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 13, 0) +#include <asm/text-patching.h> +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(5, 14, 0) +#include <asm/patching.h> +#endif +#include <asm/neon.h> +#include <asm/traps.h> + +static __always_inline int __aarch64_insn_is_class_ldst(u32 insn) +{ + return (insn & 0x0A000000) == 0x08000000; +} + +static __always_inline int __aarch64_insn_is_dc_zva(u32 insn) +{ + return (insn & 0xFFFFFFE0) == 0xD50B7420; +} + +static int copy_from_user_io(void *to, const void __user *from, unsigned long n) +{ + const u8 __user *src = from; + u8 *dest = to; + + for (; n; n--) + if (get_user(*dest++, src++)) + break; + return n; +} + +static int copy_to_user_io(void __user *to, const void *from, unsigned long n) +{ + const u8 *src = from; + u8 __user *dest = to; + + for (; n; n--) + if (put_user(*src++, dest++)) + break; + return n; +} + +static int align_load(unsigned long addr, int sz, u64 *out) +{ + union { + u8 d8; + u16 d16; + u32 d32; + u64 d64; + char c[8]; + } data; + + if (sz != 1 && sz != 2 && sz != 4 && sz != 8) + return 1; + if (is_ttbr0_addr(addr)) { + if (copy_from_user_io(data.c, (const void __user *)addr, sz)) + return 1; + } else + memcpy_fromio(data.c, (const void __iomem *)addr, sz); + switch (sz) { + case 1: + *out = data.d8; + break; + case 2: + *out = 
data.d16; + break; + case 4: + *out = data.d32; + break; + case 8: + *out = data.d64; + break; + default: + return 1; + } + return 0; +} + +static int align_store(unsigned long addr, int sz, u64 val) +{ + union { + u8 d8; + u16 d16; + u32 d32; + u64 d64; + char c[8]; + } data; + + switch (sz) { + case 1: + data.d8 = val; + break; + case 2: + data.d16 = val; + break; + case 4: + data.d32 = val; + break; + case 8: + data.d64 = val; + break; + default: + return 1; + } + if (is_ttbr0_addr(addr)) { + if (copy_to_user_io((void __user *)addr, data.c, sz)) + return 1; + } else + memcpy_toio((void __iomem *)addr, data.c, sz); + return 0; +} + +static int align_dc_zva(unsigned long addr, struct pt_regs *regs) +{ + int bs = read_cpuid(DCZID_EL0) & 0xf; + int sz = 1 << (bs + 2); + + addr &= ~(sz - 1); + if (is_ttbr0_addr(addr)) { + for (; sz; sz--) { + if (align_store(addr++, 1, 0)) + return 1; + } + } else + memset_io((void __iomem *)addr, 0, sz); + return 0; +} + +static __always_inline u64 get_vn_dt(int n, int t) +{ + return ((u64 *)&current->thread.uw.fpsimd_state.vregs[n])[t]; +} + +static __always_inline void set_vn_dt(int n, int t, u64 val) +{ + ((u64 *)&current->thread.uw.fpsimd_state.vregs[n])[t] = val; +} + +static __always_inline int kernel_neon_wrapper(int (*fn)(u32 insn, struct pt_regs *regs), + u32 insn, struct pt_regs *regs) +{ + int ret; + + kernel_neon_begin(); + ret = fn(insn, regs); + kernel_neon_end(); + + return ret; +} + +static u64 replicate64(u64 val, int bits) +{ + switch (bits) { + case 8: + val = (val << 8) | (val & 0xff); + fallthrough; + case 16: + val = (val << 16) | (val & 0xffff); + fallthrough; + case 32: + val = (val << 32) | (val & 0xffffffff); + break; + default: + break; + } + return val; +} + +static u64 elem_get(u64 hi, u64 lo, int index, int esize) +{ + int shift = index * esize; + u64 mask = GENMASK(esize - 1, 0); + + if (shift < 64) + return (lo >> shift) & mask; + else + return (hi >> (shift - 64)) & mask; +} + +static void elem_set(u64 *hi, u64 
*lo, int index, int esize, u64 val) +{ + int shift = index * esize; + u64 mask = GENMASK(esize - 1, 0); + + if (shift < 64) + *lo = (*lo & ~(mask << shift)) | ((val & mask) << shift); + else + *hi = (*hi & ~(mask << (shift - 64))) | ((val & mask) << (shift - 64)); +} + +static int align_ldst_pair(u32 insn, struct pt_regs *regs) +{ + const u32 OPC = GENMASK(31, 30); + const u32 L_MASK = BIT(22); + + int opc = FIELD_GET(OPC, insn); + int L = FIELD_GET(L_MASK, insn); + + bool wback = !!(insn & BIT(23)); + bool postindex = !(insn & BIT(24)); + + int n = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, insn); + int t = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); + int t2 = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT2, insn); + bool is_store = !L; + bool is_signed = !!(opc & 1); + int scale = 2 + (opc >> 1); + int datasize = 8 << scale; + u64 uoffset = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_7, insn); + s64 offset = sign_extend64(uoffset, 6) << scale; + u64 address; + u64 data1, data2; + u64 dbytes; + + if ((is_store && (opc & 1)) || opc == 3) + return 1; + + if (wback && (t == n || t2 == n) && n != 31) + return 1; + + if (!is_store && t == t2) + return 1; + + dbytes = datasize / 8; + + address = regs_get_register(regs, n << 3); + + if (!postindex) + address += offset; + + if (is_store) { + data1 = pt_regs_read_reg(regs, t); + data2 = pt_regs_read_reg(regs, t2); + if (align_store(address, dbytes, data1) || + align_store(address + dbytes, dbytes, data2)) + return 1; + } else { + if (align_load(address, dbytes, &data1) || + align_load(address + dbytes, dbytes, &data2)) + return 1; + if (is_signed) { + data1 = sign_extend64(data1, datasize - 1); + data2 = sign_extend64(data2, datasize - 1); + } + pt_regs_write_reg(regs, t, data1); + pt_regs_write_reg(regs, t2, data2); + } + + if (wback) { + if (postindex) + address += offset; + if (n == 31) + regs->sp = address; + else + pt_regs_write_reg(regs, n, address); + } + + return 0; +} + 
+static int align_ldst_pair_simdfp(u32 insn, struct pt_regs *regs) +{ + const u32 OPC = GENMASK(31, 30); + const u32 L_MASK = BIT(22); + + int opc = FIELD_GET(OPC, insn); + int L = FIELD_GET(L_MASK, insn); + + bool wback = !!(insn & BIT(23)); + bool postindex = !(insn & BIT(24)); + + int n = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, insn); + int t = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); + int t2 = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT2, insn); + bool is_store = !L; + int scale = 2 + opc; + int datasize = 8 << scale; + u64 uoffset = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_7, insn); + s64 offset = sign_extend64(uoffset, 6) << scale; + u64 address; + u64 data1_d0, data1_d1, data2_d0, data2_d1; + u64 dbytes; + + if (opc == 0x3) + return 1; + + if (!is_store && t == t2) + return 1; + + dbytes = datasize / 8; + + address = regs_get_register(regs, n << 3); + + if (!postindex) + address += offset; + + if (is_store) { + data1_d0 = get_vn_dt(t, 0); + data2_d0 = get_vn_dt(t2, 0); + if (datasize == 128) { + data1_d1 = get_vn_dt(t, 1); + data2_d1 = get_vn_dt(t2, 1); + if (align_store(address, 8, data1_d0) || + align_store(address + 8, 8, data1_d1) || + align_store(address + 16, 8, data2_d0) || + align_store(address + 24, 8, data2_d1)) + return 1; + } else { + if (align_store(address, dbytes, data1_d0) || + align_store(address + dbytes, dbytes, data2_d0)) + return 1; + } + } else { + if (datasize == 128) { + if (align_load(address, 8, &data1_d0) || + align_load(address + 8, 8, &data1_d1) || + align_load(address + 16, 8, &data2_d0) || + align_load(address + 24, 8, &data2_d1)) + return 1; + } else { + if (align_load(address, dbytes, &data1_d0) || + align_load(address + dbytes, dbytes, &data2_d0)) + return 1; + data1_d1 = data2_d1 = 0; + } + set_vn_dt(t, 0, data1_d0); + set_vn_dt(t, 1, data1_d1); + set_vn_dt(t2, 0, data2_d0); + set_vn_dt(t2, 1, data2_d1); + } + + if (wback) { + if (postindex) + address += offset; + if (n == 
31) + regs->sp = address; + else + pt_regs_write_reg(regs, n, address); + } + + return 0; +} + +static int align_ldst_regoff(u32 insn, struct pt_regs *regs) +{ + const u32 SIZE = GENMASK(31, 30); + const u32 OPC = GENMASK(23, 22); + const u32 OPTION = GENMASK(15, 13); + const u32 S = BIT(12); + + u32 size = FIELD_GET(SIZE, insn); + u32 opc = FIELD_GET(OPC, insn); + u32 option = FIELD_GET(OPTION, insn); + u32 s = FIELD_GET(S, insn); + int scale = size; + int extend_len = (option & 0x1) ? 64 : 32; + bool extend_unsigned = !(option & 0x4); + int shift = s ? scale : 0; + + int n = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, insn); + int t = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); + int m = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RM, insn); + bool is_store; + bool is_signed; + int regsize; + int datasize; + u64 offset; + u64 address; + u64 data; + + if ((opc & 0x2) == 0) { + /* store or zero-extending load */ + is_store = !(opc & 0x1); + regsize = size == 0x3 ? 64 : 32; + is_signed = false; + } else { + if (size == 0x3) { + if ((opc & 0x1) == 0) { + /* prefetch */ + return 0; + } + /* undefined */ + return 1; + } + /* sign-extending load */ + is_store = false; + if (size == 0x2 && (opc & 0x1) == 0x1) { + /* undefined */ + return 1; + } + regsize = (opc & 0x1) == 0x1 ? 
32 : 64; + is_signed = true; + } + + datasize = 8 << scale; + + if (n == t && n != 31) + return 1; + + offset = pt_regs_read_reg(regs, m); + if (extend_len == 32) { + offset &= (u32)~0; + if (!extend_unsigned) + offset = sign_extend64(offset, 31); + } + offset <<= shift; + + address = regs_get_register(regs, n << 3) + offset; + + if (is_store) { + data = pt_regs_read_reg(regs, t); + if (align_store(address, datasize / 8, data)) + return 1; + } else { + if (align_load(address, datasize / 8, &data)) + return 1; + if (is_signed) { + if (regsize == 32) + data = sign_extend32(data, datasize - 1); + else + data = sign_extend64(data, datasize - 1); + } + pt_regs_write_reg(regs, t, data); + } + + return 0; +} + +static int align_ldst_regoff_simdfp(u32 insn, struct pt_regs *regs) +{ + const u32 SIZE = GENMASK(31, 30); + const u32 OPC = GENMASK(23, 22); + const u32 OPTION = GENMASK(15, 13); + const u32 S = BIT(12); + + u32 size = FIELD_GET(SIZE, insn); + u32 opc = FIELD_GET(OPC, insn); + u32 option = FIELD_GET(OPTION, insn); + u32 s = FIELD_GET(S, insn); + int scale = (opc & 0x2) << 1 | size; + int extend_len = (option & 0x1) ? 64 : 32; + bool extend_unsigned = !(option & 0x4); + int shift = s ? 
scale : 0; + + int n = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, insn); + int t = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); + int m = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RM, insn); + bool is_store = !(opc & BIT(0)); + int datasize; + u64 offset; + u64 address; + u64 data_d0, data_d1; + + if ((option & 0x2) == 0) + return 1; + + datasize = 8 << scale; + + if (n == t && n != 31) + return 1; + + offset = pt_regs_read_reg(regs, m); + if (extend_len == 32) { + offset &= (u32)~0; + if (!extend_unsigned) + offset = sign_extend64(offset, 31); + } + offset <<= shift; + + address = regs_get_register(regs, n << 3) + offset; + + if (is_store) { + data_d0 = get_vn_dt(t, 0); + if (datasize == 128) { + data_d1 = get_vn_dt(t, 1); + if (align_store(address, 8, data_d0) || + align_store(address + 8, 8, data_d1)) + return 1; + } else { + if (align_store(address, datasize / 8, data_d0)) + return 1; + } + } else { + if (datasize == 128) { + if (align_load(address, 8, &data_d0) || + align_load(address + 8, 8, &data_d1)) + return 1; + } else { + if (align_load(address, datasize / 8, &data_d0)) + return 1; + data_d1 = 0; + } + set_vn_dt(t, 0, data_d0); + set_vn_dt(t, 1, data_d1); + } + + return 0; +} + +static int align_ldst_imm(u32 insn, struct pt_regs *regs) +{ + const u32 SIZE = GENMASK(31, 30); + const u32 OPC = GENMASK(23, 22); + + u32 size = FIELD_GET(SIZE, insn); + u32 opc = FIELD_GET(OPC, insn); + bool wback = !(insn & BIT(24)) && !!(insn & BIT(10)); + bool postindex = wback && !(insn & BIT(11)); + int scale = size; + u64 offset; + + int n = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, insn); + int t = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); + bool is_store; + bool is_signed; + int regsize; + int datasize; + u64 address; + u64 data; + + if (!(insn & BIT(24))) { + u64 uoffset = + aarch64_insn_decode_immediate(AARCH64_INSN_IMM_9, insn); + offset = sign_extend64(uoffset, 8); + } else { + offset = 
aarch64_insn_decode_immediate(AARCH64_INSN_IMM_12, insn); + offset <<= scale; + } + + if ((opc & 0x2) == 0) { + /* store or zero-extending load */ + is_store = !(opc & 0x1); + regsize = size == 0x3 ? 64 : 32; + is_signed = false; + } else { + if (size == 0x3) { + if (FIELD_GET(GENMASK(11, 10), insn) == 0 && (opc & 0x1) == 0) { + /* prefetch */ + return 0; + } + /* undefined */ + return 1; + } + /* sign-extending load */ + is_store = false; + if (size == 0x2 && (opc & 0x1) == 0x1) { + /* undefined */ + return 1; + } + regsize = (opc & 0x1) == 0x1 ? 32 : 64; + is_signed = true; + } + + datasize = 8 << scale; + + if (n == t && n != 31) + return 1; + + address = regs_get_register(regs, n << 3); + + if (!postindex) + address += offset; + + if (is_store) { + data = pt_regs_read_reg(regs, t); + if (align_store(address, datasize / 8, data)) + return 1; + } else { + if (align_load(address, datasize / 8, &data)) + return 1; + if (is_signed) { + if (regsize == 32) + data = sign_extend32(data, datasize - 1); + else + data = sign_extend64(data, datasize - 1); + } + pt_regs_write_reg(regs, t, data); + } + + if (wback) { + if (postindex) + address += offset; + if (n == 31) + regs->sp = address; + else + pt_regs_write_reg(regs, n, address); + } + + return 0; +} + +static int align_ldst_imm_simdfp(u32 insn, struct pt_regs *regs) +{ + const u32 SIZE = GENMASK(31, 30); + const u32 OPC = GENMASK(23, 22); + + u32 size = FIELD_GET(SIZE, insn); + u32 opc = FIELD_GET(OPC, insn); + bool wback = !(insn & BIT(24)) && !!(insn & BIT(10)); + bool postindex = wback && !(insn & BIT(11)); + int scale = (opc & 0x2) << 1 | size; + u64 offset; + + int n = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, insn); + int t = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); + bool is_store = !(opc & BIT(0)); + int datasize; + u64 address; + u64 data_d0, data_d1; + + if (scale > 4) + return 1; + + if (!(insn & BIT(24))) { + u64 uoffset = + aarch64_insn_decode_immediate(AARCH64_INSN_IMM_9, 
insn); + offset = sign_extend64(uoffset, 8); + } else { + offset = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_12, insn); + offset <<= scale; + } + + datasize = 8 << scale; + + address = regs_get_register(regs, n << 3); + + if (!postindex) + address += offset; + + if (is_store) { + data_d0 = get_vn_dt(t, 0); + if (datasize == 128) { + data_d1 = get_vn_dt(t, 1); + if (align_store(address, 8, data_d0) || + align_store(address + 8, 8, data_d1)) + return 1; + } else { + if (align_store(address, datasize / 8, data_d0)) + return 1; + } + } else { + if (datasize == 128) { + if (align_load(address, 8, &data_d0) || + align_load(address + 8, 8, &data_d1)) + return 1; + } else { + if (align_load(address, datasize / 8, &data_d0)) + return 1; + data_d1 = 0; + } + set_vn_dt(t, 0, data_d0); + set_vn_dt(t, 1, data_d1); + } + + if (wback) { + if (postindex) + address += offset; + if (n == 31) + regs->sp = address; + else + pt_regs_write_reg(regs, n, address); + } + + return 0; +} + +static int align_ldst_vector_multiple(u32 insn, struct pt_regs *regs) +{ + const u32 Q_MASK = BIT(30); + const u32 L_MASK = BIT(22); + const u32 OPCODE = GENMASK(15, 12); + const u32 SIZE = GENMASK(11, 10); + + u32 Q = FIELD_GET(Q_MASK, insn); + u32 L = FIELD_GET(L_MASK, insn); + u32 opcode = FIELD_GET(OPCODE, insn); + u32 size = FIELD_GET(SIZE, insn); + + int t = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); + int n = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, insn); + int m = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RM, insn); + bool wback = !!(insn & BIT(23)); + + int datasize = Q ? 
128 : 64; + int esize = 8 << size; + int elements = datasize / esize; + int rpt; + int selem; + u64 address; + u64 offs; + u64 rval_d0, rval_d1; + int tt; + int ebytes; + int r; + int e; + int s; + u64 data; + + switch (opcode) { + case 0: // LD/ST4 (4 registers) + rpt = 1; + selem = 4; + break; + case 2: // LD/ST1 (4 registers) + rpt = 4; + selem = 1; + break; + case 4: // LD/ST3 (3 registers) + rpt = 1; + selem = 3; + break; + case 6: // LD/ST1 (3 registers) + rpt = 3; + selem = 1; + break; + case 7: // LD/ST1 (1 register) + rpt = 1; + selem = 1; + break; + case 8: // LD/ST2 (2 registers) + rpt = 1; + selem = 2; + break; + case 10: // LD/ST1 (2 registers) + rpt = 2; + selem = 1; + break; + default: + return 1; + } + + if (size == 3 && Q == 0 && selem != 1) + return 1; + + ebytes = esize / 8; + + address = regs_get_register(regs, n << 3); + + offs = 0; + + for (r = 0; r < rpt; r++) { + for (e = 0; e < elements; e++) { + tt = (t + r) % 32; + for (s = 0; s < selem; s++) { + rval_d0 = get_vn_dt(tt, 0); + rval_d1 = get_vn_dt(tt, 1); + if (L) { + if (align_load(address + offs, ebytes, &data)) + return 1; + elem_set(&rval_d1, &rval_d0, e, esize, data); + set_vn_dt(tt, 0, rval_d0); + set_vn_dt(tt, 1, rval_d1); + } else { + data = elem_get(rval_d1, rval_d0, e, esize); + if (align_store(address + offs, ebytes, data)) + return 1; + } + offs += ebytes; + tt = (tt + 1) % 32; + } + } + } + + if (wback) { + if (m != 31) + offs = regs_get_register(regs, m << 3); + if (n == 31) + regs->sp = address + offs; + else + pt_regs_write_reg(regs, n, address + offs); + } + + return 0; +} + +static int align_ldst_vector_single(u32 insn, struct pt_regs *regs) +{ + const u32 Q_MASK = BIT(30); + const u32 L_MASK = BIT(22); + const u32 R_MASK = BIT(21); + const u32 OPCODE = GENMASK(15, 13); + const u32 S_MASK = BIT(12); + const u32 SIZE = GENMASK(11, 10); + + u32 Q = FIELD_GET(Q_MASK, insn); + u32 L = FIELD_GET(L_MASK, insn); + u32 R = FIELD_GET(R_MASK, insn); + u32 opcode = FIELD_GET(OPCODE, 
insn); + u32 S = FIELD_GET(S_MASK, insn); + u32 size = FIELD_GET(SIZE, insn); + + int t = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); + int n = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, insn); + int m = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RM, insn); + bool wback = !!(insn & BIT(23)); + + int init_scale = opcode >> 1; + int scale = init_scale; + int selem = (((opcode & 1) << 1) | R) + 1; + bool replicate = false; + int index; + int datasize; + int esize; + u64 address; + u64 offs; + u64 rval_d0, rval_d1; + u64 element; + int ebytes; + int s; + u64 data; + + switch (scale) { + case 3: + if (!L || S) + return 1; + scale = size; + replicate = true; + break; + case 0: + index = (Q << 3) | (S << 2) | size; + break; + case 1: + if (size & 1) + return 1; + index = (Q << 2) | (S << 1) | (size >> 1); + break; + case 2: + if (size & 2) + return 1; + if (!(size & 1)) + index = (Q << 1) | S; + else { + if (S) + return 1; + index = Q; + scale = 3; + } + break; + } + + datasize = Q ? 
128 : 64; + esize = 8 << scale; + + ebytes = esize / 8; + + address = regs_get_register(regs, n << 3); + + offs = 0; + + if (replicate) { + for (s = 0; s < selem; s++) { + if (align_load(address + offs, ebytes, &element)) + return 1; + data = replicate64(element, esize); + set_vn_dt(t, 0, data); + if (datasize == 128) + set_vn_dt(t, 1, data); + else + set_vn_dt(t, 1, 0); + offs += ebytes; + t = (t + 1) & 31; + } + } else { + for (s = 0; s < selem; s++) { + rval_d0 = get_vn_dt(t, 0); + rval_d1 = get_vn_dt(t, 1); + if (L) { + if (align_load(address + offs, ebytes, &data)) + return 1; + elem_set(&rval_d1, &rval_d0, index, esize, data); + set_vn_dt(t, 0, rval_d0); + set_vn_dt(t, 1, rval_d1); + } else { + data = elem_get(rval_d1, rval_d0, index, esize); + if (align_store(address + offs, ebytes, data)) + return 1; + } + offs += ebytes; + t = (t + 1) & 31; + } + } + + if (wback) { + if (m != 31) + offs = regs_get_register(regs, m << 3); + if (n == 31) + regs->sp = address + offs; + else + pt_regs_write_reg(regs, n, address + offs); + } + + return 0; +} + +static int align_ldst(u32 insn, struct pt_regs *regs) +{ + const u32 op0 = FIELD_GET(GENMASK(31, 28), insn); + const u32 op1 = FIELD_GET(BIT(26), insn); + const u32 op2 = FIELD_GET(GENMASK(24, 23), insn); + const u32 op3 = FIELD_GET(GENMASK(21, 16), insn); + const u32 op4 = FIELD_GET(GENMASK(11, 10), insn); + + if ((op0 & 0x3) == 0x2) { + /* + * |------+-----+-----+-----+-----+-----------------------------------------| + * | op0 | op1 | op2 | op3 | op4 | Decode group | + * |------+-----+-----+-----+-----+-----------------------------------------| + * | xx10 | - | 00 | - | - | Load/store no-allocate pair (offset) | + * | xx10 | - | 01 | - | - | Load/store register pair (post-indexed) | + * | xx10 | - | 10 | - | - | Load/store register pair (offset) | + * | xx10 | - | 11 | - | - | Load/store register pair (pre-indexed) | + * |------+-----+-----+-----+-----+-----------------------------------------| + */ + + if (op1 == 0) { 
/* V == 0 */ + /* general */ + return align_ldst_pair(insn, regs); + } + /* simdfp */ + return kernel_neon_wrapper(align_ldst_pair_simdfp, insn, regs); + } else if ((op0 & 0x3) == 0x3 && + (((op2 & 0x2) == 0 && (op3 & 0x20) == 0 && op4 != 0x2) || + ((op2 & 0x2) == 0x2))) { + /* + * |------+-----+-----+--------+-----+---------------------------------------------| + * | op0 | op1 | op2 | op3 | op4 | Decode group | + * |------+-----+-----+--------+-----+---------------------------------------------| + * | xx11 | - | 0x | 0xxxxx | 00 | Load/store register (unscaled immediate) | + * | xx11 | - | 0x | 0xxxxx | 01 | Load/store register (immediate post-indexed | + * | xx11 | - | 0x | 0xxxxx | 11 | Load/store register (immediate pre-indexed) | + * | xx11 | - | 1x | - | - | Load/store register (unsigned immediate) | + * |------+-----+-----+--------+-----+---------------------------------------------| + */ + + if (op1 == 0) { /* V == 0 */ + /* general */ + return align_ldst_imm(insn, regs); + } + /* simdfp */ + return kernel_neon_wrapper(align_ldst_imm_simdfp, insn, regs); + } else if ((op0 & 0x3) == 0x3 && (op2 & 0x2) == 0 && + (op3 & 0x20) == 0x20 && op4 == 0x2) { + /* + * |------+-----+-----+--------+-----+---------------------------------------| + * | op0 | op1 | op2 | op3 | op4 | | + * |------+-----+-----+--------+-----+---------------------------------------| + * | xx11 | - | 0x | 1xxxxx | 10 | Load/store register (register offset) | + * |------+-----+-----+--------+-----+---------------------------------------| + */ + if (op1 == 0) { /* V == 0 */ + /* general */ + return align_ldst_regoff(insn, regs); + } + /* simdfp */ + return kernel_neon_wrapper(align_ldst_regoff_simdfp, insn, regs); + } else if ((op0 & 0xb) == 0 && op1 == 1 && + ((op2 == 0 && op3 == 0) || (op2 == 1 && ((op3 & 0x20) == 0)))) { + /* + * |------+-----+-----+--------+-----+---------------------------------------------| + * | op0 | op1 | op2 | op3 | op4 | | + * 
|------+-----+-----+--------+-----+---------------------------------------------| + * | 0x00 | 1 | 00 | 000000 | - | Advanced SIMD load/store multiple structure | + * | 0x00 | 1 | 01 | 0xxxxx | - | Advanced SIMD load/store multiple structure | + * | | | | | | (post-indexed) | + * |------+-----+-----+--------+-----+---------------------------------------------| + */ + return kernel_neon_wrapper(align_ldst_vector_multiple, insn, regs); + } else if ((op0 & 0xb) == 0 && op1 == 1 && + ((op2 == 2 && ((op3 & 0x1f) == 0)) || op2 == 3)) { + /* + * |------+-----+-----+--------+-----+-------------------------------------------| + * | op0 | op1 | op2 | op3 | op4 | | + * |------+-----+-----+--------+-----+-------------------------------------------| + * | 0x00 | 1 | 10 | x00000 | - | Advanced SIMD load/store single structure | + * | 0x00 | 1 | 11 | - | - | Advanced SIMD load/store single structure | + * | | | | | | (post-indexed) | + * |------+-----+-----+--------+-----+-------------------------------------------| + */ + return kernel_neon_wrapper(align_ldst_vector_single, insn, regs); + } else + return 1; +} + +int do_alignment_fixup(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + u32 insn; + int res; + + if (user_mode(regs)) { + __le32 insn_le; + + if (!is_ttbr0_addr(addr)) + return 1; + + if (get_user(insn_le, + (__le32 __user *)instruction_pointer(regs))) + return 1; + insn = le32_to_cpu(insn_le); + } else { + if (aarch64_insn_read((void *)instruction_pointer(regs), &insn)) + return 1; + } + + if (__aarch64_insn_is_class_ldst(insn)) + res = align_ldst(insn, regs); + else if (__aarch64_insn_is_dc_zva(insn)) + res = align_dc_zva(addr, regs); + else + res = 1; + + if (!res) { + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->pc); + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); + } + return res; +} diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index a193b6a5d1e65f..4a4e3bc9188c95 100644 --- a/arch/arm64/mm/fault.c +++ 
b/arch/arm64/mm/fault.c @@ -795,8 +795,10 @@ static int __kprobes do_translation_fault(unsigned long far, static int do_alignment_fault(unsigned long far, unsigned long esr, struct pt_regs *regs) { - if (IS_ENABLED(CONFIG_COMPAT_ALIGNMENT_FIXUPS) && - compat_user_mode(regs)) + if (!compat_user_mode(regs)) { + if (IS_ENABLED(CONFIG_ARM64_ALIGNMENT_FIXUPS)) + return do_alignment_fixup(far, esr, regs); + } else if (IS_ENABLED(CONFIG_COMPAT_ALIGNMENT_FIXUPS)) return do_compat_alignment_fixup(far, regs); do_bad_area(far, esr, regs); return 0; diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 5b1dee660a07d2..7b80f71364ad23 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -183,6 +183,22 @@ config DRM_LOAD_EDID_FIRMWARE default case is N. Details and instructions how to build your own EDID data are given in Documentation/admin-guide/edid.rst. +config DRM_FORCE_DMA_WRITE_COMBINED_MAPPINGS + bool "Force write-combined mappings for DMA" + default y + help + PCIe GPU device drivers may use normal cached mappings for DMA memory. + + This requires the PCIe interface to be coherent with the CPU caches, + which is not supported by many Arm platforms (e.g. RK35xx), leading to + data corruption on inbound transactions. + + Enable this option to force write-combined mappings instead (Normal + non-cacheable on Arm). + + Disable if the hardware supports coherency, as it might cause issues on + certain platforms that ignore the PCIe NoSnoop TLP attribute. + source "drivers/gpu/drm/display/Kconfig" config DRM_TTM diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index fac9f65a6948ff..fadae4e410f536 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7657,6 +7657,7 @@ int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state, * FIXME: Cut over to (async) commit helpers instead of hand-rolling * everything. 
*/ +#if defined(CONFIG_VGA_CONSOLE) if (state->base.legacy_cursor_update) { struct intel_crtc_state *new_crtc_state; struct intel_crtc *crtc; @@ -7665,7 +7666,7 @@ int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state, for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) complete_all(&new_crtc_state->uapi.commit->flip_done); } - +#endif ret = intel_atomic_prepare_commit(state); if (ret) { drm_dbg_atomic(display->drm, diff --git a/drivers/gpu/drm/i915/display/intel_vga.c b/drivers/gpu/drm/i915/display/intel_vga.c index 6e125564db34c9..31d3f1f504d26f 100644 --- a/drivers/gpu/drm/i915/display/intel_vga.c +++ b/drivers/gpu/drm/i915/display/intel_vga.c @@ -78,6 +78,7 @@ void intel_vga_disable(struct intel_display *display) void intel_vga_reset_io_mem(struct intel_display *display) { +#if defined(CONFIG_VGA_CONSOLE) struct pci_dev *pdev = to_pci_dev(display->drm->dev); /* @@ -93,6 +94,7 @@ void intel_vga_reset_io_mem(struct intel_display *display) vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); outb(inb(VGA_MIS_R), VGA_MIS_W); vga_put(pdev, VGA_RSRC_LEGACY_IO); +#endif } int intel_vga_register(struct intel_display *display) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 55abc510067bc8..bb07eedc9a8a80 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -317,6 +317,9 @@ nouveau_drm(struct drm_device *dev) static inline bool nouveau_drm_use_coherent_gpu_mapping(struct nouveau_drm *drm) { +#ifdef CONFIG_DRM_FORCE_DMA_WRITE_COMBINED_MAPPINGS + return false; +#endif struct nvif_mmu *mmu = &drm->client.mmu; return !(mmu->type[drm->ttm.type_host[0]].type & NVIF_MEM_UNCACHED); } diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c index bd870028514b66..f3a1902bc7e0d9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c +++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c @@ -72,9 +72,7 @@ 
nouveau_sgdma_create_ttm(struct ttm_buffer_object *bo, uint32_t page_flags) struct nouveau_sgdma_be *nvbe; enum ttm_caching caching; - if (nvbo->force_coherent) - caching = ttm_uncached; - else if (drm->agp.bridge) + if (nvbo->force_coherent || drm->agp.bridge) caching = ttm_write_combined; else caching = ttm_cached; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c index 4c29b60460d48d..9c7d14536ab672 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c @@ -1645,7 +1645,8 @@ nvkm_device_pci_func = { .irq = nvkm_device_pci_irq, .resource_addr = nvkm_device_pci_resource_addr, .resource_size = nvkm_device_pci_resource_size, - .cpu_coherent = !IS_ENABLED(CONFIG_ARM), + .cpu_coherent = !IS_ENABLED(CONFIG_ARM) && + !IS_ENABLED(CONFIG_DRM_FORCE_DMA_WRITE_COMBINED_MAPPINGS), }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c index 7ce1b65e2c1c29..23681ed7e23899 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c @@ -248,9 +248,8 @@ nvkm_fb_dtor(struct nvkm_subdev *subdev) nvkm_falcon_fw_dtor(&fb->vpr_scrubber); if (fb->sysmem.flush_page) { - dma_unmap_page(subdev->device->dev, fb->sysmem.flush_page_addr, - PAGE_SIZE, DMA_BIDIRECTIONAL); - __free_page(fb->sysmem.flush_page); + dma_free_coherent(subdev->device->dev, PAGE_SIZE, + fb->sysmem.flush_page, fb->sysmem.flush_page_addr); } if (fb->func->dtor) @@ -279,14 +278,11 @@ nvkm_fb_ctor(const struct nvkm_fb_func *func, struct nvkm_device *device, mutex_init(&fb->tags.mutex); if (func->sysmem.flush_page_init) { - fb->sysmem.flush_page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); + fb->sysmem.flush_page = dma_alloc_coherent(device->dev, PAGE_SIZE, + &fb->sysmem.flush_page_addr, + GFP_KERNEL | __GFP_ZERO); if (!fb->sysmem.flush_page) return -ENOMEM; - - 
fb->sysmem.flush_page_addr = dma_map_page(device->dev, fb->sysmem.flush_page, - 0, PAGE_SIZE, DMA_BIDIRECTIONAL); - if (dma_mapping_error(device->dev, fb->sysmem.flush_page_addr)) - return -EFAULT; } return 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c index a575a8dbf727df..1b6a1009d33f3c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c @@ -1571,12 +1571,12 @@ nvkm_gsp_sg_free(struct nvkm_device *device, struct sg_table *sgt) struct scatterlist *sgl; int i; - dma_unmap_sgtable(device->dev, sgt, DMA_BIDIRECTIONAL, 0); - for_each_sgtable_sg(sgt, sgl, i) { - struct page *page = sg_page(sgl); + void *cpu_addr = sg_virt(sgl); + dma_addr_t dma_addr = sg_dma_address(sgl); - __free_page(page); + if (cpu_addr && dma_addr) + dma_free_coherent(device->dev, PAGE_SIZE, cpu_addr, dma_addr); } sg_free_table(sgt); @@ -1594,21 +1594,23 @@ nvkm_gsp_sg(struct nvkm_device *device, u64 size, struct sg_table *sgt) return ret; for_each_sgtable_sg(sgt, sgl, i) { - struct page *page = alloc_page(GFP_KERNEL); + void *cpu_addr; + dma_addr_t dma_addr; - if (!page) { + cpu_addr = dma_alloc_coherent(device->dev, PAGE_SIZE, + &dma_addr, GFP_KERNEL); + if (!cpu_addr) { nvkm_gsp_sg_free(device, sgt); return -ENOMEM; } - sg_set_page(sgl, page, PAGE_SIZE, 0); + /* XXX: unsafe to use virt_to_page with dma_alloc_coherent */ + sg_set_page(sgl, virt_to_page(cpu_addr), PAGE_SIZE, 0); + sg_dma_address(sgl) = dma_addr; + sg_dma_len(sgl) = PAGE_SIZE; } - ret = dma_map_sgtable(device->dev, sgt, DMA_BIDIRECTIONAL, 0); - if (ret) - nvkm_gsp_sg_free(device, sgt); - - return ret; + return 0; } static void diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.c index 92e363dbbc5a6e..21438193b967f7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.c 
@@ -133,8 +133,14 @@ int nvkm_mem_map_host(struct nvkm_memory *memory, void **pmap) { struct nvkm_mem *mem = nvkm_mem(memory); + pgprot_t prot = PAGE_KERNEL; + +#ifdef CONFIG_DRM_FORCE_DMA_WRITE_COMBINED_MAPPINGS + prot = pgprot_writecombine(prot); +#endif + if (mem->mem) { - *pmap = vmap(mem->mem, mem->pages, VM_MAP, PAGE_KERNEL); + *pmap = vmap(mem->mem, mem->pages, VM_MAP, prot); return *pmap ? 0 : -EFAULT; } return -EINVAL; diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index acbbca9d5c92f0..7c694b887df652 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -359,7 +359,8 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo, if (ret) return ret; - if (num_pages == 1 && ttm->caching == ttm_cached && + if (!IS_ENABLED(CONFIG_ARM64) && + num_pages == 1 && ttm->caching == ttm_cached && !(man->use_tt && (ttm->page_flags & TTM_TT_FLAG_DECRYPTED))) { /* * We're mapping a single page, and the desired diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c index b3fffe7b5062a9..6966495bdd499b 100644 --- a/drivers/gpu/drm/ttm/ttm_module.c +++ b/drivers/gpu/drm/ttm/ttm_module.c @@ -63,7 +63,11 @@ pgprot_t ttm_prot_from_caching(enum ttm_caching caching, pgprot_t tmp) { /* Cached mappings need no adjustment */ if (caching == ttm_cached) +#ifdef CONFIG_ARM64 + return pgprot_dmacoherent(tmp); +#else return tmp; +#endif #if defined(__i386__) || defined(__x86_64__) if (caching == ttm_write_combined) diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 506e257dfba850..f1d31e5cd9143f 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -154,6 +154,10 @@ static void ttm_tt_init_fields(struct ttm_tt *ttm, enum ttm_caching caching, unsigned long extra_pages) { +#ifdef CONFIG_DRM_FORCE_DMA_WRITE_COMBINED_MAPPINGS + if (caching == ttm_cached) + caching = ttm_write_combined; +#endif ttm->num_pages = (PAGE_ALIGN(bo->base.size) >> 
PAGE_SHIFT) + extra_pages; ttm->page_flags = page_flags; ttm->dma_address = NULL; diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h index 08e0e3ffad1319..ce86c55365a314 100644 --- a/include/drm/drm_cache.h +++ b/include/drm/drm_cache.h @@ -45,6 +45,9 @@ bool drm_need_swiotlb(int dma_bits); static inline bool drm_arch_can_wc_memory(void) { +#ifdef CONFIG_DRM_FORCE_DMA_WRITE_COMBINED_MAPPINGS + return true; +#endif #if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE) return false; #elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON64) diff --git a/kernel/resource.c b/kernel/resource.c index edbe8ef7e8efd6..c6d53ddbcbf468 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -389,6 +389,7 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end, .flags = p->flags, .desc = p->desc, .parent = p->parent, + .name = p->name, }; } @@ -566,6 +567,40 @@ int __weak page_is_ram(unsigned long pfn) } EXPORT_SYMBOL_GPL(page_is_ram); +#ifdef CONFIG_ARM64 +static int pci_res_check(struct resource *res, void *arg) +{ + if (!res->name) + return 1; + + return strncmp(res->name, "PCI", 3); +} + +bool range_is_pci(phys_addr_t phys_addr, size_t size) +{ + u64 start, end; + int ret; + + start = phys_addr; + end = phys_addr + size; + + /* Check 32-bit MMIO */ + ret = walk_iomem_res_desc(IORES_DESC_NONE, IORESOURCE_MEM, + start, end, NULL, pci_res_check); + if (!ret) + return true; + + /* Check 64-bit MMIO */ + ret = walk_iomem_res_desc(IORES_DESC_NONE, IORESOURCE_MEM_64, + start, end, NULL, pci_res_check); + if (!ret) + return true; + + return false; +} +EXPORT_SYMBOL_GPL(range_is_pci); +#endif + static int __region_intersects(struct resource *parent, resource_size_t start, size_t size, unsigned long flags, unsigned long desc)