| 1 | // SPDX-License-Identifier: GPL-2.0-only |
| 2 | /* |
| 3 | * User address space access functions. |
| 4 | * |
| 5 | * Copyright 1997 Andi Kleen <ak@muc.de> |
| 6 | * Copyright 1997 Linus Torvalds |
| 7 | * Copyright 2002 Andi Kleen <ak@suse.de> |
| 8 | */ |
| 9 | #include <linux/export.h> |
| 10 | #include <linux/uaccess.h> |
| 11 | #include <linux/highmem.h> |
| 12 | #include <linux/libnvdimm.h> |
| 13 | |
/*
 * Cache write-back and flushing copy helpers for persistent memory
 */
| 17 | |
| 18 | #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE |
| 19 | /** |
| 20 | * clean_cache_range - write back a cache range with CLWB |
| 21 | * @addr: virtual start address |
| 22 | * @size: number of bytes to write back |
| 23 | * |
| 24 | * Write back a cache range using the CLWB (cache line write back) |
| 25 | * instruction. Note that @size is internally rounded up to be cache |
| 26 | * line size aligned. |
| 27 | */ |
| 28 | static void clean_cache_range(void *addr, size_t size) |
| 29 | { |
| 30 | u16 x86_clflush_size = boot_cpu_data.x86_clflush_size; |
| 31 | unsigned long clflush_mask = x86_clflush_size - 1; |
| 32 | void *vend = addr + size; |
| 33 | void *p; |
| 34 | |
| 35 | for (p = (void *)((unsigned long)addr & ~clflush_mask); |
| 36 | p < vend; p += x86_clflush_size) |
| 37 | clwb(p: p); |
| 38 | } |
| 39 | |
/**
 * arch_wb_cache_pmem - write back the cache lines covering a range
 * @addr:	virtual start address
 * @size:	number of bytes to write back
 *
 * GPL-exported entry point; the work is done by clean_cache_range().
 */
void arch_wb_cache_pmem(void *addr, size_t size)
{
	clean_cache_range(addr, size);
}
EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
| 45 | |
| 46 | long __copy_user_flushcache(void *dst, const void __user *src, unsigned size) |
| 47 | { |
| 48 | unsigned long flushed, dest = (unsigned long) dst; |
| 49 | long rc; |
| 50 | |
| 51 | stac(); |
| 52 | rc = __copy_user_nocache(dst, src, size); |
| 53 | clac(); |
| 54 | |
| 55 | /* |
| 56 | * __copy_user_nocache() uses non-temporal stores for the bulk |
| 57 | * of the transfer, but we need to manually flush if the |
| 58 | * transfer is unaligned. A cached memory copy is used when |
| 59 | * destination or size is not naturally aligned. That is: |
| 60 | * - Require 8-byte alignment when size is 8 bytes or larger. |
| 61 | * - Require 4-byte alignment when size is 4 bytes. |
| 62 | */ |
| 63 | if (size < 8) { |
| 64 | if (!IS_ALIGNED(dest, 4) || size != 4) |
| 65 | clean_cache_range(addr: dst, size); |
| 66 | } else { |
| 67 | if (!IS_ALIGNED(dest, 8)) { |
| 68 | dest = ALIGN(dest, boot_cpu_data.x86_clflush_size); |
| 69 | clean_cache_range(addr: dst, size: 1); |
| 70 | } |
| 71 | |
| 72 | flushed = dest - (unsigned long) dst; |
| 73 | if (size > flushed && !IS_ALIGNED(size - flushed, 8)) |
| 74 | clean_cache_range(addr: dst + size - 1, size: 1); |
| 75 | } |
| 76 | |
| 77 | return rc; |
| 78 | } |
| 79 | |
| 80 | void __memcpy_flushcache(void *_dst, const void *_src, size_t size) |
| 81 | { |
| 82 | unsigned long dest = (unsigned long) _dst; |
| 83 | unsigned long source = (unsigned long) _src; |
| 84 | |
| 85 | /* cache copy and flush to align dest */ |
| 86 | if (!IS_ALIGNED(dest, 8)) { |
| 87 | size_t len = min_t(size_t, size, ALIGN(dest, 8) - dest); |
| 88 | |
| 89 | memcpy((void *) dest, (void *) source, len); |
| 90 | clean_cache_range(addr: (void *) dest, size: len); |
| 91 | dest += len; |
| 92 | source += len; |
| 93 | size -= len; |
| 94 | if (!size) |
| 95 | return; |
| 96 | } |
| 97 | |
| 98 | /* 4x8 movnti loop */ |
| 99 | while (size >= 32) { |
| 100 | asm("movq (%0), %%r8\n" |
| 101 | "movq 8(%0), %%r9\n" |
| 102 | "movq 16(%0), %%r10\n" |
| 103 | "movq 24(%0), %%r11\n" |
| 104 | "movnti %%r8, (%1)\n" |
| 105 | "movnti %%r9, 8(%1)\n" |
| 106 | "movnti %%r10, 16(%1)\n" |
| 107 | "movnti %%r11, 24(%1)\n" |
| 108 | :: "r" (source), "r" (dest) |
| 109 | : "memory" , "r8" , "r9" , "r10" , "r11" ); |
| 110 | dest += 32; |
| 111 | source += 32; |
| 112 | size -= 32; |
| 113 | } |
| 114 | |
| 115 | /* 1x8 movnti loop */ |
| 116 | while (size >= 8) { |
| 117 | asm("movq (%0), %%r8\n" |
| 118 | "movnti %%r8, (%1)\n" |
| 119 | :: "r" (source), "r" (dest) |
| 120 | : "memory" , "r8" ); |
| 121 | dest += 8; |
| 122 | source += 8; |
| 123 | size -= 8; |
| 124 | } |
| 125 | |
| 126 | /* 1x4 movnti loop */ |
| 127 | while (size >= 4) { |
| 128 | asm("movl (%0), %%r8d\n" |
| 129 | "movnti %%r8d, (%1)\n" |
| 130 | :: "r" (source), "r" (dest) |
| 131 | : "memory" , "r8" ); |
| 132 | dest += 4; |
| 133 | source += 4; |
| 134 | size -= 4; |
| 135 | } |
| 136 | |
| 137 | /* cache copy for remaining bytes */ |
| 138 | if (size) { |
| 139 | memcpy((void *) dest, (void *) source, size); |
| 140 | clean_cache_range(addr: (void *) dest, size); |
| 141 | } |
| 142 | } |
| 143 | EXPORT_SYMBOL_GPL(__memcpy_flushcache); |
| 144 | #endif |
| 145 | |