// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2019, 2020 Amazon.com, Inc. or its affiliates. All rights reserved.
 *
 * User extended attribute client side cache functions.
 *
 * Author: Frank van der Linden <fllinden@amazon.com>
 */
#include <linux/errno.h>
#include <linux/nfs_fs.h>
#include <linux/hashtable.h>
#include <linux/refcount.h>
#include <uapi/linux/xattr.h>

#include "nfs4_fs.h"
#include "internal.h"

/*
 * User extended attributes client side caching is implemented by having
 * a cache structure attached to NFS inodes. This structure is allocated
 * when needed, and freed when the cache is zapped.
 *
 * The cache structure contains a hash table of entries, and a pointer
 * to a special-cased entry for the listxattr cache.
 *
 * Accessing and allocating / freeing the caches is done via reference
 * counting. The cache entries use a similar refcounting scheme.
 *
 * This makes freeing a cache, both from the shrinker and from the
 * zap cache path, easy. It also means that, in current use cases,
 * the large majority of inodes will not waste any memory, as they
 * will never have any user extended attributes assigned to them.
 *
 * Attribute entries are hashed into a simple hash table. They are
 * also part of an LRU.
 *
 * There are three shrinkers.
 *
 * Two shrinkers deal with the cache entries themselves: one for
 * large entries (> PAGE_SIZE), and one for smaller entries. The
 * shrinker for the larger entries works more aggressively than
 * the one for the smaller entries.
 *
 * The other shrinker frees the cache structures themselves.
 */

/*
 * 64 buckets is a good default. It is unlikely that any reasonable
 * workload uses even 64 user extended attributes. You can certainly
 * add a lot more - but you get what you ask for in those circumstances.
 */
#define NFS4_XATTR_HASH_SIZE	64

#define NFSDBG_FACILITY	NFSDBG_XATTRCACHE

struct nfs4_xattr_cache;
struct nfs4_xattr_entry;

struct nfs4_xattr_bucket {
	spinlock_t lock;
	struct hlist_head hlist;
	struct nfs4_xattr_cache *cache;
	bool draining;
};

struct nfs4_xattr_cache {
	struct kref ref;
	struct nfs4_xattr_bucket buckets[NFS4_XATTR_HASH_SIZE];
	struct list_head lru;
	struct list_head dispose;
	atomic_long_t nent;
	spinlock_t listxattr_lock;
	struct inode *inode;
	struct nfs4_xattr_entry *listxattr;
};

struct nfs4_xattr_entry {
	struct kref ref;
	struct hlist_node hnode;
	struct list_head lru;
	struct list_head dispose;
	char *xattr_name;
	void *xattr_value;
	size_t xattr_size;
	struct nfs4_xattr_bucket *bucket;
	uint32_t flags;
};

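/*
 * Entry flag: the value did not fit into the entry allocation and was
 * allocated separately with kvmalloc(); see nfs4_xattr_alloc_entry().
 */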
#define NFS4_XATTR_ENTRY_EXTVAL	0x0001

/*
 * LRU lists: one for the xattr caches attached to NFS inodes, and two
 * for the cache entries themselves (normal and large).
 */
static struct list_lru nfs4_xattr_cache_lru;
static struct list_lru nfs4_xattr_entry_lru;
static struct list_lru nfs4_xattr_large_entry_lru;

static struct kmem_cache *nfs4_xattr_cache_cachep;

/*
 * Hashing helper functions.
 */
static void
nfs4_xattr_hash_init(struct nfs4_xattr_cache *cache)
{
	unsigned int i;

	for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&cache->buckets[i].hlist);
		spin_lock_init(&cache->buckets[i].lock);
		cache->buckets[i].cache = cache;
		cache->buckets[i].draining = false;
	}
}

/*
 * Locking order:
 * 1. inode i_lock or bucket lock
 * 2. list_lru lock (taken by list_lru_* functions)
 */

/*
 * Wrapper functions to add a cache entry to the right LRU.
 */
static bool
nfs4_xattr_entry_lru_add(struct nfs4_xattr_entry *entry)
{
	struct list_lru *lru;

	lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
	    &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;

	return list_lru_add(lru, &entry->lru);
}

static bool
nfs4_xattr_entry_lru_del(struct nfs4_xattr_entry *entry)
{
	struct list_lru *lru;

	lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
	    &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;

	return list_lru_del(lru, &entry->lru);
}

/*
 * This function allocates cache entries. They are the normal
 * extended attribute name/value pairs, but may also be a listxattr
 * cache. Those allocations use the same entry so that they can be
 * treated as one by the memory shrinker.
 *
 * xattr cache entries are allocated together with names. If the
 * value fits into one page with the entry structure and the name,
 * it will also be part of the same allocation (kmalloc). This is
 * expected to be the vast majority of cases. Larger allocations
 * have a value pointer that is allocated separately by kvmalloc.
 *
 * Parameters:
 *
 * @name:  Name of the extended attribute. NULL for a listxattr cache
 *         entry.
 * @value: Value of the attribute, or listxattr cache. NULL if the
 *         value is to be copied from pages instead.
 * @pages: Pages to copy the value from, if not NULL. Passed in to
 *         make it easier to copy the value after an RPC, even if
 *         the value will not be passed up to the application (e.g.
 *         for a 'query' getxattr with a NULL buffer).
 * @len:   Length of the value. Can be 0 for zero-length attributes.
 *         @value and @pages will be NULL if @len is 0.
 */
static struct nfs4_xattr_entry *
nfs4_xattr_alloc_entry(const char *name, const void *value,
		       struct page **pages, size_t len)
{
	struct nfs4_xattr_entry *entry;
	void *valp;
	char *namep;
	size_t alloclen, slen;
	char *buf;
	uint32_t flags;

	BUILD_BUG_ON(sizeof(struct nfs4_xattr_entry) +
	    XATTR_NAME_MAX + 1 > PAGE_SIZE);

	alloclen = sizeof(struct nfs4_xattr_entry);
	if (name != NULL) {
		slen = strlen(name) + 1;
		alloclen += slen;
	} else
		slen = 0;

	if (alloclen + len <= PAGE_SIZE) {
		alloclen += len;
		flags = 0;
	} else {
		flags = NFS4_XATTR_ENTRY_EXTVAL;
	}

	buf = kmalloc(alloclen, GFP_KERNEL);
	if (buf == NULL)
		return NULL;
	entry = (struct nfs4_xattr_entry *)buf;

	if (name != NULL) {
		namep = buf + sizeof(struct nfs4_xattr_entry);
		memcpy(namep, name, slen);
	} else {
		namep = NULL;
	}

	if (flags & NFS4_XATTR_ENTRY_EXTVAL) {
		valp = kvmalloc(len, GFP_KERNEL);
		if (valp == NULL) {
			kfree(buf);
			return NULL;
		}
	} else if (len != 0) {
		valp = buf + sizeof(struct nfs4_xattr_entry) + slen;
	} else
		valp = NULL;

	if (valp != NULL) {
		if (value != NULL)
			memcpy(valp, value, len);
		else
			_copy_from_pages(valp, pages, 0, len);
	}

	entry->flags = flags;
	entry->xattr_value = valp;
	kref_init(&entry->ref);
	entry->xattr_name = namep;
	entry->xattr_size = len;
	entry->bucket = NULL;
	INIT_LIST_HEAD(&entry->lru);
	INIT_LIST_HEAD(&entry->dispose);
	INIT_HLIST_NODE(&entry->hnode);

	return entry;
}

static void
nfs4_xattr_free_entry(struct nfs4_xattr_entry *entry)
{
	if (entry->flags & NFS4_XATTR_ENTRY_EXTVAL)
		kvfree(entry->xattr_value);
	kfree(entry);
}

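/*
 * kref release callback for cache entries. The entry must already have
 * been taken off its LRU; if not, warn and leak it rather than free it.
 */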
static void
nfs4_xattr_free_entry_cb(struct kref *kref)
{
	struct nfs4_xattr_entry *entry;

	entry = container_of(kref, struct nfs4_xattr_entry, ref);

	if (WARN_ON(!list_empty(&entry->lru)))
		return;

	nfs4_xattr_free_entry(entry);
}

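/*
 * kref release callback for the cache structure itself. All buckets are
 * expected to be empty by now; warn and bail out if one is not.
 */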
static void
nfs4_xattr_free_cache_cb(struct kref *kref)
{
	struct nfs4_xattr_cache *cache;
	int i;

	cache = container_of(kref, struct nfs4_xattr_cache, ref);

	for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
		if (WARN_ON(!hlist_empty(&cache->buckets[i].hlist)))
			return;
		cache->buckets[i].draining = false;
	}

	cache->listxattr = NULL;

	kmem_cache_free(nfs4_xattr_cache_cachep, cache);
}

static struct nfs4_xattr_cache *
nfs4_xattr_alloc_cache(void)
{
	struct nfs4_xattr_cache *cache;

	cache = kmem_cache_alloc(nfs4_xattr_cache_cachep, GFP_KERNEL);
	if (cache == NULL)
		return NULL;

	kref_init(&cache->ref);
	atomic_long_set(&cache->nent, 0);

	return cache;
}

/*
 * Set the listxattr cache, which is a special-cased cache entry.
 * The special value ERR_PTR(-ESTALE) is used to indicate that
 * the cache is being drained - this prevents a new listxattr
 * cache from being added to what is now a stale cache.
 */
static int
nfs4_xattr_set_listcache(struct nfs4_xattr_cache *cache,
			 struct nfs4_xattr_entry *new)
{
	struct nfs4_xattr_entry *old;
	int ret = 1;

	spin_lock(&cache->listxattr_lock);

	old = cache->listxattr;

	if (old == ERR_PTR(-ESTALE)) {
		ret = 0;
		goto out;
	}

	cache->listxattr = new;
	if (new != NULL && new != ERR_PTR(-ESTALE))
		nfs4_xattr_entry_lru_add(new);

	if (old != NULL) {
		nfs4_xattr_entry_lru_del(old);
		kref_put(&old->ref, nfs4_xattr_free_entry_cb);
	}
out:
	spin_unlock(&cache->listxattr_lock);

	return ret;
}

/*
 * Unlink a cache from its parent inode, clearing out an invalid
 * cache. Must be called with i_lock held.
 */
static struct nfs4_xattr_cache *
nfs4_xattr_cache_unlink(struct inode *inode)
{
	struct nfs_inode *nfsi;
	struct nfs4_xattr_cache *oldcache;

	nfsi = NFS_I(inode);

	oldcache = nfsi->xattr_cache;
	if (oldcache != NULL) {
		list_lru_del(&nfs4_xattr_cache_lru, &oldcache->lru);
		oldcache->inode = NULL;
	}
	nfsi->xattr_cache = NULL;
	nfsi->cache_validity &= ~NFS_INO_INVALID_XATTR;

	return oldcache;
}

/*
 * Discard a cache. Called by get_cache() if there was an old,
 * invalid cache. Can also be called from a shrinker callback.
 *
 * The cache is dead, it has already been unlinked from its inode,
 * and no longer appears on the cache LRU list.
 *
 * Mark all buckets as draining, so that no new entries are added. This
 * could still happen in the unlikely but possible case that another
 * thread had grabbed a reference before it was unlinked from the inode,
 * and is still holding it for an add operation.
 *
 * Remove all entries from the LRU lists, so that there is no longer
 * any way to 'find' this cache. Then, remove the entries from the hash
 * table.
 *
 * At that point, the cache will remain empty and can be freed when the final
 * reference drops, which is very likely the kref_put at the end of
 * this function, or the one called immediately afterwards in the
 * shrinker callback.
 */
static void
nfs4_xattr_discard_cache(struct nfs4_xattr_cache *cache)
{
	unsigned int i;
	struct nfs4_xattr_entry *entry;
	struct nfs4_xattr_bucket *bucket;
	struct hlist_node *n;

	nfs4_xattr_set_listcache(cache, ERR_PTR(-ESTALE));

	for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
		bucket = &cache->buckets[i];

		spin_lock(&bucket->lock);
		bucket->draining = true;
		hlist_for_each_entry_safe(entry, n, &bucket->hlist, hnode) {
			nfs4_xattr_entry_lru_del(entry);
			hlist_del_init(&entry->hnode);
			kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
		}
		spin_unlock(&bucket->lock);
	}

	atomic_long_set(&cache->nent, 0);

	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
}

/*
 * Get a referenced copy of the cache structure. Avoid doing allocs
 * while holding i_lock. This means that we do some optimistic allocation,
 * and might have to free the result in rare cases.
 *
 * This function only checks the NFS_INO_INVALID_XATTR cache validity bit
 * and acts accordingly, replacing the cache when needed. For the read case
 * (!add), this means that the caller must make sure that the cache
 * is valid before calling this function. getxattr and listxattr call
 * revalidate_inode to do this. The attribute cache timeout (for the
 * non-delegated case) is expected to be dealt with in the revalidate
 * call.
 */

static struct nfs4_xattr_cache *
nfs4_xattr_get_cache(struct inode *inode, int add)
{
	struct nfs_inode *nfsi;
	struct nfs4_xattr_cache *cache, *oldcache, *newcache;

	nfsi = NFS_I(inode);

	cache = oldcache = NULL;

	spin_lock(&inode->i_lock);

	if (nfsi->cache_validity & NFS_INO_INVALID_XATTR)
		oldcache = nfs4_xattr_cache_unlink(inode);
	else
		cache = nfsi->xattr_cache;

	if (cache != NULL)
		kref_get(&cache->ref);

	spin_unlock(&inode->i_lock);

	if (add && cache == NULL) {
		newcache = NULL;

		cache = nfs4_xattr_alloc_cache();
		if (cache == NULL)
			goto out;

		spin_lock(&inode->i_lock);
		if (nfsi->cache_validity & NFS_INO_INVALID_XATTR) {
			/*
			 * The cache was invalidated again. Give up,
			 * since what we want to enter is now likely
			 * outdated anyway.
			 */
			spin_unlock(&inode->i_lock);
			kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
			cache = NULL;
			goto out;
		}

		/*
		 * Check if someone beat us to it.
		 */
		if (nfsi->xattr_cache != NULL) {
			newcache = nfsi->xattr_cache;
			kref_get(&newcache->ref);
		} else {
			kref_get(&cache->ref);
			nfsi->xattr_cache = cache;
			cache->inode = inode;
			list_lru_add(&nfs4_xattr_cache_lru, &cache->lru);
		}

		spin_unlock(&inode->i_lock);

		/*
		 * If there was a race, throw away the cache we just
		 * allocated, and use the new one allocated by someone
		 * else.
		 */
		if (newcache != NULL) {
			kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
			cache = newcache;
		}
	}

out:
	/*
	 * Discard the now orphaned old cache.
	 */
	if (oldcache != NULL)
		nfs4_xattr_discard_cache(oldcache);

	return cache;
}

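/*
 * Map an xattr name to its hash bucket within a cache. The bucket count
 * is a power of two, so the jhash value is simply masked down.
 */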
static inline struct nfs4_xattr_bucket *
nfs4_xattr_hash_bucket(struct nfs4_xattr_cache *cache, const char *name)
{
	return &cache->buckets[jhash(name, strlen(name), 0) &
	    (ARRAY_SIZE(cache->buckets) - 1)];
}

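/*
 * Look up an entry by name within a bucket. The caller must hold the
 * bucket lock. Returns the entry without taking a reference, or NULL.
 */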
static struct nfs4_xattr_entry *
nfs4_xattr_get_entry(struct nfs4_xattr_bucket *bucket, const char *name)
{
	struct nfs4_xattr_entry *entry;

	entry = NULL;

	hlist_for_each_entry(entry, &bucket->hlist, hnode) {
		if (!strcmp(entry->xattr_name, name))
			break;
	}

	return entry;
}

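/*
 * Insert an entry into the hash, replacing any existing entry with the
 * same name. Returns 1 on success, or 0 if the bucket is draining (the
 * cache is being discarded) and the entry was not added.
 */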
static int
nfs4_xattr_hash_add(struct nfs4_xattr_cache *cache,
		    struct nfs4_xattr_entry *entry)
{
	struct nfs4_xattr_bucket *bucket;
	struct nfs4_xattr_entry *oldentry = NULL;
	int ret = 1;

	bucket = nfs4_xattr_hash_bucket(cache, entry->xattr_name);
	entry->bucket = bucket;

	spin_lock(&bucket->lock);

	if (bucket->draining) {
		ret = 0;
		goto out;
	}

	oldentry = nfs4_xattr_get_entry(bucket, entry->xattr_name);
	if (oldentry != NULL) {
		hlist_del_init(&oldentry->hnode);
		nfs4_xattr_entry_lru_del(oldentry);
	} else {
		atomic_long_inc(&cache->nent);
	}

	hlist_add_head(&entry->hnode, &bucket->hlist);
	nfs4_xattr_entry_lru_add(entry);

out:
	spin_unlock(&bucket->lock);

	if (oldentry != NULL)
		kref_put(&oldentry->ref, nfs4_xattr_free_entry_cb);

	return ret;
}

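/*
 * Remove the named entry from the hash and the entry LRU, dropping the
 * hash table's reference. A miss is silently ignored.
 */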
static void
nfs4_xattr_hash_remove(struct nfs4_xattr_cache *cache, const char *name)
{
	struct nfs4_xattr_bucket *bucket;
	struct nfs4_xattr_entry *entry;

	bucket = nfs4_xattr_hash_bucket(cache, name);

	spin_lock(&bucket->lock);

	entry = nfs4_xattr_get_entry(bucket, name);
	if (entry != NULL) {
		hlist_del_init(&entry->hnode);
		nfs4_xattr_entry_lru_del(entry);
		atomic_long_dec(&cache->nent);
	}

	spin_unlock(&bucket->lock);

	if (entry != NULL)
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
}

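/*
 * Find an entry by name and return it with an extra reference held, or
 * NULL if there is no such entry. The caller must drop the reference.
 */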
static struct nfs4_xattr_entry *
nfs4_xattr_hash_find(struct nfs4_xattr_cache *cache, const char *name)
{
	struct nfs4_xattr_bucket *bucket;
	struct nfs4_xattr_entry *entry;

	bucket = nfs4_xattr_hash_bucket(cache, name);

	spin_lock(&bucket->lock);

	entry = nfs4_xattr_get_entry(bucket, name);
	if (entry != NULL)
		kref_get(&entry->ref);

	spin_unlock(&bucket->lock);

	return entry;
}

/*
 * Entry point to retrieve an entry from the cache.
 */
ssize_t nfs4_xattr_cache_get(struct inode *inode, const char *name, char *buf,
			     ssize_t buflen)
{
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry;
	ssize_t ret;

	cache = nfs4_xattr_get_cache(inode, 0);
	if (cache == NULL)
		return -ENOENT;

	ret = 0;
	entry = nfs4_xattr_hash_find(cache, name);

	if (entry != NULL) {
		dprintk("%s: cache hit '%s', len %lu\n", __func__,
			entry->xattr_name, (unsigned long)entry->xattr_size);
		if (buflen == 0) {
			/* Length probe only */
			ret = entry->xattr_size;
		} else if (buflen < entry->xattr_size)
			ret = -ERANGE;
		else {
			memcpy(buf, entry->xattr_value, entry->xattr_size);
			ret = entry->xattr_size;
		}
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
	} else {
		dprintk("%s: cache miss '%s'\n", __func__, name);
		ret = -ENOENT;
	}

	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);

	return ret;
}

/*
 * Retrieve a cached list of xattrs from the cache.
 */
ssize_t nfs4_xattr_cache_list(struct inode *inode, char *buf, ssize_t buflen)
{
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry;
	ssize_t ret;

	cache = nfs4_xattr_get_cache(inode, 0);
	if (cache == NULL)
		return -ENOENT;

	spin_lock(&cache->listxattr_lock);

	entry = cache->listxattr;

	if (entry != NULL && entry != ERR_PTR(-ESTALE)) {
		if (buflen == 0) {
			/* Length probe only */
			ret = entry->xattr_size;
		} else if (entry->xattr_size > buflen)
			ret = -ERANGE;
		else {
			memcpy(buf, entry->xattr_value, entry->xattr_size);
			ret = entry->xattr_size;
		}
	} else {
		ret = -ENOENT;
	}

	spin_unlock(&cache->listxattr_lock);

	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);

	return ret;
}

/*
 * Add an xattr to the cache.
 *
 * This also invalidates the xattr list cache.
 */
void nfs4_xattr_cache_add(struct inode *inode, const char *name,
			  const char *buf, struct page **pages, ssize_t buflen)
{
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry;

	dprintk("%s: add '%s' len %lu\n", __func__,
		name, (unsigned long)buflen);

	cache = nfs4_xattr_get_cache(inode, 1);
	if (cache == NULL)
		return;

	entry = nfs4_xattr_alloc_entry(name, buf, pages, buflen);
	if (entry == NULL)
		goto out;

	(void)nfs4_xattr_set_listcache(cache, NULL);

	if (!nfs4_xattr_hash_add(cache, entry))
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);

out:
	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
}

/*
 * Remove an xattr from the cache.
 *
 * This also invalidates the xattr list cache.
 */
void nfs4_xattr_cache_remove(struct inode *inode, const char *name)
{
	struct nfs4_xattr_cache *cache;

	dprintk("%s: remove '%s'\n", __func__, name);

	cache = nfs4_xattr_get_cache(inode, 0);
	if (cache == NULL)
		return;

	(void)nfs4_xattr_set_listcache(cache, NULL);
	nfs4_xattr_hash_remove(cache, name);

	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
}

/*
 * Cache listxattr output, replacing any possible old one.
 */
void nfs4_xattr_cache_set_list(struct inode *inode, const char *buf,
			       ssize_t buflen)
{
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry;

	cache = nfs4_xattr_get_cache(inode, 1);
	if (cache == NULL)
		return;

	entry = nfs4_xattr_alloc_entry(NULL, buf, NULL, buflen);
	if (entry == NULL)
		goto out;

	/*
	 * This is just there to be able to get to bucket->cache,
	 * which is obviously the same for all buckets, so just
	 * use bucket 0.
	 */
	entry->bucket = &cache->buckets[0];

	if (!nfs4_xattr_set_listcache(cache, entry))
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);

out:
	kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
}

/*
 * Zap the entire cache. Called when an inode is evicted.
 */
void nfs4_xattr_cache_zap(struct inode *inode)
{
	struct nfs4_xattr_cache *oldcache;

	spin_lock(&inode->i_lock);
	oldcache = nfs4_xattr_cache_unlink(inode);
	spin_unlock(&inode->i_lock);

	if (oldcache)
		nfs4_xattr_discard_cache(oldcache);
}

/*
 * The entry LRUs are shrunk more aggressively than the cache LRU; the
 * large-entry shrinker additionally sets @seeks to 1.
 *
 * Cache structures themselves are freed only after the entry shrinkers
 * have pruned them down to at most one remaining entry.
 */

static unsigned long nfs4_xattr_cache_count(struct shrinker *shrink,
					    struct shrink_control *sc);
static unsigned long nfs4_xattr_entry_count(struct shrinker *shrink,
					    struct shrink_control *sc);
static unsigned long nfs4_xattr_cache_scan(struct shrinker *shrink,
					   struct shrink_control *sc);
static unsigned long nfs4_xattr_entry_scan(struct shrinker *shrink,
					   struct shrink_control *sc);

static struct shrinker *nfs4_xattr_cache_shrinker;
static struct shrinker *nfs4_xattr_entry_shrinker;
static struct shrinker *nfs4_xattr_large_entry_shrinker;

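/*
 * list_lru walk callback for the cache shrinker: detach a (nearly empty)
 * cache from its inode and move it to the dispose list passed in @arg.
 */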
static enum lru_status
cache_lru_isolate(struct list_head *item,
	struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	struct list_head *dispose = arg;
	struct inode *inode;
	struct nfs4_xattr_cache *cache = container_of(item,
	    struct nfs4_xattr_cache, lru);

	if (atomic_long_read(&cache->nent) > 1)
		return LRU_SKIP;

	/*
	 * If a cache structure is on the LRU list, we know that
	 * its inode is valid. Try to lock it to break the link.
	 * Since we're inverting the lock order here, only try.
	 */
	inode = cache->inode;

	if (!spin_trylock(&inode->i_lock))
		return LRU_SKIP;

	kref_get(&cache->ref);

	cache->inode = NULL;
	NFS_I(inode)->xattr_cache = NULL;
	NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_XATTR;
	list_lru_isolate(lru, &cache->lru);

	spin_unlock(&inode->i_lock);

	list_add_tail(&cache->dispose, dispose);
	return LRU_REMOVED;
}

static unsigned long
nfs4_xattr_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	LIST_HEAD(dispose);
	unsigned long freed;
	struct nfs4_xattr_cache *cache;

	freed = list_lru_shrink_walk(&nfs4_xattr_cache_lru, sc,
	    cache_lru_isolate, &dispose);
	while (!list_empty(&dispose)) {
		cache = list_first_entry(&dispose, struct nfs4_xattr_cache,
		    dispose);
		list_del_init(&cache->dispose);
		nfs4_xattr_discard_cache(cache);
		kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
	}

	return freed;
}

static unsigned long
nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
	unsigned long count;

	count = list_lru_shrink_count(&nfs4_xattr_cache_lru, sc);
	return vfs_pressure_ratio(count);
}

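/*
 * list_lru walk callback for the entry shrinkers: unhook an entry from
 * its bucket (or from the cache's listxattr slot) and move it to the
 * dispose list passed in @arg.
 */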
static enum lru_status
entry_lru_isolate(struct list_head *item,
	struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	struct list_head *dispose = arg;
	struct nfs4_xattr_bucket *bucket;
	struct nfs4_xattr_cache *cache;
	struct nfs4_xattr_entry *entry = container_of(item,
	    struct nfs4_xattr_entry, lru);

	bucket = entry->bucket;
	cache = bucket->cache;

	/*
	 * Unhook the entry from its parent (either a cache bucket
	 * or a cache structure if it's a listxattr buf), so that
	 * it's no longer found. Then add it to the isolate list,
	 * to be freed later.
	 *
	 * In both cases, we're inverting the lock order, so use
	 * trylock and skip the entry if we can't get the lock.
	 */
	if (entry->xattr_name != NULL) {
		/* Regular cache entry */
		if (!spin_trylock(&bucket->lock))
			return LRU_SKIP;

		kref_get(&entry->ref);

		hlist_del_init(&entry->hnode);
		atomic_long_dec(&cache->nent);
		list_lru_isolate(lru, &entry->lru);

		spin_unlock(&bucket->lock);
	} else {
		/* Listxattr cache entry */
		if (!spin_trylock(&cache->listxattr_lock))
			return LRU_SKIP;

		kref_get(&entry->ref);

		cache->listxattr = NULL;
		list_lru_isolate(lru, &entry->lru);

		spin_unlock(&cache->listxattr_lock);
	}

	list_add_tail(&entry->dispose, dispose);
	return LRU_REMOVED;
}

static unsigned long
nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	LIST_HEAD(dispose);
	unsigned long freed;
	struct nfs4_xattr_entry *entry;
	struct list_lru *lru;

	lru = (shrink == nfs4_xattr_large_entry_shrinker) ?
	    &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;

	freed = list_lru_shrink_walk(lru, sc, entry_lru_isolate, &dispose);

	while (!list_empty(&dispose)) {
		entry = list_first_entry(&dispose, struct nfs4_xattr_entry,
		    dispose);
		list_del_init(&entry->dispose);

		/*
		 * Drop two references: the one that we just grabbed
		 * in entry_lru_isolate, and the one that was set
		 * when the entry was first allocated.
		 */
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
		kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
	}

	return freed;
}

static unsigned long
nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc)
{
	unsigned long count;
	struct list_lru *lru;

	lru = (shrink == nfs4_xattr_large_entry_shrinker) ?
	    &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;

	count = list_lru_shrink_count(lru, sc);
	return vfs_pressure_ratio(count);
}

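/*
 * kmem_cache constructor: runs once per object when its slab memory is
 * first allocated, so the free path must leave objects in this same
 * state when returning them to the slab.
 */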
static void nfs4_xattr_cache_init_once(void *p)
{
	struct nfs4_xattr_cache *cache = p;

	spin_lock_init(&cache->listxattr_lock);
	atomic_long_set(&cache->nent, 0);
	nfs4_xattr_hash_init(cache);
	cache->listxattr = NULL;
	INIT_LIST_HEAD(&cache->lru);
	INIT_LIST_HEAD(&cache->dispose);
}

typedef unsigned long (*count_objects_cb)(struct shrinker *s,
					  struct shrink_control *sc);
typedef unsigned long (*scan_objects_cb)(struct shrinker *s,
					 struct shrink_control *sc);

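/*
 * Allocate, set up and register a shrinker together with the list_lru it
 * walks. On failure, nothing is left allocated or registered.
 */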
static int __init nfs4_xattr_shrinker_init(struct shrinker **shrinker,
					   struct list_lru *lru, const char *name,
					   count_objects_cb count,
					   scan_objects_cb scan, long batch, int seeks)
{
	int ret;

	*shrinker = shrinker_alloc(SHRINKER_MEMCG_AWARE, name);
	if (!*shrinker)
		return -ENOMEM;

	ret = list_lru_init_memcg(lru, *shrinker);
	if (ret) {
		shrinker_free(*shrinker);
		return ret;
	}

	(*shrinker)->count_objects = count;
	(*shrinker)->scan_objects = scan;
	(*shrinker)->batch = batch;
	(*shrinker)->seeks = seeks;

	shrinker_register(*shrinker);

	return ret;
}

static void nfs4_xattr_shrinker_destroy(struct shrinker *shrinker,
					struct list_lru *lru)
{
	shrinker_free(shrinker);
	list_lru_destroy(lru);
}

int __init nfs4_xattr_cache_init(void)
{
	int ret = 0;

	nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache",
	    sizeof(struct nfs4_xattr_cache), 0,
	    (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
	    nfs4_xattr_cache_init_once);
	if (nfs4_xattr_cache_cachep == NULL)
		return -ENOMEM;

	ret = nfs4_xattr_shrinker_init(&nfs4_xattr_cache_shrinker,
				       &nfs4_xattr_cache_lru, "nfs-xattr_cache",
				       nfs4_xattr_cache_count,
				       nfs4_xattr_cache_scan, 0, DEFAULT_SEEKS);
	if (ret)
		goto out1;

	ret = nfs4_xattr_shrinker_init(&nfs4_xattr_entry_shrinker,
				       &nfs4_xattr_entry_lru, "nfs-xattr_entry",
				       nfs4_xattr_entry_count,
				       nfs4_xattr_entry_scan, 512, DEFAULT_SEEKS);
	if (ret)
		goto out2;

	ret = nfs4_xattr_shrinker_init(&nfs4_xattr_large_entry_shrinker,
				       &nfs4_xattr_large_entry_lru,
				       "nfs-xattr_large_entry",
				       nfs4_xattr_entry_count,
				       nfs4_xattr_entry_scan, 512, 1);
	if (!ret)
		return 0;

	nfs4_xattr_shrinker_destroy(nfs4_xattr_entry_shrinker,
				    &nfs4_xattr_entry_lru);
out2:
	nfs4_xattr_shrinker_destroy(nfs4_xattr_cache_shrinker,
				    &nfs4_xattr_cache_lru);
out1:
	kmem_cache_destroy(nfs4_xattr_cache_cachep);

	return ret;
}

void nfs4_xattr_cache_exit(void)
{
	nfs4_xattr_shrinker_destroy(nfs4_xattr_large_entry_shrinker,
				    &nfs4_xattr_large_entry_lru);
	nfs4_xattr_shrinker_destroy(nfs4_xattr_entry_shrinker,
				    &nfs4_xattr_entry_lru);
	nfs4_xattr_shrinker_destroy(nfs4_xattr_cache_shrinker,
				    &nfs4_xattr_cache_lru);
	kmem_cache_destroy(nfs4_xattr_cache_cachep);
}