zsmalloc.c source code [linux/mm/zsmalloc.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
2
3	/*
4	* zsmalloc memory allocator
5	*
6	* Copyright (C) 2011 Nitin Gupta
7	* Copyright (C) 2012, 2013 Minchan Kim
8	*
9	* This code is released using a dual license strategy: BSD/GPL
10	* You can choose the license that better fits your requirements.
11	*
12	* Released under the terms of 3-clause BSD License
13	* Released under the terms of GNU General Public License Version 2.0
14	*/
15
16	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
17
18	/*
19	* lock ordering:
20	* page_lock
21	* pool->lock
22	* class->lock
23	* zspage->lock
24	*/
25
26	#include <linux/module.h>
27	#include <linux/kernel.h>
28	#include <linux/sched.h>
29	#include <linux/errno.h>
30	#include <linux/highmem.h>
31	#include <linux/string.h>
32	#include <linux/slab.h>
33	#include <linux/spinlock.h>
34	#include <linux/sprintf.h>
35	#include <linux/shrinker.h>
36	#include <linux/types.h>
37	#include <linux/debugfs.h>
38	#include <linux/zsmalloc.h>
39	#include <linux/fs.h>
40	#include <linux/workqueue.h>
41	#include "zpdesc.h"
42
43	#define ZSPAGE_MAGIC 0x58
44
45	/*
46	* This must be power of 2 and greater than or equal to sizeof(link_free).
47	* These two conditions ensure that any 'struct link_free' itself doesn't
48	* span more than 1 page which avoids complex case of mapping 2 pages simply
49	* to restore link_free pointer values.
50	*/
51	#define ZS_ALIGN 8
52
53	#define ZS_HANDLE_SIZE (sizeof(unsigned long))
54
55	/*
56	* Object location (<PFN>, <obj_idx>) is encoded as
57	* a single (unsigned long) handle value.
58	*
59	* Note that object index <obj_idx> starts from 0.
60	*
61	* This is made more complicated by various memory models and PAE.
62	*/
63
64	#ifndef MAX_POSSIBLE_PHYSMEM_BITS
65	#ifdef MAX_PHYSMEM_BITS
66	#define MAX_POSSIBLE_PHYSMEM_BITS MAX_PHYSMEM_BITS
67	#else
68	/*
69	* If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
70	* be PAGE_SHIFT
71	*/
72	#define MAX_POSSIBLE_PHYSMEM_BITS BITS_PER_LONG
73	#endif
74	#endif
75
76	#define _PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
77
78	/*
79	* Head in allocated object should have OBJ_ALLOCATED_TAG
80	* to identify the object was allocated or not.
81	* It's okay to add the status bit in the least bit because
82	* header keeps handle which is 4byte-aligned address so we
83	* have room for two bit at least.
84	*/
85	#define OBJ_ALLOCATED_TAG 1
86
87	#define OBJ_TAG_BITS 1
88	#define OBJ_TAG_MASK OBJ_ALLOCATED_TAG
89
90	#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
91	#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
92
93	#define HUGE_BITS 1
94	#define FULLNESS_BITS 4
95	#define CLASS_BITS 8
96	#define MAGIC_VAL_BITS 8
97
98	#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(CONFIG_ZSMALLOC_CHAIN_SIZE, UL))
99
100	/ ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN /
101	#define ZS_MIN_ALLOC_SIZE \
102	MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
103	/ each chunk includes extra space to keep handle /
104	#define ZS_MAX_ALLOC_SIZE PAGE_SIZE
105
106	/*
107	* On systems with 4K page size, this gives 255 size classes! There is a
108	* trader-off here:
109	* - Large number of size classes is potentially wasteful as free page are
110	* spread across these classes
111	* - Small number of size classes causes large internal fragmentation
112	* - Probably its better to use specific size classes (empirically
113	* determined). NOTE: all those class sizes must be set as multiple of
114	* ZS_ALIGN to make sure link_free itself never has to span 2 pages.
115	*
116	* ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN
117	* (reason above)
118	*/
119	#define ZS_SIZE_CLASS_DELTA (PAGE_SIZE >> CLASS_BITS)
120	#define ZS_SIZE_CLASSES (DIV_ROUND_UP(ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE, \
121	ZS_SIZE_CLASS_DELTA) + 1)
122
123	/*
124	* Pages are distinguished by the ratio of used memory (that is the ratio
125	* of ->inuse objects to all objects that page can store). For example,
126	* INUSE_RATIO_10 means that the ratio of used objects is > 0% and <= 10%.
127	*
128	* The number of fullness groups is not random. It allows us to keep
129	* difference between the least busy page in the group (minimum permitted
130	* number of ->inuse objects) and the most busy page (maximum permitted
131	* number of ->inuse objects) at a reasonable value.
132	*/
133	enum fullness_group {
134	ZS_INUSE_RATIO_0,
135	ZS_INUSE_RATIO_10,
136	/ NOTE: 8 more fullness groups here /
137	ZS_INUSE_RATIO_99 = `10`,
138	ZS_INUSE_RATIO_100,
139	NR_FULLNESS_GROUPS,
140	};
141
142	enum class_stat_type {
143	/ NOTE: stats for 12 fullness groups here: from inuse 0 to 100 /
144	ZS_OBJS_ALLOCATED = NR_FULLNESS_GROUPS,
145	ZS_OBJS_INUSE,
146	NR_CLASS_STAT_TYPES,
147	};
148
149	struct zs_size_stat {
150	unsigned long objs[NR_CLASS_STAT_TYPES];
151	};
152
153	#ifdef CONFIG_ZSMALLOC_STAT
154	static struct dentry *zs_stat_root;
155	#endif
156
157	static size_t huge_class_size;
158
159	struct size_class {
160	spinlock_t lock;
161	struct list_head fullness_list[NR_FULLNESS_GROUPS];
162	/*
163	* Size of objects stored in this class. Must be multiple
164	* of ZS_ALIGN.
165	*/
166	int size;
167	int objs_per_zspage;
168	/ Number of PAGE_SIZE sized pages to combine to form a 'zspage' /
169	int pages_per_zspage;
170
171	unsigned int index;
172	struct zs_size_stat stats;
173	};
174
175	/*
176	* Placed within free objects to form a singly linked list.
177	* For every zspage, zspage->freeobj gives head of this list.
178	*
179	* This must be power of 2 and less than or equal to ZS_ALIGN
180	*/
181	struct link_free {
182	union {
183	/*
184	* Free object index;
185	* It's valid for non-allocated object
186	*/
187	unsigned long next;
188	/*
189	* Handle of allocated object.
190	*/
191	unsigned long handle;
192	};
193	};
194
195	struct zs_pool {
196	const char *name;
197
198	struct size_class *size_class[ZS_SIZE_CLASSES];
199	struct kmem_cache *handle_cachep;
200	struct kmem_cache *zspage_cachep;
201
202	atomic_long_t pages_allocated;
203
204	struct zs_pool_stats stats;
205
206	/ Compact classes /
207	struct shrinker *shrinker;
208
209	#ifdef CONFIG_ZSMALLOC_STAT
210	struct dentry *stat_dentry;
211	#endif
212	#ifdef CONFIG_COMPACTION
213	struct work_struct free_work;
214	#endif
215	/ protect zspage migration/compaction /
216	rwlock_t lock;
217	atomic_t compaction_in_progress;
218	};
219
220	static inline void zpdesc_set_first(struct zpdesc *zpdesc)
221	{
222	SetPagePrivate(zpdesc_page(zpdesc));
223	}
224
225	static inline void zpdesc_inc_zone_page_state(struct zpdesc *zpdesc)
226	{
227	inc_zone_page_state(zpdesc_page(zpdesc), NR_ZSPAGES);
228	}
229
230	static inline void zpdesc_dec_zone_page_state(struct zpdesc *zpdesc)
231	{
232	dec_zone_page_state(zpdesc_page(zpdesc), NR_ZSPAGES);
233	}
234
235	static inline struct zpdesc alloc_zpdesc(gfp_t gfp, const* int nid)
236	{
237	struct page *page = alloc_pages_node(nid, gfp, `0`);
238
239	return page_zpdesc(page);
240	}
241
242	static inline void free_zpdesc(struct zpdesc *zpdesc)
243	{
244	struct page *page = zpdesc_page(zpdesc);
245
246	/ PageZsmalloc is sticky until the page is freed to the buddy. /
247	__free_page(page);
248	}
249
250	#define ZS_PAGE_UNLOCKED 0
251	#define ZS_PAGE_WRLOCKED -1
252
253	struct zspage_lock {
254	spinlock_t lock;
255	int cnt;
256	struct lockdep_map dep_map;
257	};
258
259	struct zspage {
260	struct {
261	unsigned int huge:HUGE_BITS;
262	unsigned int fullness:FULLNESS_BITS;
263	unsigned int class:CLASS_BITS + `1`;
264	unsigned int magic:MAGIC_VAL_BITS;
265	};
266	unsigned int inuse;
267	unsigned int freeobj;
268	struct zpdesc *first_zpdesc;
269	struct list_head list; / fullness list /
270	struct zs_pool *pool;
271	struct zspage_lock zsl;
272	};
273
274	static void zspage_lock_init(struct zspage *zspage)
275	{
276	static struct lock_class_key __key;
277	struct zspage_lock *zsl = &zspage->zsl;
278
279	lockdep_init_map(lock: &zsl->dep_map, name: "zspage->lock", key: &__key, subclass: `0`);
280	spin_lock_init(&zsl->lock);
281	zsl->cnt = ZS_PAGE_UNLOCKED;
282	}
283
284	/*
285	* The zspage lock can be held from atomic contexts, but it needs to remain
286	* preemptible when held for reading because it remains held outside of those
287	* atomic contexts, otherwise we unnecessarily lose preemptibility.
288	*
289	* To achieve this, the following rules are enforced on readers and writers:
290	*
291	* - Writers are blocked by both writers and readers, while readers are only
292	* blocked by writers (i.e. normal rwlock semantics).
293	*
294	* - Writers are always atomic (to allow readers to spin waiting for them).
295	*
296	* - Writers always use trylock (as the lock may be held be sleeping readers).
297	*
298	* - Readers may spin on the lock (as they can only wait for atomic writers).
299	*
300	* - Readers may sleep while holding the lock (as writes only use trylock).
301	*/
302	static void zspage_read_lock(struct zspage *zspage)
303	{
304	struct zspage_lock *zsl = &zspage->zsl;
305
306	rwsem_acquire_read(&zsl->dep_map, `0`, `0`, _RET_IP_);
307
308	spin_lock(lock: &zsl->lock);
309	zsl->cnt++;
310	spin_unlock(lock: &zsl->lock);
311
312	lock_acquired(lock: &zsl->dep_map, _RET_IP_);
313	}
314
315	static void zspage_read_unlock(struct zspage *zspage)
316	{
317	struct zspage_lock *zsl = &zspage->zsl;
318
319	rwsem_release(&zsl->dep_map, _RET_IP_);
320
321	spin_lock(lock: &zsl->lock);
322	zsl->cnt--;
323	spin_unlock(lock: &zsl->lock);
324	}
325
326	static __must_check bool zspage_write_trylock(struct zspage *zspage)
327	{
328	struct zspage_lock *zsl = &zspage->zsl;
329
330	spin_lock(lock: &zsl->lock);
331	if (zsl->cnt == ZS_PAGE_UNLOCKED) {
332	zsl->cnt = ZS_PAGE_WRLOCKED;
333	rwsem_acquire(&zsl->dep_map, `0`, `1`, _RET_IP_);
334	lock_acquired(lock: &zsl->dep_map, _RET_IP_);
335	return true;
336	}
337
338	spin_unlock(lock: &zsl->lock);
339	return false;
340	}
341
342	static void zspage_write_unlock(struct zspage *zspage)
343	{
344	struct zspage_lock *zsl = &zspage->zsl;
345
346	rwsem_release(&zsl->dep_map, _RET_IP_);
347
348	zsl->cnt = ZS_PAGE_UNLOCKED;
349	spin_unlock(lock: &zsl->lock);
350	}
351
352	/ huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 /
353	static void SetZsHugePage(struct zspage *zspage)
354	{
355	zspage->huge = `1`;
356	}
357
358	static bool ZsHugePage(struct zspage *zspage)
359	{
360	return zspage->huge;
361	}
362
363	#ifdef CONFIG_COMPACTION
364	static void kick_deferred_free(struct zs_pool *pool);
365	static void init_deferred_free(struct zs_pool *pool);
366	static void SetZsPageMovable(struct zs_pool pool, struct* zspage *zspage);
367	#else
368	static void kick_deferred_free(struct zs_pool *pool) {}
369	static void init_deferred_free(struct zs_pool *pool) {}
370	static void SetZsPageMovable(struct zs_pool pool, struct* zspage *zspage) {}
371	#endif
372
373	static int create_cache(struct zs_pool *pool)
374	{
375	char *name;
376
377	name = kasprintf(GFP_KERNEL, fmt: "zs_handle-%s", pool->name);
378	if (!name)
379	return -ENOMEM;
380	pool->handle_cachep = kmem_cache_create(name, ZS_HANDLE_SIZE,
381	`0`, `0`, NULL);
382	kfree(objp: name);
383	if (!pool->handle_cachep)
384	return -EINVAL;
385
386	name = kasprintf(GFP_KERNEL, fmt: "zspage-%s", pool->name);
387	if (!name)
388	return -ENOMEM;
389	pool->zspage_cachep = kmem_cache_create(name, sizeof(struct zspage),
390	`0`, `0`, NULL);
391	kfree(objp: name);
392	if (!pool->zspage_cachep) {
393	kmem_cache_destroy(s: pool->handle_cachep);
394	pool->handle_cachep = NULL;
395	return -EINVAL;
396	}
397
398	return `0`;
399	}
400
401	static void destroy_cache(struct zs_pool *pool)
402	{
403	kmem_cache_destroy(s: pool->handle_cachep);
404	kmem_cache_destroy(s: pool->zspage_cachep);
405	}
406
407	static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
408	{
409	return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
410	gfp & ~(__GFP_HIGHMEM\|__GFP_MOVABLE));
411	}
412
413	static void cache_free_handle(struct zs_pool pool, unsigned* long handle)
414	{
415	kmem_cache_free(s: pool->handle_cachep, objp: (void *)handle);
416	}
417
418	static struct zspage cache_alloc_zspage(struct* zs_pool *pool, gfp_t flags)
419	{
420	return kmem_cache_zalloc(pool->zspage_cachep,
421	flags & ~(__GFP_HIGHMEM\|__GFP_MOVABLE));
422	}
423
424	static void cache_free_zspage(struct zs_pool pool, struct* zspage *zspage)
425	{
426	kmem_cache_free(s: pool->zspage_cachep, objp: zspage);
427	}
428
429	/ class->lock(which owns the handle) synchronizes races /
430	static void record_obj(unsigned long handle, unsigned long obj)
431	{
432	(unsigned* long *)handle = obj;
433	}
434
435	static inline bool __maybe_unused is_first_zpdesc(struct zpdesc *zpdesc)
436	{
437	return PagePrivate(zpdesc_page(zpdesc));
438	}
439
440	/ Protected by class->lock /
441	static inline int get_zspage_inuse(struct zspage *zspage)
442	{
443	return zspage->inuse;
444	}
445
446	static inline void mod_zspage_inuse(struct zspage zspage, int* val)
447	{
448	zspage->inuse += val;
449	}
450
451	static struct zpdesc get_first_zpdesc(struct* zspage *zspage)
452	{
453	struct zpdesc *first_zpdesc = zspage->first_zpdesc;
454
455	VM_BUG_ON_PAGE(!is_first_zpdesc(first_zpdesc), zpdesc_page(first_zpdesc));
456	return first_zpdesc;
457	}
458
459	#define FIRST_OBJ_PAGE_TYPE_MASK 0xffffff
460
461	static inline unsigned int get_first_obj_offset(struct zpdesc *zpdesc)
462	{
463	VM_WARN_ON_ONCE(!PageZsmalloc(zpdesc_page(zpdesc)));
464	return zpdesc->first_obj_offset & FIRST_OBJ_PAGE_TYPE_MASK;
465	}
466
467	static inline void set_first_obj_offset(struct zpdesc zpdesc, unsigned* int offset)
468	{
469	/ With 24 bits available, we can support offsets into 16 MiB pages. /
470	BUILD_BUG_ON(PAGE_SIZE > SZ_16M);
471	VM_WARN_ON_ONCE(!PageZsmalloc(zpdesc_page(zpdesc)));
472	VM_WARN_ON_ONCE(offset & ~FIRST_OBJ_PAGE_TYPE_MASK);
473	zpdesc->first_obj_offset &= ~FIRST_OBJ_PAGE_TYPE_MASK;
474	zpdesc->first_obj_offset \|= offset & FIRST_OBJ_PAGE_TYPE_MASK;
475	}
476
477	static inline unsigned int get_freeobj(struct zspage *zspage)
478	{
479	return zspage->freeobj;
480	}
481
482	static inline void set_freeobj(struct zspage zspage, unsigned* int obj)
483	{
484	zspage->freeobj = obj;
485	}
486
487	static struct size_class zspage_class(struct* zs_pool *pool,
488	struct zspage *zspage)
489	{
490	return pool->size_class[zspage->class];
491	}
492
493	/*
494	* zsmalloc divides the pool into various size classes where each
495	* class maintains a list of zspages where each zspage is divided
496	* into equal sized chunks. Each allocation falls into one of these
497	* classes depending on its size. This function returns index of the
498	* size class which has chunk size big enough to hold the given size.
499	*/
500	static int get_size_class_index(int size)
501	{
502	int idx = `0`;
503
504	if (likely(size > ZS_MIN_ALLOC_SIZE))
505	idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,
506	ZS_SIZE_CLASS_DELTA);
507
508	return min_t(int, ZS_SIZE_CLASSES - `1`, idx);
509	}
510
511	static inline void class_stat_add(struct size_class class, int* type,
512	unsigned long cnt)
513	{
514	class->stats.objs[type] += cnt;
515	}
516
517	static inline void class_stat_sub(struct size_class class, int* type,
518	unsigned long cnt)
519	{
520	class->stats.objs[type] -= cnt;
521	}
522
523	static inline unsigned long class_stat_read(struct size_class class, int* type)
524	{
525	return class->stats.objs[type];
526	}
527
528	#ifdef CONFIG_ZSMALLOC_STAT
529
530	static void __init zs_stat_init(void)
531	{
532	if (!debugfs_initialized()) {
533	pr_warn("debugfs not available, stat dir not created\n");
534	return;
535	}
536
537	zs_stat_root = debugfs_create_dir(name: "zsmalloc", NULL);
538	}
539
540	static void __exit zs_stat_exit(void)
541	{
542	debugfs_remove_recursive(dentry: zs_stat_root);
543	}
544
545	static unsigned long zs_can_compact(struct size_class *class);
546
547	static int zs_stats_size_show(struct seq_file s, void* *v)
548	{
549	int i, fg;
550	struct zs_pool *pool = s->private;
551	struct size_class *class;
552	int objs_per_zspage;
553	unsigned long obj_allocated, obj_used, pages_used, freeable;
554	unsigned long total_objs = `0`, total_used_objs = `0`, total_pages = `0`;
555	unsigned long total_freeable = `0`;
556	unsigned long inuse_totals[NR_FULLNESS_GROUPS] = {`0`, };
557
558	seq_printf(m: s, fmt: " %5s %5s %9s %9s %9s %9s %9s %9s %9s %9s %9s %9s %9s %13s %10s %10s %16s %8s\n",
559	"class", "size", "10%", "20%", "30%", "40%",
560	"50%", "60%", "70%", "80%", "90%", "99%", "100%",
561	"obj_allocated", "obj_used", "pages_used",
562	"pages_per_zspage", "freeable");
563
564	for (i = `0`; i < ZS_SIZE_CLASSES; i++) {
565
566	class = pool->size_class[i];
567
568	if (class->index != i)
569	continue;
570
571	spin_lock(lock: &class->lock);
572
573	seq_printf(m: s, fmt: " %5u %5u ", i, class->size);
574	for (fg = ZS_INUSE_RATIO_10; fg < NR_FULLNESS_GROUPS; fg++) {
575	inuse_totals[fg] += class_stat_read(class, type: fg);
576	seq_printf(m: s, fmt: "%9lu ", class_stat_read(class, type: fg));
577	}
578
579	obj_allocated = class_stat_read(class, type: ZS_OBJS_ALLOCATED);
580	obj_used = class_stat_read(class, type: ZS_OBJS_INUSE);
581	freeable = zs_can_compact(class);
582	spin_unlock(lock: &class->lock);
583
584	objs_per_zspage = class->objs_per_zspage;
585	pages_used = obj_allocated / objs_per_zspage *
586	class->pages_per_zspage;
587
588	seq_printf(m: s, fmt: "%13lu %10lu %10lu %16d %8lu\n",
589	obj_allocated, obj_used, pages_used,
590	class->pages_per_zspage, freeable);
591
592	total_objs += obj_allocated;
593	total_used_objs += obj_used;
594	total_pages += pages_used;
595	total_freeable += freeable;
596	}
597
598	seq_puts(m: s, s: "\n");
599	seq_printf(m: s, fmt: " %5s %5s ", "Total", "");
600
601	for (fg = ZS_INUSE_RATIO_10; fg < NR_FULLNESS_GROUPS; fg++)
602	seq_printf(m: s, fmt: "%9lu ", inuse_totals[fg]);
603
604	seq_printf(m: s, fmt: "%13lu %10lu %10lu %16s %8lu\n",
605	total_objs, total_used_objs, total_pages, "",
606	total_freeable);
607
608	return `0`;
609	}
610	DEFINE_SHOW_ATTRIBUTE(zs_stats_size);
611
612	static void zs_pool_stat_create(struct zs_pool pool, const* char *name)
613	{
614	if (!zs_stat_root) {
615	pr_warn("no root stat dir, not creating <%s> stat dir\n", name);
616	return;
617	}
618
619	pool->stat_dentry = debugfs_create_dir(name, parent: zs_stat_root);
620
621	debugfs_create_file("classes", S_IFREG \| `0444`, pool->stat_dentry, pool,
622	&zs_stats_size_fops);
623	}
624
625	static void zs_pool_stat_destroy(struct zs_pool *pool)
626	{
627	debugfs_remove_recursive(dentry: pool->stat_dentry);
628	}
629
630	#else /* CONFIG_ZSMALLOC_STAT */
631	static void __init zs_stat_init(void)
632	{
633	}
634
635	static void __exit zs_stat_exit(void)
636	{
637	}
638
639	static inline void zs_pool_stat_create(struct zs_pool pool, const* char *name)
640	{
641	}
642
643	static inline void zs_pool_stat_destroy(struct zs_pool *pool)
644	{
645	}
646	#endif
647
648
649	/*
650	* For each size class, zspages are divided into different groups
651	* depending on their usage ratio. This function returns fullness
652	* status of the given page.
653	*/
654	static int get_fullness_group(struct size_class class, struct* zspage *zspage)
655	{
656	int inuse, objs_per_zspage, ratio;
657
658	inuse = get_zspage_inuse(zspage);
659	objs_per_zspage = class->objs_per_zspage;
660
661	if (inuse == `0`)
662	return ZS_INUSE_RATIO_0;
663	if (inuse == objs_per_zspage)
664	return ZS_INUSE_RATIO_100;
665
666	ratio = `100` * inuse / objs_per_zspage;
667	/*
668	* Take integer division into consideration: a page with one inuse
669	* object out of 127 possible, will end up having 0 usage ratio,
670	* which is wrong as it belongs in ZS_INUSE_RATIO_10 fullness group.
671	*/
672	return ratio / `10` + `1`;
673	}
674
675	/*
676	* Each size class maintains various freelists and zspages are assigned
677	* to one of these freelists based on the number of live objects they
678	* have. This functions inserts the given zspage into the freelist
679	* identified by <class, fullness_group>.
680	*/
681	static void insert_zspage(struct size_class *class,
682	struct zspage *zspage,
683	int fullness)
684	{
685	class_stat_add(class, type: fullness, cnt: `1`);
686	list_add(new: &zspage->list, head: &class->fullness_list[fullness]);
687	zspage->fullness = fullness;
688	}
689
690	/*
691	* This function removes the given zspage from the freelist identified
692	* by <class, fullness_group>.
693	*/
694	static void remove_zspage(struct size_class class, struct* zspage *zspage)
695	{
696	int fullness = zspage->fullness;
697
698	VM_BUG_ON(list_empty(&class->fullness_list[fullness]));
699
700	list_del_init(entry: &zspage->list);
701	class_stat_sub(class, type: fullness, cnt: `1`);
702	}
703
704	/*
705	* Each size class maintains zspages in different fullness groups depending
706	* on the number of live objects they contain. When allocating or freeing
707	* objects, the fullness status of the page can change, for instance, from
708	* INUSE_RATIO_80 to INUSE_RATIO_70 when freeing an object. This function
709	* checks if such a status change has occurred for the given page and
710	* accordingly moves the page from the list of the old fullness group to that
711	* of the new fullness group.
712	*/
713	static int fix_fullness_group(struct size_class class, struct* zspage *zspage)
714	{
715	int newfg;
716
717	newfg = get_fullness_group(class, zspage);
718	if (newfg == zspage->fullness)
719	goto out;
720
721	remove_zspage(class, zspage);
722	insert_zspage(class, zspage, fullness: newfg);
723	out:
724	return newfg;
725	}
726
727	static struct zspage get_zspage(struct* zpdesc *zpdesc)
728	{
729	struct zspage *zspage = zpdesc->zspage;
730
731	BUG_ON(zspage->magic != ZSPAGE_MAGIC);
732	return zspage;
733	}
734
735	static struct zpdesc get_next_zpdesc(struct* zpdesc *zpdesc)
736	{
737	struct zspage *zspage = get_zspage(zpdesc);
738
739	if (unlikely(ZsHugePage(zspage)))
740	return NULL;
741
742	return zpdesc->next;
743	}
744
745	/**
746	* obj_to_location - get (<zpdesc>, <obj_idx>) from encoded object value
747	* @obj: the encoded object value
748	* @zpdesc: zpdesc object resides in zspage
749	* @obj_idx: object index
750	*/
751	static void obj_to_location(unsigned long obj, struct zpdesc **zpdesc,
752	unsigned int *obj_idx)
753	{
754	*zpdesc = pfn_zpdesc(pfn: obj >> OBJ_INDEX_BITS);
755	*obj_idx = (obj & OBJ_INDEX_MASK);
756	}
757
758	static void obj_to_zpdesc(unsigned long obj, struct zpdesc **zpdesc)
759	{
760	*zpdesc = pfn_zpdesc(pfn: obj >> OBJ_INDEX_BITS);
761	}
762
763	/**
764	* location_to_obj - get obj value encoded from (<zpdesc>, <obj_idx>)
765	* @zpdesc: zpdesc object resides in zspage
766	* @obj_idx: object index
767	*/
768	static unsigned long location_to_obj(struct zpdesc zpdesc, unsigned* int obj_idx)
769	{
770	unsigned long obj;
771
772	obj = zpdesc_pfn(zpdesc) << OBJ_INDEX_BITS;
773	obj \|= obj_idx & OBJ_INDEX_MASK;
774
775	return obj;
776	}
777
778	static unsigned long handle_to_obj(unsigned long handle)
779	{
780	return (unsigned* long *)handle;
781	}
782
783	static inline bool obj_allocated(struct zpdesc zpdesc, void* *obj,
784	unsigned long *phandle)
785	{
786	unsigned long handle;
787	struct zspage *zspage = get_zspage(zpdesc);
788
789	if (unlikely(ZsHugePage(zspage))) {
790	VM_BUG_ON_PAGE(!is_first_zpdesc(zpdesc), zpdesc_page(zpdesc));
791	handle = zpdesc->handle;
792	} else
793	handle = (unsigned* long *)obj;
794
795	if (!(handle & OBJ_ALLOCATED_TAG))
796	return false;
797
798	/ Clear all tags before returning the handle /
799	*phandle = handle & ~OBJ_TAG_MASK;
800	return true;
801	}
802
803	static void reset_zpdesc(struct zpdesc *zpdesc)
804	{
805	struct page *page = zpdesc_page(zpdesc);
806
807	ClearPagePrivate(page);
808	zpdesc->zspage = NULL;
809	zpdesc->next = NULL;
810	/ PageZsmalloc is sticky until the page is freed to the buddy. /
811	}
812
813	static int trylock_zspage(struct zspage *zspage)
814	{
815	struct zpdesc cursor, fail;
816
817	for (cursor = get_first_zpdesc(zspage); cursor != NULL; cursor =
818	get_next_zpdesc(zpdesc: cursor)) {
819	if (!zpdesc_trylock(zpdesc: cursor)) {
820	fail = cursor;
821	goto unlock;
822	}
823	}
824
825	return `1`;
826	unlock:
827	for (cursor = get_first_zpdesc(zspage); cursor != fail; cursor =
828	get_next_zpdesc(zpdesc: cursor))
829	zpdesc_unlock(zpdesc: cursor);
830
831	return `0`;
832	}
833
834	static void __free_zspage(struct zs_pool pool, struct* size_class *class,
835	struct zspage *zspage)
836	{
837	struct zpdesc zpdesc, next;
838
839	assert_spin_locked(&class->lock);
840
841	VM_BUG_ON(get_zspage_inuse(zspage));
842	VM_BUG_ON(zspage->fullness != ZS_INUSE_RATIO_0);
843
844	next = zpdesc = get_first_zpdesc(zspage);
845	do {
846	VM_BUG_ON_PAGE(!zpdesc_is_locked(zpdesc), zpdesc_page(zpdesc));
847	next = get_next_zpdesc(zpdesc);
848	reset_zpdesc(zpdesc);
849	zpdesc_unlock(zpdesc);
850	zpdesc_dec_zone_page_state(zpdesc);
851	zpdesc_put(zpdesc);
852	zpdesc = next;
853	} while (zpdesc != NULL);
854
855	cache_free_zspage(pool, zspage);
856
857	class_stat_sub(class, type: ZS_OBJS_ALLOCATED, cnt: class->objs_per_zspage);
858	atomic_long_sub(i: class->pages_per_zspage, v: &pool->pages_allocated);
859	}
860
861	static void free_zspage(struct zs_pool pool, struct* size_class *class,
862	struct zspage *zspage)
863	{
864	VM_BUG_ON(get_zspage_inuse(zspage));
865	VM_BUG_ON(list_empty(&zspage->list));
866
867	/*
868	* Since zs_free couldn't be sleepable, this function cannot call
869	* lock_page. The page locks trylock_zspage got will be released
870	* by __free_zspage.
871	*/
872	if (!trylock_zspage(zspage)) {
873	kick_deferred_free(pool);
874	return;
875	}
876
877	remove_zspage(class, zspage);
878	__free_zspage(pool, class, zspage);
879	}
880
881	/ Initialize a newly allocated zspage /
882	static void init_zspage(struct size_class class, struct* zspage *zspage)
883	{
884	unsigned int freeobj = `1`;
885	unsigned long off = `0`;
886	struct zpdesc *zpdesc = get_first_zpdesc(zspage);
887
888	while (zpdesc) {
889	struct zpdesc *next_zpdesc;
890	struct link_free *link;
891	void *vaddr;
892
893	set_first_obj_offset(zpdesc, offset: off);
894
895	vaddr = kmap_local_zpdesc(zpdesc);
896	link = (struct link_free )vaddr + off / sizeof(link);
897
898	while ((off += class->size) < PAGE_SIZE) {
899	link->next = freeobj++ << OBJ_TAG_BITS;
900	link += class->size / sizeof(*link);
901	}
902
903	/*
904	* We now come to the last (full or partial) object on this
905	* page, which must point to the first object on the next
906	* page (if present)
907	*/
908	next_zpdesc = get_next_zpdesc(zpdesc);
909	if (next_zpdesc) {
910	link->next = freeobj++ << OBJ_TAG_BITS;
911	} else {
912	/*
913	* Reset OBJ_TAG_BITS bit to last link to tell
914	* whether it's allocated object or not.
915	*/
916	link->next = -`1UL` << OBJ_TAG_BITS;
917	}
918	kunmap_local(vaddr);
919	zpdesc = next_zpdesc;
920	off %= PAGE_SIZE;
921	}
922
923	set_freeobj(zspage, obj: `0`);
924	}
925
926	static void create_page_chain(struct size_class class, struct* zspage *zspage,
927	struct zpdesc *zpdescs[])
928	{
929	int i;
930	struct zpdesc *zpdesc;
931	struct zpdesc *prev_zpdesc = NULL;
932	int nr_zpdescs = class->pages_per_zspage;
933
934	/*
935	* Allocate individual pages and link them together as:
936	* 1. all pages are linked together using zpdesc->next
937	* 2. each sub-page point to zspage using zpdesc->zspage
938	*
939	* we set PG_private to identify the first zpdesc (i.e. no other zpdesc
940	* has this flag set).
941	*/
942	for (i = `0`; i < nr_zpdescs; i++) {
943	zpdesc = zpdescs[i];
944	zpdesc->zspage = zspage;
945	zpdesc->next = NULL;
946	if (i == `0`) {
947	zspage->first_zpdesc = zpdesc;
948	zpdesc_set_first(zpdesc);
949	if (unlikely(class->objs_per_zspage == `1` &&
950	class->pages_per_zspage == `1`))
951	SetZsHugePage(zspage);
952	} else {
953	prev_zpdesc->next = zpdesc;
954	}
955	prev_zpdesc = zpdesc;
956	}
957	}
958
959	/*
960	* Allocate a zspage for the given size class
961	*/
962	static struct zspage alloc_zspage(struct* zs_pool *pool,
963	struct size_class *class,
964	gfp_t gfp, const int nid)
965	{
966	int i;
967	struct zpdesc *zpdescs[ZS_MAX_PAGES_PER_ZSPAGE];
968	struct zspage *zspage = cache_alloc_zspage(pool, flags: gfp);
969
970	if (!zspage)
971	return NULL;
972
973	if (!IS_ENABLED(CONFIG_COMPACTION))
974	gfp &= ~__GFP_MOVABLE;
975
976	zspage->magic = ZSPAGE_MAGIC;
977	zspage->pool = pool;
978	zspage->class = class->index;
979	zspage_lock_init(zspage);
980
981	for (i = `0`; i < class->pages_per_zspage; i++) {
982	struct zpdesc *zpdesc;
983
984	zpdesc = alloc_zpdesc(gfp, nid);
985	if (!zpdesc) {
986	while (--i >= `0`) {
987	zpdesc_dec_zone_page_state(zpdesc: zpdescs[i]);
988	free_zpdesc(zpdesc: zpdescs[i]);
989	}
990	cache_free_zspage(pool, zspage);
991	return NULL;
992	}
993	__zpdesc_set_zsmalloc(zpdesc);
994
995	zpdesc_inc_zone_page_state(zpdesc);
996	zpdescs[i] = zpdesc;
997	}
998
999	create_page_chain(class, zspage, zpdescs);
1000	init_zspage(class, zspage);
1001
1002	return zspage;
1003	}
1004
1005	static struct zspage find_get_zspage(struct* size_class *class)
1006	{
1007	int i;
1008	struct zspage *zspage;
1009
1010	for (i = ZS_INUSE_RATIO_99; i >= ZS_INUSE_RATIO_0; i--) {
1011	zspage = list_first_entry_or_null(&class->fullness_list[i],
1012	struct zspage, list);
1013	if (zspage)
1014	break;
1015	}
1016
1017	return zspage;
1018	}
1019
1020	static bool can_merge(struct size_class prev, int* pages_per_zspage,
1021	int objs_per_zspage)
1022	{
1023	if (prev->pages_per_zspage == pages_per_zspage &&
1024	prev->objs_per_zspage == objs_per_zspage)
1025	return true;
1026
1027	return false;
1028	}
1029
1030	static bool zspage_full(struct size_class class, struct* zspage *zspage)
1031	{
1032	return get_zspage_inuse(zspage) == class->objs_per_zspage;
1033	}
1034
1035	static bool zspage_empty(struct zspage *zspage)
1036	{
1037	return get_zspage_inuse(zspage) == `0`;
1038	}
1039
1040	/**
1041	* zs_lookup_class_index() - Returns index of the zsmalloc &size_class
1042	* that hold objects of the provided size.
1043	* @pool: zsmalloc pool to use
1044	* @size: object size
1045	*
1046	* Context: Any context.
1047	*
1048	* Return: the index of the zsmalloc &size_class that hold objects of the
1049	* provided size.
1050	*/
1051	unsigned int zs_lookup_class_index(struct zs_pool pool, unsigned* int size)
1052	{
1053	struct size_class *class;
1054
1055	class = pool->size_class[get_size_class_index(size)];
1056
1057	return class->index;
1058	}
1059	EXPORT_SYMBOL_GPL(zs_lookup_class_index);
1060
1061	unsigned long zs_get_total_pages(struct zs_pool *pool)
1062	{
1063	return atomic_long_read(v: &pool->pages_allocated);
1064	}
1065	EXPORT_SYMBOL_GPL(zs_get_total_pages);
1066
1067	void zs_obj_read_begin(struct* zs_pool pool, unsigned* long handle,
1068	void *local_copy)
1069	{
1070	struct zspage *zspage;
1071	struct zpdesc *zpdesc;
1072	unsigned long obj, off;
1073	unsigned int obj_idx;
1074	struct size_class *class;
1075	void *addr;
1076
1077	/ Guarantee we can get zspage from handle safely /
1078	read_lock(&pool->lock);
1079	obj = handle_to_obj(handle);
1080	obj_to_location(obj, zpdesc: &zpdesc, obj_idx: &obj_idx);
1081	zspage = get_zspage(zpdesc);
1082
1083	/ Make sure migration doesn't move any pages in this zspage /
1084	zspage_read_lock(zspage);
1085	read_unlock(&pool->lock);
1086
1087	class = zspage_class(pool, zspage);
1088	off = offset_in_page(class->size * obj_idx);
1089
1090	if (off + class->size <= PAGE_SIZE) {
1091	/ this object is contained entirely within a page /
1092	addr = kmap_local_zpdesc(zpdesc);
1093	addr += off;
1094	} else {
1095	size_t sizes[`2`];
1096
1097	/ this object spans two pages /
1098	sizes[`0`] = PAGE_SIZE - off;
1099	sizes[`1`] = class->size - sizes[`0`];
1100	addr = local_copy;
1101
1102	memcpy_from_page(to: addr, zpdesc_page(zpdesc),
1103	offset: off, len: sizes[`0`]);
1104	zpdesc = get_next_zpdesc(zpdesc);
1105	memcpy_from_page(to: addr + sizes[`0`],
1106	zpdesc_page(zpdesc),
1107	offset: `0`, len: sizes[`1`]);
1108	}
1109
1110	if (!ZsHugePage(zspage))
1111	addr += ZS_HANDLE_SIZE;
1112
1113	return addr;
1114	}
1115	EXPORT_SYMBOL_GPL(zs_obj_read_begin);
1116
1117	void zs_obj_read_end(struct zs_pool pool, unsigned* long handle,
1118	void *handle_mem)
1119	{
1120	struct zspage *zspage;
1121	struct zpdesc *zpdesc;
1122	unsigned long obj, off;
1123	unsigned int obj_idx;
1124	struct size_class *class;
1125
1126	obj = handle_to_obj(handle);
1127	obj_to_location(obj, zpdesc: &zpdesc, obj_idx: &obj_idx);
1128	zspage = get_zspage(zpdesc);
1129	class = zspage_class(pool, zspage);
1130	off = offset_in_page(class->size * obj_idx);
1131
1132	if (off + class->size <= PAGE_SIZE) {
1133	if (!ZsHugePage(zspage))
1134	off += ZS_HANDLE_SIZE;
1135	handle_mem -= off;
1136	kunmap_local(handle_mem);
1137	}
1138
1139	zspage_read_unlock(zspage);
1140	}
1141	EXPORT_SYMBOL_GPL(zs_obj_read_end);
1142
1143	void zs_obj_write(struct zs_pool pool, unsigned* long handle,
1144	void *handle_mem, size_t mem_len)
1145	{
1146	struct zspage *zspage;
1147	struct zpdesc *zpdesc;
1148	unsigned long obj, off;
1149	unsigned int obj_idx;
1150	struct size_class *class;
1151
1152	/ Guarantee we can get zspage from handle safely /
1153	read_lock(&pool->lock);
1154	obj = handle_to_obj(handle);
1155	obj_to_location(obj, zpdesc: &zpdesc, obj_idx: &obj_idx);
1156	zspage = get_zspage(zpdesc);
1157
1158	/ Make sure migration doesn't move any pages in this zspage /
1159	zspage_read_lock(zspage);
1160	read_unlock(&pool->lock);
1161
1162	class = zspage_class(pool, zspage);
1163	off = offset_in_page(class->size * obj_idx);
1164
1165	if (!ZsHugePage(zspage))
1166	off += ZS_HANDLE_SIZE;
1167
1168	if (off + mem_len <= PAGE_SIZE) {
1169	/ this object is contained entirely within a page /
1170	void *dst = kmap_local_zpdesc(zpdesc);
1171
1172	memcpy(dst + off, handle_mem, mem_len);
1173	kunmap_local(dst);
1174	} else {
1175	/ this object spans two pages /
1176	size_t sizes[`2`];
1177
1178	sizes[`0`] = PAGE_SIZE - off;
1179	sizes[`1`] = mem_len - sizes[`0`];
1180
1181	memcpy_to_page(zpdesc_page(zpdesc), offset: off,
1182	from: handle_mem, len: sizes[`0`]);
1183	zpdesc = get_next_zpdesc(zpdesc);
1184	memcpy_to_page(zpdesc_page(zpdesc), offset: `0`,
1185	from: handle_mem + sizes[`0`], len: sizes[`1`]);
1186	}
1187
1188	zspage_read_unlock(zspage);
1189	}
1190	EXPORT_SYMBOL_GPL(zs_obj_write);
1191
1192	/**
1193	* zs_huge_class_size() - Returns the size (in bytes) of the first huge
1194	* zsmalloc &size_class.
1195	* @pool: zsmalloc pool to use
1196	*
1197	* The function returns the size of the first huge class - any object of equal
1198	* or bigger size will be stored in zspage consisting of a single physical
1199	* page.
1200	*
1201	* Context: Any context.
1202	*
1203	* Return: the size (in bytes) of the first huge zsmalloc &size_class.
1204	*/
1205	size_t zs_huge_class_size(struct zs_pool *pool)
1206	{
1207	return huge_class_size;
1208	}
1209	EXPORT_SYMBOL_GPL(zs_huge_class_size);
1210
1211	static unsigned long obj_malloc(struct zs_pool *pool,
1212	struct zspage zspage, unsigned* long handle)
1213	{
1214	int i, nr_zpdesc, offset;
1215	unsigned long obj;
1216	struct link_free *link;
1217	struct size_class *class;
1218
1219	struct zpdesc *m_zpdesc;
1220	unsigned long m_offset;
1221	void *vaddr;
1222
1223	class = pool->size_class[zspage->class];
1224	obj = get_freeobj(zspage);
1225
1226	offset = obj * class->size;
1227	nr_zpdesc = offset >> PAGE_SHIFT;
1228	m_offset = offset_in_page(offset);
1229	m_zpdesc = get_first_zpdesc(zspage);
1230
1231	for (i = `0`; i < nr_zpdesc; i++)
1232	m_zpdesc = get_next_zpdesc(zpdesc: m_zpdesc);
1233
1234	vaddr = kmap_local_zpdesc(zpdesc: m_zpdesc);
1235	link = (struct link_free )vaddr + m_offset / sizeof(link);
1236	set_freeobj(zspage, obj: link->next >> OBJ_TAG_BITS);
1237	if (likely(!ZsHugePage(zspage)))
1238	/ record handle in the header of allocated chunk /
1239	link->handle = handle \| OBJ_ALLOCATED_TAG;
1240	else
1241	zspage->first_zpdesc->handle = handle \| OBJ_ALLOCATED_TAG;
1242
1243	kunmap_local(vaddr);
1244	mod_zspage_inuse(zspage, val: `1`);
1245
1246	obj = location_to_obj(zpdesc: m_zpdesc, obj_idx: obj);
1247	record_obj(handle, obj);
1248
1249	return obj;
1250	}
1251
1252
1253	/**
1254	* zs_malloc - Allocate block of given size from pool.
1255	* @pool: pool to allocate from
1256	* @size: size of block to allocate
1257	* @gfp: gfp flags when allocating object
1258	* @nid: The preferred node id to allocate new zspage (if needed)
1259	*
1260	* On success, handle to the allocated object is returned,
1261	* otherwise an ERR_PTR().
1262	* Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail.
1263	*/
1264	unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp,
1265	const int nid)
1266	{
1267	unsigned long handle;
1268	struct size_class *class;
1269	int newfg;
1270	struct zspage *zspage;
1271
1272	if (unlikely(!size))
1273	return (unsigned long)ERR_PTR(error: -EINVAL);
1274
1275	if (unlikely(size > ZS_MAX_ALLOC_SIZE))
1276	return (unsigned long)ERR_PTR(error: -ENOSPC);
1277
1278	handle = cache_alloc_handle(pool, gfp);
1279	if (!handle)
1280	return (unsigned long)ERR_PTR(error: -ENOMEM);
1281
1282	/ extra space in chunk to keep the handle /
1283	size += ZS_HANDLE_SIZE;
1284	class = pool->size_class[get_size_class_index(size)];
1285
1286	/ class->lock effectively protects the zpage migration /
1287	spin_lock(lock: &class->lock);
1288	zspage = find_get_zspage(class);
1289	if (likely(zspage)) {
1290	obj_malloc(pool, zspage, handle);
1291	/ Now move the zspage to another fullness group, if required /
1292	fix_fullness_group(class, zspage);
1293	class_stat_add(class, type: ZS_OBJS_INUSE, cnt: `1`);
1294
1295	goto out;
1296	}
1297
1298	spin_unlock(lock: &class->lock);
1299
1300	zspage = alloc_zspage(pool, class, gfp, nid);
1301	if (!zspage) {
1302	cache_free_handle(pool, handle);
1303	return (unsigned long)ERR_PTR(error: -ENOMEM);
1304	}
1305
1306	spin_lock(lock: &class->lock);
1307	obj_malloc(pool, zspage, handle);
1308	newfg = get_fullness_group(class, zspage);
1309	insert_zspage(class, zspage, fullness: newfg);
1310	atomic_long_add(i: class->pages_per_zspage, v: &pool->pages_allocated);
1311	class_stat_add(class, type: ZS_OBJS_ALLOCATED, cnt: class->objs_per_zspage);
1312	class_stat_add(class, type: ZS_OBJS_INUSE, cnt: `1`);
1313
1314	/ We completely set up zspage so mark them as movable /
1315	SetZsPageMovable(pool, zspage);
1316	out:
1317	spin_unlock(lock: &class->lock);
1318
1319	return handle;
1320	}
1321	EXPORT_SYMBOL_GPL(zs_malloc);
1322
1323	static void obj_free(int class_size, unsigned long obj)
1324	{
1325	struct link_free *link;
1326	struct zspage *zspage;
1327	struct zpdesc *f_zpdesc;
1328	unsigned long f_offset;
1329	unsigned int f_objidx;
1330	void *vaddr;
1331
1332
1333	obj_to_location(obj, zpdesc: &f_zpdesc, obj_idx: &f_objidx);
1334	f_offset = offset_in_page(class_size * f_objidx);
1335	zspage = get_zspage(zpdesc: f_zpdesc);
1336
1337	vaddr = kmap_local_zpdesc(zpdesc: f_zpdesc);
1338	link = (struct link_free *)(vaddr + f_offset);
1339
1340	/ Insert this object in containing zspage's freelist /
1341	if (likely(!ZsHugePage(zspage)))
1342	link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
1343	else
1344	f_zpdesc->handle = `0`;
1345	set_freeobj(zspage, obj: f_objidx);
1346
1347	kunmap_local(vaddr);
1348	mod_zspage_inuse(zspage, val: -`1`);
1349	}
1350
1351	void zs_free(struct zs_pool pool, unsigned* long handle)
1352	{
1353	struct zspage *zspage;
1354	struct zpdesc *f_zpdesc;
1355	unsigned long obj;
1356	struct size_class *class;
1357	int fullness;
1358
1359	if (IS_ERR_OR_NULL(ptr: (void *)handle))
1360	return;
1361
1362	/*
1363	* The pool->lock protects the race with zpage's migration
1364	* so it's safe to get the page from handle.
1365	*/
1366	read_lock(&pool->lock);
1367	obj = handle_to_obj(handle);
1368	obj_to_zpdesc(obj, zpdesc: &f_zpdesc);
1369	zspage = get_zspage(zpdesc: f_zpdesc);
1370	class = zspage_class(pool, zspage);
1371	spin_lock(lock: &class->lock);
1372	read_unlock(&pool->lock);
1373
1374	class_stat_sub(class, type: ZS_OBJS_INUSE, cnt: `1`);
1375	obj_free(class_size: class->size, obj);
1376
1377	fullness = fix_fullness_group(class, zspage);
1378	if (fullness == ZS_INUSE_RATIO_0)
1379	free_zspage(pool, class, zspage);
1380
1381	spin_unlock(lock: &class->lock);
1382	cache_free_handle(pool, handle);
1383	}
1384	EXPORT_SYMBOL_GPL(zs_free);
1385
1386	static void zs_object_copy(struct size_class class, unsigned* long dst,
1387	unsigned long src)
1388	{
1389	struct zpdesc s_zpdesc, d_zpdesc;
1390	unsigned int s_objidx, d_objidx;
1391	unsigned long s_off, d_off;
1392	void s_addr, d_addr;
1393	int s_size, d_size, size;
1394	int written = `0`;
1395
1396	s_size = d_size = class->size;
1397
1398	obj_to_location(obj: src, zpdesc: &s_zpdesc, obj_idx: &s_objidx);
1399	obj_to_location(obj: dst, zpdesc: &d_zpdesc, obj_idx: &d_objidx);
1400
1401	s_off = offset_in_page(class->size * s_objidx);
1402	d_off = offset_in_page(class->size * d_objidx);
1403
1404	if (s_off + class->size > PAGE_SIZE)
1405	s_size = PAGE_SIZE - s_off;
1406
1407	if (d_off + class->size > PAGE_SIZE)
1408	d_size = PAGE_SIZE - d_off;
1409
1410	s_addr = kmap_local_zpdesc(zpdesc: s_zpdesc);
1411	d_addr = kmap_local_zpdesc(zpdesc: d_zpdesc);
1412
1413	while (`1`) {
1414	size = min(s_size, d_size);
1415	memcpy(d_addr + d_off, s_addr + s_off, size);
1416	written += size;
1417
1418	if (written == class->size)
1419	break;
1420
1421	s_off += size;
1422	s_size -= size;
1423	d_off += size;
1424	d_size -= size;
1425
1426	/*
1427	* Calling kunmap_local(d_addr) is necessary. kunmap_local()
1428	* calls must occurs in reverse order of calls to kmap_local_page().
1429	* So, to call kunmap_local(s_addr) we should first call
1430	* kunmap_local(d_addr). For more details see
1431	* Documentation/mm/highmem.rst.
1432	*/
1433	if (s_off >= PAGE_SIZE) {
1434	kunmap_local(d_addr);
1435	kunmap_local(s_addr);
1436	s_zpdesc = get_next_zpdesc(zpdesc: s_zpdesc);
1437	s_addr = kmap_local_zpdesc(zpdesc: s_zpdesc);
1438	d_addr = kmap_local_zpdesc(zpdesc: d_zpdesc);
1439	s_size = class->size - written;
1440	s_off = `0`;
1441	}
1442
1443	if (d_off >= PAGE_SIZE) {
1444	kunmap_local(d_addr);
1445	d_zpdesc = get_next_zpdesc(zpdesc: d_zpdesc);
1446	d_addr = kmap_local_zpdesc(zpdesc: d_zpdesc);
1447	d_size = class->size - written;
1448	d_off = `0`;
1449	}
1450	}
1451
1452	kunmap_local(d_addr);
1453	kunmap_local(s_addr);
1454	}
1455
1456	/*
1457	* Find alloced object in zspage from index object and
1458	* return handle.
1459	*/
1460	static unsigned long find_alloced_obj(struct size_class *class,
1461	struct zpdesc zpdesc, int* *obj_idx)
1462	{
1463	unsigned int offset;
1464	int index = *obj_idx;
1465	unsigned long handle = `0`;
1466	void *addr = kmap_local_zpdesc(zpdesc);
1467
1468	offset = get_first_obj_offset(zpdesc);
1469	offset += class->size * index;
1470
1471	while (offset < PAGE_SIZE) {
1472	if (obj_allocated(zpdesc, obj: addr + offset, phandle: &handle))
1473	break;
1474
1475	offset += class->size;
1476	index++;
1477	}
1478
1479	kunmap_local(addr);
1480
1481	*obj_idx = index;
1482
1483	return handle;
1484	}
1485
1486	static void migrate_zspage(struct zs_pool pool, struct* zspage *src_zspage,
1487	struct zspage *dst_zspage)
1488	{
1489	unsigned long used_obj, free_obj;
1490	unsigned long handle;
1491	int obj_idx = `0`;
1492	struct zpdesc *s_zpdesc = get_first_zpdesc(zspage: src_zspage);
1493	struct size_class *class = pool->size_class[src_zspage->class];
1494
1495	while (`1`) {
1496	handle = find_alloced_obj(class, zpdesc: s_zpdesc, obj_idx: &obj_idx);
1497	if (!handle) {
1498	s_zpdesc = get_next_zpdesc(zpdesc: s_zpdesc);
1499	if (!s_zpdesc)
1500	break;
1501	obj_idx = `0`;
1502	continue;
1503	}
1504
1505	used_obj = handle_to_obj(handle);
1506	free_obj = obj_malloc(pool, zspage: dst_zspage, handle);
1507	zs_object_copy(class, dst: free_obj, src: used_obj);
1508	obj_idx++;
1509	obj_free(class_size: class->size, obj: used_obj);
1510
1511	/ Stop if there is no more space /
1512	if (zspage_full(class, zspage: dst_zspage))
1513	break;
1514
1515	/ Stop if there are no more objects to migrate /
1516	if (zspage_empty(zspage: src_zspage))
1517	break;
1518	}
1519	}
1520
1521	static struct zspage isolate_src_zspage(struct* size_class *class)
1522	{
1523	struct zspage *zspage;
1524	int fg;
1525
1526	for (fg = ZS_INUSE_RATIO_10; fg <= ZS_INUSE_RATIO_99; fg++) {
1527	zspage = list_first_entry_or_null(&class->fullness_list[fg],
1528	struct zspage, list);
1529	if (zspage) {
1530	remove_zspage(class, zspage);
1531	return zspage;
1532	}
1533	}
1534
1535	return zspage;
1536	}
1537
1538	static struct zspage isolate_dst_zspage(struct* size_class *class)
1539	{
1540	struct zspage *zspage;
1541	int fg;
1542
1543	for (fg = ZS_INUSE_RATIO_99; fg >= ZS_INUSE_RATIO_10; fg--) {
1544	zspage = list_first_entry_or_null(&class->fullness_list[fg],
1545	struct zspage, list);
1546	if (zspage) {
1547	remove_zspage(class, zspage);
1548	return zspage;
1549	}
1550	}
1551
1552	return zspage;
1553	}
1554
1555	/*
1556	* putback_zspage - add @zspage into right class's fullness list
1557	* @class: destination class
1558	* @zspage: target page
1559	*
1560	* Return @zspage's fullness status
1561	*/
1562	static int putback_zspage(struct size_class class, struct* zspage *zspage)
1563	{
1564	int fullness;
1565
1566	fullness = get_fullness_group(class, zspage);
1567	insert_zspage(class, zspage, fullness);
1568
1569	return fullness;
1570	}
1571
1572	#ifdef CONFIG_COMPACTION
1573	/*
1574	* To prevent zspage destroy during migration, zspage freeing should
1575	* hold locks of all pages in the zspage.
1576	*/
1577	static void lock_zspage(struct zspage *zspage)
1578	{
1579	struct zpdesc curr_zpdesc, zpdesc;
1580
1581	/*
1582	* Pages we haven't locked yet can be migrated off the list while we're
1583	* trying to lock them, so we need to be careful and only attempt to
1584	* lock each page under zspage_read_lock(). Otherwise, the page we lock
1585	* may no longer belong to the zspage. This means that we may wait for
1586	* the wrong page to unlock, so we must take a reference to the page
1587	* prior to waiting for it to unlock outside zspage_read_lock().
1588	*/
1589	while (`1`) {
1590	zspage_read_lock(zspage);
1591	zpdesc = get_first_zpdesc(zspage);
1592	if (zpdesc_trylock(zpdesc))
1593	break;
1594	zpdesc_get(zpdesc);
1595	zspage_read_unlock(zspage);
1596	zpdesc_wait_locked(zpdesc);
1597	zpdesc_put(zpdesc);
1598	}
1599
1600	curr_zpdesc = zpdesc;
1601	while ((zpdesc = get_next_zpdesc(zpdesc: curr_zpdesc))) {
1602	if (zpdesc_trylock(zpdesc)) {
1603	curr_zpdesc = zpdesc;
1604	} else {
1605	zpdesc_get(zpdesc);
1606	zspage_read_unlock(zspage);
1607	zpdesc_wait_locked(zpdesc);
1608	zpdesc_put(zpdesc);
1609	zspage_read_lock(zspage);
1610	}
1611	}
1612	zspage_read_unlock(zspage);
1613	}
1614	#endif /* CONFIG_COMPACTION */
1615
1616	#ifdef CONFIG_COMPACTION
1617
1618	static void replace_sub_page(struct size_class class, struct* zspage *zspage,
1619	struct zpdesc newzpdesc, struct* zpdesc *oldzpdesc)
1620	{
1621	struct zpdesc *zpdesc;
1622	struct zpdesc *zpdescs[ZS_MAX_PAGES_PER_ZSPAGE] = {NULL, };
1623	unsigned int first_obj_offset;
1624	int idx = `0`;
1625
1626	zpdesc = get_first_zpdesc(zspage);
1627	do {
1628	if (zpdesc == oldzpdesc)
1629	zpdescs[idx] = newzpdesc;
1630	else
1631	zpdescs[idx] = zpdesc;
1632	idx++;
1633	} while ((zpdesc = get_next_zpdesc(zpdesc)) != NULL);
1634
1635	create_page_chain(class, zspage, zpdescs);
1636	first_obj_offset = get_first_obj_offset(zpdesc: oldzpdesc);
1637	set_first_obj_offset(zpdesc: newzpdesc, offset: first_obj_offset);
1638	if (unlikely(ZsHugePage(zspage)))
1639	newzpdesc->handle = oldzpdesc->handle;
1640	__zpdesc_set_movable(zpdesc: newzpdesc);
1641	}
1642
1643	static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
1644	{
1645	/*
1646	* Page is locked so zspage can't be destroyed concurrently
1647	* (see free_zspage()). But if the page was already destroyed
1648	* (see reset_zpdesc()), refuse isolation here.
1649	*/
1650	return page_zpdesc(page)->zspage;
1651	}
1652
1653	static int zs_page_migrate(struct page newpage, struct* page *page,
1654	enum migrate_mode mode)
1655	{
1656	struct zs_pool *pool;
1657	struct size_class *class;
1658	struct zspage *zspage;
1659	struct zpdesc *dummy;
1660	struct zpdesc *newzpdesc = page_zpdesc(newpage);
1661	struct zpdesc *zpdesc = page_zpdesc(page);
1662	void s_addr, d_addr, *addr;
1663	unsigned int offset;
1664	unsigned long handle;
1665	unsigned long old_obj, new_obj;
1666	unsigned int obj_idx;
1667
1668	/*
1669	* TODO: nothing prevents a zspage from getting destroyed while
1670	* it is isolated for migration, as the page lock is temporarily
1671	* dropped after zs_page_isolate() succeeded: we should rework that
1672	* and defer destroying such pages once they are un-isolated (putback)
1673	* instead.
1674	*/
1675	if (!zpdesc->zspage)
1676	return `0`;
1677
1678	/ The page is locked, so this pointer must remain valid /
1679	zspage = get_zspage(zpdesc);
1680	pool = zspage->pool;
1681
1682	/*
1683	* The pool migrate_lock protects the race between zpage migration
1684	* and zs_free.
1685	*/
1686	write_lock(&pool->lock);
1687	class = zspage_class(pool, zspage);
1688
1689	/*
1690	* the class lock protects zpage alloc/free in the zspage.
1691	*/
1692	spin_lock(lock: &class->lock);
1693	/ the zspage write_lock protects zpage access via zs_obj_read/write() /
1694	if (!zspage_write_trylock(zspage)) {
1695	spin_unlock(lock: &class->lock);
1696	write_unlock(&pool->lock);
1697	return -EINVAL;
1698	}
1699
1700	/ We're committed, tell the world that this is a Zsmalloc page. /
1701	__zpdesc_set_zsmalloc(zpdesc: newzpdesc);
1702
1703	offset = get_first_obj_offset(zpdesc);
1704	s_addr = kmap_local_zpdesc(zpdesc);
1705
1706	/*
1707	* Here, any user cannot access all objects in the zspage so let's move.
1708	*/
1709	d_addr = kmap_local_zpdesc(zpdesc: newzpdesc);
1710	copy_page(to: d_addr, from: s_addr);
1711	kunmap_local(d_addr);
1712
1713	for (addr = s_addr + offset; addr < s_addr + PAGE_SIZE;
1714	addr += class->size) {
1715	if (obj_allocated(zpdesc, obj: addr, phandle: &handle)) {
1716
1717	old_obj = handle_to_obj(handle);
1718	obj_to_location(obj: old_obj, zpdesc: &dummy, obj_idx: &obj_idx);
1719	new_obj = (unsigned long)location_to_obj(zpdesc: newzpdesc, obj_idx);
1720	record_obj(handle, obj: new_obj);
1721	}
1722	}
1723	kunmap_local(s_addr);
1724
1725	replace_sub_page(class, zspage, newzpdesc, oldzpdesc: zpdesc);
1726	/*
1727	* Since we complete the data copy and set up new zspage structure,
1728	* it's okay to release migration_lock.
1729	*/
1730	write_unlock(&pool->lock);
1731	spin_unlock(lock: &class->lock);
1732	zspage_write_unlock(zspage);
1733
1734	zpdesc_get(zpdesc: newzpdesc);
1735	if (zpdesc_zone(zpdesc: newzpdesc) != zpdesc_zone(zpdesc)) {
1736	zpdesc_dec_zone_page_state(zpdesc);
1737	zpdesc_inc_zone_page_state(zpdesc: newzpdesc);
1738	}
1739
1740	reset_zpdesc(zpdesc);
1741	zpdesc_put(zpdesc);
1742
1743	return `0`;
1744	}
1745
1746	static void zs_page_putback(struct page *page)
1747	{
1748	}
1749
1750	const struct movable_operations zsmalloc_mops = {
1751	.isolate_page = zs_page_isolate,
1752	.migrate_page = zs_page_migrate,
1753	.putback_page = zs_page_putback,
1754	};
1755
1756	/*
1757	* Caller should hold page_lock of all pages in the zspage
1758	* In here, we cannot use zspage meta data.
1759	*/
1760	static void async_free_zspage(struct work_struct *work)
1761	{
1762	int i;
1763	struct size_class *class;
1764	struct zspage zspage, tmp;
1765	LIST_HEAD(free_pages);
1766	struct zs_pool pool = container_of(work, struct* zs_pool,
1767	free_work);
1768
1769	for (i = `0`; i < ZS_SIZE_CLASSES; i++) {
1770	class = pool->size_class[i];
1771	if (class->index != i)
1772	continue;
1773
1774	spin_lock(lock: &class->lock);
1775	list_splice_init(list: &class->fullness_list[ZS_INUSE_RATIO_0],
1776	head: &free_pages);
1777	spin_unlock(lock: &class->lock);
1778	}
1779
1780	list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
1781	list_del(entry: &zspage->list);
1782	lock_zspage(zspage);
1783
1784	class = zspage_class(pool, zspage);
1785	spin_lock(lock: &class->lock);
1786	class_stat_sub(class, type: ZS_INUSE_RATIO_0, cnt: `1`);
1787	__free_zspage(pool, class, zspage);
1788	spin_unlock(lock: &class->lock);
1789	}
1790	};
1791
1792	static void kick_deferred_free(struct zs_pool *pool)
1793	{
1794	schedule_work(work: &pool->free_work);
1795	}
1796
1797	static void zs_flush_migration(struct zs_pool *pool)
1798	{
1799	flush_work(work: &pool->free_work);
1800	}
1801
1802	static void init_deferred_free(struct zs_pool *pool)
1803	{
1804	INIT_WORK(&pool->free_work, async_free_zspage);
1805	}
1806
1807	static void SetZsPageMovable(struct zs_pool pool, struct* zspage *zspage)
1808	{
1809	struct zpdesc *zpdesc = get_first_zpdesc(zspage);
1810
1811	do {
1812	WARN_ON(!zpdesc_trylock(zpdesc));
1813	__zpdesc_set_movable(zpdesc);
1814	zpdesc_unlock(zpdesc);
1815	} while ((zpdesc = get_next_zpdesc(zpdesc)) != NULL);
1816	}
1817	#else
1818	static inline void zs_flush_migration(struct zs_pool *pool) { }
1819	#endif
1820
1821	/*
1822	*
1823	* Based on the number of unused allocated objects calculate
1824	* and return the number of pages that we can free.
1825	*/
1826	static unsigned long zs_can_compact(struct size_class *class)
1827	{
1828	unsigned long obj_wasted;
1829	unsigned long obj_allocated = class_stat_read(class, type: ZS_OBJS_ALLOCATED);
1830	unsigned long obj_used = class_stat_read(class, type: ZS_OBJS_INUSE);
1831
1832	if (obj_allocated <= obj_used)
1833	return `0`;
1834
1835	obj_wasted = obj_allocated - obj_used;
1836	obj_wasted /= class->objs_per_zspage;
1837
1838	return obj_wasted * class->pages_per_zspage;
1839	}
1840
1841	static unsigned long __zs_compact(struct zs_pool *pool,
1842	struct size_class *class)
1843	{
1844	struct zspage *src_zspage = NULL;
1845	struct zspage *dst_zspage = NULL;
1846	unsigned long pages_freed = `0`;
1847
1848	/*
1849	* protect the race between zpage migration and zs_free
1850	* as well as zpage allocation/free
1851	*/
1852	write_lock(&pool->lock);
1853	spin_lock(lock: &class->lock);
1854	while (zs_can_compact(class)) {
1855	int fg;
1856
1857	if (!dst_zspage) {
1858	dst_zspage = isolate_dst_zspage(class);
1859	if (!dst_zspage)
1860	break;
1861	}
1862
1863	src_zspage = isolate_src_zspage(class);
1864	if (!src_zspage)
1865	break;
1866
1867	if (!zspage_write_trylock(zspage: src_zspage))
1868	break;
1869
1870	migrate_zspage(pool, src_zspage, dst_zspage);
1871	zspage_write_unlock(zspage: src_zspage);
1872
1873	fg = putback_zspage(class, zspage: src_zspage);
1874	if (fg == ZS_INUSE_RATIO_0) {
1875	free_zspage(pool, class, zspage: src_zspage);
1876	pages_freed += class->pages_per_zspage;
1877	}
1878	src_zspage = NULL;
1879
1880	if (get_fullness_group(class, zspage: dst_zspage) == ZS_INUSE_RATIO_100
1881	\|\| rwlock_is_contended(&pool->lock)) {
1882	putback_zspage(class, zspage: dst_zspage);
1883	dst_zspage = NULL;
1884
1885	spin_unlock(lock: &class->lock);
1886	write_unlock(&pool->lock);
1887	cond_resched();
1888	write_lock(&pool->lock);
1889	spin_lock(lock: &class->lock);
1890	}
1891	}
1892
1893	if (src_zspage)
1894	putback_zspage(class, zspage: src_zspage);
1895
1896	if (dst_zspage)
1897	putback_zspage(class, zspage: dst_zspage);
1898
1899	spin_unlock(lock: &class->lock);
1900	write_unlock(&pool->lock);
1901
1902	return pages_freed;
1903	}
1904
1905	unsigned long zs_compact(struct zs_pool *pool)
1906	{
1907	int i;
1908	struct size_class *class;
1909	unsigned long pages_freed = `0`;
1910
1911	/*
1912	* Pool compaction is performed under pool->lock so it is basically
1913	* single-threaded. Having more than one thread in __zs_compact()
1914	* will increase pool->lock contention, which will impact other
1915	* zsmalloc operations that need pool->lock.
1916	*/
1917	if (atomic_xchg(v: &pool->compaction_in_progress, new: `1`))
1918	return `0`;
1919
1920	for (i = ZS_SIZE_CLASSES - `1`; i >= `0`; i--) {
1921	class = pool->size_class[i];
1922	if (class->index != i)
1923	continue;
1924	pages_freed += __zs_compact(pool, class);
1925	}
1926	atomic_long_add(i: pages_freed, v: &pool->stats.pages_compacted);
1927	atomic_set(v: &pool->compaction_in_progress, i: `0`);
1928
1929	return pages_freed;
1930	}
1931	EXPORT_SYMBOL_GPL(zs_compact);
1932
1933	void zs_pool_stats(struct zs_pool pool, struct* zs_pool_stats *stats)
1934	{
1935	memcpy(stats, &pool->stats, sizeof(struct zs_pool_stats));
1936	}
1937	EXPORT_SYMBOL_GPL(zs_pool_stats);
1938
1939	static unsigned long zs_shrinker_scan(struct shrinker *shrinker,
1940	struct shrink_control *sc)
1941	{
1942	unsigned long pages_freed;
1943	struct zs_pool *pool = shrinker->private_data;
1944
1945	/*
1946	* Compact classes and calculate compaction delta.
1947	* Can run concurrently with a manually triggered
1948	* (by user) compaction.
1949	*/
1950	pages_freed = zs_compact(pool);
1951
1952	return pages_freed ? pages_freed : SHRINK_STOP;
1953	}
1954
1955	static unsigned long zs_shrinker_count(struct shrinker *shrinker,
1956	struct shrink_control *sc)
1957	{
1958	int i;
1959	struct size_class *class;
1960	unsigned long pages_to_free = `0`;
1961	struct zs_pool *pool = shrinker->private_data;
1962
1963	for (i = ZS_SIZE_CLASSES - `1`; i >= `0`; i--) {
1964	class = pool->size_class[i];
1965	if (class->index != i)
1966	continue;
1967
1968	pages_to_free += zs_can_compact(class);
1969	}
1970
1971	return pages_to_free;
1972	}
1973
1974	static void zs_unregister_shrinker(struct zs_pool *pool)
1975	{
1976	shrinker_free(shrinker: pool->shrinker);
1977	}
1978
1979	static int zs_register_shrinker(struct zs_pool *pool)
1980	{
1981	pool->shrinker = shrinker_alloc(flags: `0`, fmt: "mm-zspool:%s", pool->name);
1982	if (!pool->shrinker)
1983	return -ENOMEM;
1984
1985	pool->shrinker->scan_objects = zs_shrinker_scan;
1986	pool->shrinker->count_objects = zs_shrinker_count;
1987	pool->shrinker->batch = `0`;
1988	pool->shrinker->private_data = pool;
1989
1990	shrinker_register(shrinker: pool->shrinker);
1991
1992	return `0`;
1993	}
1994
1995	static int calculate_zspage_chain_size(int class_size)
1996	{
1997	int i, min_waste = INT_MAX;
1998	int chain_size = `1`;
1999
2000	if (is_power_of_2(n: class_size))
2001	return chain_size;
2002
2003	for (i = `1`; i <= ZS_MAX_PAGES_PER_ZSPAGE; i++) {
2004	int waste;
2005
2006	waste = (i * PAGE_SIZE) % class_size;
2007	if (waste < min_waste) {
2008	min_waste = waste;
2009	chain_size = i;
2010	}
2011	}
2012
2013	return chain_size;
2014	}
2015
2016	/**
2017	* zs_create_pool - Creates an allocation pool to work from.
2018	* @name: pool name to be created
2019	*
2020	* This function must be called before anything when using
2021	* the zsmalloc allocator.
2022	*
2023	* On success, a pointer to the newly created pool is returned,
2024	* otherwise NULL.
2025	*/
2026	struct zs_pool zs_create_pool(const* char *name)
2027	{
2028	int i;
2029	struct zs_pool *pool;
2030	struct size_class *prev_class = NULL;
2031
2032	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
2033	if (!pool)
2034	return NULL;
2035
2036	init_deferred_free(pool);
2037	rwlock_init(&pool->lock);
2038	atomic_set(v: &pool->compaction_in_progress, i: `0`);
2039
2040	pool->name = kstrdup(s: name, GFP_KERNEL);
2041	if (!pool->name)
2042	goto err;
2043
2044	if (create_cache(pool))
2045	goto err;
2046
2047	/*
2048	* Iterate reversely, because, size of size_class that we want to use
2049	* for merging should be larger or equal to current size.
2050	*/
2051	for (i = ZS_SIZE_CLASSES - `1`; i >= `0`; i--) {
2052	int size;
2053	int pages_per_zspage;
2054	int objs_per_zspage;
2055	struct size_class *class;
2056	int fullness;
2057
2058	size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;
2059	if (size > ZS_MAX_ALLOC_SIZE)
2060	size = ZS_MAX_ALLOC_SIZE;
2061	pages_per_zspage = calculate_zspage_chain_size(class_size: size);
2062	objs_per_zspage = pages_per_zspage * PAGE_SIZE / size;
2063
2064	/*
2065	* We iterate from biggest down to smallest classes,
2066	* so huge_class_size holds the size of the first huge
2067	* class. Any object bigger than or equal to that will
2068	* endup in the huge class.
2069	*/
2070	if (pages_per_zspage != `1` && objs_per_zspage != `1` &&
2071	!huge_class_size) {
2072	huge_class_size = size;
2073	/*
2074	* The object uses ZS_HANDLE_SIZE bytes to store the
2075	* handle. We need to subtract it, because zs_malloc()
2076	* unconditionally adds handle size before it performs
2077	* size class search - so object may be smaller than
2078	* huge class size, yet it still can end up in the huge
2079	* class because it grows by ZS_HANDLE_SIZE extra bytes
2080	* right before class lookup.
2081	*/
2082	huge_class_size -= (ZS_HANDLE_SIZE - `1`);
2083	}
2084
2085	/*
2086	* size_class is used for normal zsmalloc operation such
2087	* as alloc/free for that size. Although it is natural that we
2088	* have one size_class for each size, there is a chance that we
2089	* can get more memory utilization if we use one size_class for
2090	* many different sizes whose size_class have same
2091	* characteristics. So, we makes size_class point to
2092	* previous size_class if possible.
2093	*/
2094	if (prev_class) {
2095	if (can_merge(prev: prev_class, pages_per_zspage, objs_per_zspage)) {
2096	pool->size_class[i] = prev_class;
2097	continue;
2098	}
2099	}
2100
2101	class = kzalloc(sizeof(struct size_class), GFP_KERNEL);
2102	if (!class)
2103	goto err;
2104
2105	class->size = size;
2106	class->index = i;
2107	class->pages_per_zspage = pages_per_zspage;
2108	class->objs_per_zspage = objs_per_zspage;
2109	spin_lock_init(&class->lock);
2110	pool->size_class[i] = class;
2111
2112	fullness = ZS_INUSE_RATIO_0;
2113	while (fullness < NR_FULLNESS_GROUPS) {
2114	INIT_LIST_HEAD(list: &class->fullness_list[fullness]);
2115	fullness++;
2116	}
2117
2118	prev_class = class;
2119	}
2120
2121	/ debug only, don't abort if it fails /
2122	zs_pool_stat_create(pool, name);
2123
2124	/*
2125	* Not critical since shrinker is only used to trigger internal
2126	* defragmentation of the pool which is pretty optional thing. If
2127	* registration fails we still can use the pool normally and user can
2128	* trigger compaction manually. Thus, ignore return code.
2129	*/
2130	zs_register_shrinker(pool);
2131
2132	return pool;
2133
2134	err:
2135	zs_destroy_pool(pool);
2136	return NULL;
2137	}
2138	EXPORT_SYMBOL_GPL(zs_create_pool);
2139
2140	void zs_destroy_pool(struct zs_pool *pool)
2141	{
2142	int i;
2143
2144	zs_unregister_shrinker(pool);
2145	zs_flush_migration(pool);
2146	zs_pool_stat_destroy(pool);
2147
2148	for (i = `0`; i < ZS_SIZE_CLASSES; i++) {
2149	int fg;
2150	struct size_class *class = pool->size_class[i];
2151
2152	if (!class)
2153	continue;
2154
2155	if (class->index != i)
2156	continue;
2157
2158	for (fg = ZS_INUSE_RATIO_0; fg < NR_FULLNESS_GROUPS; fg++) {
2159	if (list_empty(head: &class->fullness_list[fg]))
2160	continue;
2161
2162	pr_err("Class-%d fullness group %d is not empty\n",
2163	class->size, fg);
2164	}
2165	kfree(objp: class);
2166	}
2167
2168	destroy_cache(pool);
2169	kfree(objp: pool->name);
2170	kfree(objp: pool);
2171	}
2172	EXPORT_SYMBOL_GPL(zs_destroy_pool);
2173
2174	static int __init zs_init(void)
2175	{
2176	int rc __maybe_unused;
2177
2178	#ifdef CONFIG_COMPACTION
2179	rc = set_movable_ops(ops: &zsmalloc_mops, type: PGTY_zsmalloc);
2180	if (rc)
2181	return rc;
2182	#endif
2183	zs_stat_init();
2184	return `0`;
2185	}
2186
2187	static void __exit zs_exit(void)
2188	{
2189	#ifdef CONFIG_COMPACTION
2190	set_movable_ops(NULL, type: PGTY_zsmalloc);
2191	#endif
2192	zs_stat_exit();
2193	}
2194
2195	module_init(zs_init);
2196	module_exit(zs_exit);
2197
2198	MODULE_LICENSE("Dual BSD/GPL");
2199	MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
2200	MODULE_DESCRIPTION("zsmalloc memory allocator");
2201

source code of linux/mm/zsmalloc.c