// SPDX-License-Identifier: GPL-2.0
/*
 * Author:  Xiang Gao <gaoxiang@loongson.cn>
 *          Huacai Chen <chenhuacai@loongson.cn>
 *
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/export.h>
#include <linux/nodemask.h>
#include <linux/swap.h>
#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/irq.h>
#include <linux/pci.h>
#include <asm/bootinfo.h>
#include <asm/loongson.h>
#include <asm/numa.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/time.h>

int numa_off;
struct pglist_data *node_data[MAX_NUMNODES];
unsigned char node_distances[MAX_NUMNODES][MAX_NUMNODES];

EXPORT_SYMBOL(node_data);
EXPORT_SYMBOL(node_distances);

static struct numa_meminfo numa_meminfo;
cpumask_t cpus_on_node[MAX_NUMNODES];
cpumask_t phys_cpus_on_node[MAX_NUMNODES];
EXPORT_SYMBOL(cpus_on_node);

/*
 * apicid, cpu, node mappings
 */
s16 __cpuid_to_node[CONFIG_NR_CPUS] = {
	[0 ... CONFIG_NR_CPUS - 1] = NUMA_NO_NODE
};
EXPORT_SYMBOL(__cpuid_to_node);

nodemask_t numa_nodes_parsed __initdata;

#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);

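/* Map a logical CPU to its home node for the first-chunk percpu allocator. */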
static int __init pcpu_cpu_to_node(int cpu)
{
	return early_cpu_to_node(cpu);
}

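/* CPUs on the same node are local to each other; all other pairs are remote. */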
static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
	if (early_cpu_to_node(from) == early_cpu_to_node(to))
		return LOCAL_DISTANCE;
	else
		return REMOTE_DISTANCE;
}

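/* Populate kernel page tables for percpu areas built by the page allocator. */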
void __init pcpu_populate_pte(unsigned long addr)
{
	populate_kernel_pte(addr);
}

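/*
 * Set up the first percpu chunk. The embed allocator is preferred, but
 * systems with many (>= 8) nodes go straight to the page allocator, which
 * is also the fallback if embedding fails.
 */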
void __init setup_per_cpu_areas(void)
{
	unsigned long delta;
	unsigned int cpu;
	int rc = -EINVAL;

	if (pcpu_chosen_fc == PCPU_FC_AUTO) {
		if (nr_node_ids >= 8)
			pcpu_chosen_fc = PCPU_FC_PAGE;
		else
			pcpu_chosen_fc = PCPU_FC_EMBED;
	}

	/*
	 * Always reserve area for module percpu variables. That's
	 * what the legacy allocator did.
	 */
	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
					    PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
					    pcpu_cpu_distance, pcpu_cpu_to_node);
		if (rc < 0)
			pr_warn("%s allocator failed (%d), falling back to page size\n",
				pcpu_fc_names[pcpu_chosen_fc], rc);
	}
	if (rc < 0)
		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node);
	if (rc < 0)
		panic("cannot initialize percpu area (err=%d)", rc);

	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu)
		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
}
#endif

/*
 * Get the node id by logical cpu number.
 * __cpuid_to_node maps a physical cpu id to a node, so we
 * should use cpu_logical_map(cpu) to index it.
 *
 * This routine is only used in the early boot phase;
 * after setup_per_cpu_areas() is called and numa_node is
 * initialized, cpu_to_node() will be used instead.
 */
int early_cpu_to_node(int cpu)
{
	int physid = cpu_logical_map(cpu);

	if (physid < 0)
		return NUMA_NO_NODE;

	return __cpuid_to_node[physid];
}

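/* Record an early CPU-to-node mapping in both the logical and physical masks. */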
void __init early_numa_add_cpu(int cpuid, s16 node)
{
	int cpu = __cpu_number_map[cpuid];

	if (cpu < 0)
		return;

	cpumask_set_cpu(cpu, &cpus_on_node[node]);
	cpumask_set_cpu(cpuid, &phys_cpus_on_node[node]);
}

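/* Keep cpus_on_node[] in sync as CPUs are added to and removed from a node. */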
void numa_add_cpu(unsigned int cpu)
{
	int nid = cpu_to_node(cpu);

	cpumask_set_cpu(cpu, &cpus_on_node[nid]);
}

void numa_remove_cpu(unsigned int cpu)
{
	int nid = cpu_to_node(cpu);

	cpumask_clear_cpu(cpu, &cpus_on_node[nid]);
}

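/* Validate a memory block and append it, page-aligned, to @mi. */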
static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
				     struct numa_meminfo *mi)
{
	/* ignore zero length blks */
	if (start == end)
		return 0;

	/* whine about and ignore invalid blks */
	if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
		pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
			nid, start, end - 1);
		return 0;
	}

	if (mi->nr_blks >= NR_NODE_MEMBLKS) {
		pr_err("NUMA: too many memblk ranges\n");
		return -EINVAL;
	}

	mi->blk[mi->nr_blks].start = PFN_ALIGN(start);
	mi->blk[mi->nr_blks].end = PFN_ALIGN(end - PAGE_SIZE + 1);
	mi->blk[mi->nr_blks].nid = nid;
	mi->nr_blks++;
	return 0;
}

/**
 * numa_add_memblk - Add one numa_memblk to numa_meminfo
 * @nid: NUMA node ID of the new memblk
 * @start: Start address of the new memblk
 * @end: End address of the new memblk
 *
 * Add a new memblk to the default numa_meminfo.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init numa_add_memblk(int nid, u64 start, u64 end)
{
	return numa_add_memblk_to(nid, start, end, &numa_meminfo);
}

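/* Allocate and zero the pg_data_t for @nid, preferably from node-local memory. */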
static void __init alloc_node_data(int nid)
{
	void *nd;
	unsigned long nd_pa;
	size_t nd_sz = roundup(sizeof(pg_data_t), PAGE_SIZE);

	nd_pa = memblock_phys_alloc_try_nid(nd_sz, SMP_CACHE_BYTES, nid);
	if (!nd_pa) {
		pr_err("Cannot find %zu bytes for node_data (initial node: %d)\n", nd_sz, nid);
		return;
	}

	nd = __va(nd_pa);

	node_data[nid] = nd;
	memset(nd, 0, sizeof(pg_data_t));
}

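/* Print the node's address-space offset and PFN range, then allocate its node data. */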
static void __init node_mem_init(unsigned int node)
{
	unsigned long start_pfn, end_pfn;
	unsigned long node_addrspace_offset;

	node_addrspace_offset = nid_to_addrbase(node);
	pr_info("Node%d's addrspace_offset is 0x%lx\n",
		node, node_addrspace_offset);

	get_pfn_range_for_nid(node, &start_pfn, &end_pfn);
	pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx\n",
		node, start_pfn, end_pfn);

	alloc_node_data(node);
}

#ifdef CONFIG_ACPI_NUMA

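/* Account the pages in this range and assign them to @node in memblock. */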
static void __init add_node_intersection(u32 node, u64 start, u64 size, u32 type)
{
	static unsigned long num_physpages;

	num_physpages += (size >> PAGE_SHIFT);
	pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n",
		node, type, start, size);
	pr_info("       start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n",
		start >> PAGE_SHIFT, (start + size) >> PAGE_SHIFT, num_physpages);
	memblock_set_node(start, size, &memblock.memory, node);
}

/*
 * add_numamem_region
 *
 * Add a usable memory region described by the BIOS. The
 * routine gets each intersection between the BIOS's region
 * and a node's region, and adds them into the node's
 * memblock pool.
 */
static void __init add_numamem_region(u64 start, u64 end, u32 type)
{
	u32 i;
	u64 ofs = start;

	if (start >= end) {
		pr_debug("Invalid region: %016llx-%016llx\n", start, end);
		return;
	}

	for (i = 0; i < numa_meminfo.nr_blks; i++) {
		struct numa_memblk *mb = &numa_meminfo.blk[i];

		if (ofs > mb->end)
			continue;

		if (end > mb->end) {
			add_node_intersection(mb->nid, ofs, mb->end - ofs, type);
			ofs = mb->end;
		} else {
			add_node_intersection(mb->nid, ofs, end - ofs, type);
			break;
		}
	}
}

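/*
 * Walk the EFI memory map: usable regions are added to the per-node
 * memblock pool; ACPI-reclaimable and unusable regions are added and
 * additionally logged as reserved; all other types are only logged.
 */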
static void __init init_node_memblock(void)
{
	u32 mem_type;
	u64 mem_end, mem_start, mem_size;
	efi_memory_desc_t *md;

	/* Parse memory information and activate */
	for_each_efi_memory_desc(md) {
		mem_type = md->type;
		mem_start = md->phys_addr;
		mem_size = md->num_pages << EFI_PAGE_SHIFT;
		mem_end = mem_start + mem_size;

		switch (mem_type) {
		case EFI_LOADER_CODE:
		case EFI_LOADER_DATA:
		case EFI_BOOT_SERVICES_CODE:
		case EFI_BOOT_SERVICES_DATA:
		case EFI_PERSISTENT_MEMORY:
		case EFI_CONVENTIONAL_MEMORY:
			add_numamem_region(mem_start, mem_end, mem_type);
			break;
		case EFI_PAL_CODE:
		case EFI_UNUSABLE_MEMORY:
		case EFI_ACPI_RECLAIM_MEMORY:
			add_numamem_region(mem_start, mem_end, mem_type);
			fallthrough;
		case EFI_RESERVED_TYPE:
		case EFI_RUNTIME_SERVICES_CODE:
		case EFI_RUNTIME_SERVICES_DATA:
		case EFI_MEMORY_MAPPED_IO:
		case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
			pr_info("Resvd: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n",
				mem_type, mem_start, mem_size);
			break;
		}
	}
}

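/* Default distance matrix: LOCAL_DISTANCE on the diagonal, REMOTE_DISTANCE elsewhere. */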
static void __init numa_default_distance(void)
{
	int row, col;

	for (row = 0; row < MAX_NUMNODES; row++)
		for (col = 0; col < MAX_NUMNODES; col++) {
			if (col == row)
				node_distances[row][col] = LOCAL_DISTANCE;
			else
				/* We assume one node per package here!
				 *
				 * A SLIT should be used for multiple nodes
				 * per package to override the default setting.
				 */
				node_distances[row][col] = REMOTE_DISTANCE;
		}
}

/*
 * fake_numa_init() - For non-ACPI systems
 * Return: 0 on success, -errno on failure.
 */
static int __init fake_numa_init(void)
{
	phys_addr_t start = memblock_start_of_DRAM();
	phys_addr_t end = memblock_end_of_DRAM() - 1;

	node_set(0, numa_nodes_parsed);
	pr_info("Faking a node at [mem %pap-%pap]\n", &start, &end);

	return numa_add_memblk(0, start, end + 1);
}

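/*
 * Top-level NUMA initialization: reset all CPU-to-node mappings, parse
 * the firmware tables (or fake a single node without ACPI), populate the
 * per-node memblock pool, and bring every parsed node online.
 */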
int __init init_numa_memory(void)
{
	int i;
	int ret;
	int node;

	for (i = 0; i < NR_CPUS; i++)
		set_cpuid_to_node(i, NUMA_NO_NODE);

	numa_default_distance();
	nodes_clear(numa_nodes_parsed);
	nodes_clear(node_possible_map);
	nodes_clear(node_online_map);
	memset(&numa_meminfo, 0, sizeof(numa_meminfo));

	/* Parse SRAT and SLIT if provided by firmware. */
	ret = acpi_disabled ? fake_numa_init() : acpi_numa_init();
	if (ret < 0)
		return ret;

	node_possible_map = numa_nodes_parsed;
	if (WARN_ON(nodes_empty(node_possible_map)))
		return -EINVAL;

	init_node_memblock();
	if (!memblock_validate_numa_coverage(SZ_1M))
		return -EINVAL;

	for_each_node_mask(node, node_possible_map) {
		node_mem_init(node);
		node_set_online(node);
	}
	max_low_pfn = PHYS_PFN(memblock_end_of_DRAM());

	setup_nr_node_ids();
	loongson_sysconf.nr_nodes = nr_node_ids;
	loongson_sysconf.cores_per_node = cpumask_weight(&phys_cpus_on_node[0]);

	return 0;
}

#endif

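/* Compute max_low_pfn across all online nodes, then initialize zone boundaries. */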
void __init paging_init(void)
{
	unsigned int node;
	unsigned long zones_size[MAX_NR_ZONES] = {0, };

	for_each_online_node(node) {
		unsigned long start_pfn, end_pfn;

		get_pfn_range_for_nid(node, &start_pfn, &end_pfn);

		if (end_pfn > max_low_pfn)
			max_low_pfn = end_pfn;
	}
#ifdef CONFIG_ZONE_DMA32
	zones_size[ZONE_DMA32] = MAX_DMA32_PFN;
#endif
	zones_size[ZONE_NORMAL] = max_low_pfn;
	free_area_init(zones_size);
}

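/* Set high_memory and release memblock-managed memory to the buddy allocator. */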
void __init mem_init(void)
{
	high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
	memblock_free_all();
}

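/* Return the NUMA node of the device embedded in the PCI bus. */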
int pcibus_to_node(struct pci_bus *bus)
{
	return dev_to_node(&bus->dev);
}
EXPORT_SYMBOL(pcibus_to_node);