// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2020-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 */

/**
 * DOC: Enclave lifetime management driver for Nitro Enclaves (NE).
 * Nitro is a hypervisor developed by Amazon.
 */

#include <linux/anon_inodes.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/hugetlb.h>
#include <linux/limits.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/nitro_enclaves.h>
#include <linux/pci.h>
#include <linux/poll.h>
#include <linux/range.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <uapi/linux/vm_sockets.h>

#include "ne_misc_dev.h"
#include "ne_pci_dev.h"

/**
 * NE_CPUS_SIZE - Size of a comma-separated cpu-list string, sized for max
 *		  128 CPUs, for now. The NE CPU pool includes CPUs from a
 *		  single NUMA node.
 */
#define NE_CPUS_SIZE (512)

/**
 * NE_EIF_LOAD_OFFSET - The offset where to copy the Enclave Image Format (EIF)
 *			image in enclave memory.
 */
#define NE_EIF_LOAD_OFFSET (8 * 1024UL * 1024UL)

/**
 * NE_MIN_ENCLAVE_MEM_SIZE - The minimum memory size an enclave can be launched
 *			     with.
 */
#define NE_MIN_ENCLAVE_MEM_SIZE (64 * 1024UL * 1024UL)

/**
 * NE_MIN_MEM_REGION_SIZE - The minimum size of an enclave memory region.
 */
#define NE_MIN_MEM_REGION_SIZE (2 * 1024UL * 1024UL)

/**
 * NE_PARENT_VM_CID - The CID for the vsock device of the primary / parent VM.
 */
#define NE_PARENT_VM_CID (3)
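
/*
 * Illustrative only: an application inside the enclave can reach the parent
 * VM over vsock by connecting to NE_PARENT_VM_CID; the port below is a
 * made-up value that both endpoints would need to agree on.
 *
 *	struct sockaddr_vm svm = {
 *		.svm_family = AF_VSOCK,
 *		.svm_port = 9000,
 *		.svm_cid = NE_PARENT_VM_CID,
 *	};
 *	int fd = socket(AF_VSOCK, SOCK_STREAM, 0);
 *
 *	connect(fd, (struct sockaddr *)&svm, sizeof(svm));
 */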

static long ne_ioctl(struct file *file, unsigned int cmd, unsigned long arg);

static const struct file_operations ne_fops = {
	.owner = THIS_MODULE,
	.llseek = noop_llseek,
	.unlocked_ioctl = ne_ioctl,
};

static struct miscdevice ne_misc_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "nitro_enclaves",
	.fops = &ne_fops,
	.mode = 0660,
};

struct ne_devs ne_devs = {
	.ne_misc_dev = &ne_misc_dev,
};

/*
 * TODO: Update logic to create new sysfs entries instead of using
 * a kernel parameter e.g. if multiple sysfs files needed.
 */
static int ne_set_kernel_param(const char *val, const struct kernel_param *kp);

static const struct kernel_param_ops ne_cpu_pool_ops = {
	.get = param_get_string,
	.set = ne_set_kernel_param,
};

static char ne_cpus[NE_CPUS_SIZE];
static struct kparam_string ne_cpus_arg = {
	.maxlen = sizeof(ne_cpus),
	.string = ne_cpus,
};

module_param_cb(ne_cpus, &ne_cpu_pool_ops, &ne_cpus_arg, 0644);
/* https://www.kernel.org/doc/html/latest/admin-guide/kernel-parameters.html#cpu-lists */
MODULE_PARM_DESC(ne_cpus, "<cpu-list> - CPU pool used for Nitro Enclaves");
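
/*
 * A minimal usage sketch for setting the NE CPU pool; the cpu-list below is
 * only an example and must hold full cores, not CPU 0 or its siblings, all
 * from one NUMA node.
 *
 *	# At boot time, on the kernel command line:
 *	#	nitro_enclaves.ne_cpus=2-3
 *	# Or at run time, via the module parameter sysfs entry:
 *	#	echo "2-3" > /sys/module/nitro_enclaves/parameters/ne_cpus
 */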

/**
 * struct ne_cpu_pool - CPU pool used for Nitro Enclaves.
 * @avail_threads_per_core:	Available full CPU cores to be dedicated to
 *				enclave(s). The cpumasks from the array, indexed
 *				by core id, contain all the threads from the
 *				available cores, that are not set for created
 *				enclave(s). The full CPU cores are part of the
 *				NE CPU pool.
 * @mutex:			Mutex for the access to the NE CPU pool.
 * @nr_parent_vm_cores:		The size of the available threads per core array.
 *				The total number of CPU cores available on the
 *				primary / parent VM.
 * @nr_threads_per_core:	The number of threads that a full CPU core has.
 * @numa_node:			NUMA node of the CPUs in the pool.
 */
struct ne_cpu_pool {
	cpumask_var_t *avail_threads_per_core;
	struct mutex mutex;
	unsigned int nr_parent_vm_cores;
	unsigned int nr_threads_per_core;
	int numa_node;
};
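
/*
 * For example, on a hypothetical topology with 2 threads per core where CPUs
 * 2 and 3 are the siblings of one core and CPUs 4 and 5 of another, setting
 * ne_cpus=2-5 leaves avail_threads_per_core with two non-empty cpumasks,
 * {2, 3} and {4, 5}, indexed by their core ids.
 */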

static struct ne_cpu_pool ne_cpu_pool;

/**
 * struct ne_phys_contig_mem_regions - Contiguous physical memory regions.
 * @num:	The number of regions currently in the array.
 * @regions:	The array of physical memory regions.
 */
struct ne_phys_contig_mem_regions {
	unsigned long num;
	struct range *regions;
};
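
/*
 * A worked example of how the regions array fills up (addresses are made up):
 * adding a 2 MiB page at 0x200000 and then one at 0x400000 collapses both
 * into the single range [0x200000, 0x5fffff], since they are physically
 * contiguous; a third page at 0x800000 starts a new range. See
 * ne_merge_phys_contig_memory_regions() below for the merge logic.
 */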

/**
 * ne_check_enclaves_created() - Verify if at least one enclave has been created.
 * @void: No parameters provided.
 *
 * Context: Process context.
 * Return:
 * * True if at least one enclave is created.
 * * False otherwise.
 */
static bool ne_check_enclaves_created(void)
{
	struct ne_pci_dev *ne_pci_dev = ne_devs.ne_pci_dev;
	bool ret = false;

	if (!ne_pci_dev)
		return ret;

	mutex_lock(&ne_pci_dev->enclaves_list_mutex);

	if (!list_empty(&ne_pci_dev->enclaves_list))
		ret = true;

	mutex_unlock(&ne_pci_dev->enclaves_list_mutex);

	return ret;
}

/**
 * ne_setup_cpu_pool() - Set the NE CPU pool after handling sanity checks such
 *			 as not sharing CPU cores with the primary / parent VM
 *			 or not using CPU 0, which should remain available for
 *			 the primary / parent VM. Offline the CPUs from the
 *			 pool after the checks passed.
 * @ne_cpu_list:	The CPU list used for setting NE CPU pool.
 *
 * Context: Process context.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_setup_cpu_pool(const char *ne_cpu_list)
{
	int core_id = -1;
	unsigned int cpu = 0;
	cpumask_var_t cpu_pool;
	unsigned int cpu_sibling = 0;
	unsigned int i = 0;
	int numa_node = -1;
	int rc = -EINVAL;

	if (!zalloc_cpumask_var(&cpu_pool, GFP_KERNEL))
		return -ENOMEM;

	mutex_lock(&ne_cpu_pool.mutex);

	rc = cpulist_parse(ne_cpu_list, cpu_pool);
	if (rc < 0) {
		pr_err("%s: Error in cpulist parse [rc=%d]\n", ne_misc_dev.name, rc);

		goto free_pool_cpumask;
	}

	cpu = cpumask_any(cpu_pool);
	if (cpu >= nr_cpu_ids) {
		pr_err("%s: No CPUs available in CPU pool\n", ne_misc_dev.name);

		rc = -EINVAL;

		goto free_pool_cpumask;
	}

	/*
	 * Check if the CPUs are online, to further get info about them
	 * e.g. numa node, core id, siblings.
	 */
	for_each_cpu(cpu, cpu_pool)
		if (cpu_is_offline(cpu)) {
			pr_err("%s: CPU %d is offline, has to be online to get its metadata\n",
			       ne_misc_dev.name, cpu);

			rc = -EINVAL;

			goto free_pool_cpumask;
		}

	/*
	 * Check if the CPUs from the NE CPU pool are from the same NUMA node.
	 */
	for_each_cpu(cpu, cpu_pool)
		if (numa_node < 0) {
			numa_node = cpu_to_node(cpu);
			if (numa_node < 0) {
				pr_err("%s: Invalid NUMA node %d\n",
				       ne_misc_dev.name, numa_node);

				rc = -EINVAL;

				goto free_pool_cpumask;
			}
		} else {
			if (numa_node != cpu_to_node(cpu)) {
				pr_err("%s: CPUs with different NUMA nodes\n",
				       ne_misc_dev.name);

				rc = -EINVAL;

				goto free_pool_cpumask;
			}
		}

	/*
	 * Check if CPU 0 and its siblings are included in the provided CPU pool.
	 * They should remain available for the primary / parent VM.
	 */
	if (cpumask_test_cpu(0, cpu_pool)) {
		pr_err("%s: CPU 0 has to remain available\n", ne_misc_dev.name);

		rc = -EINVAL;

		goto free_pool_cpumask;
	}

	for_each_cpu(cpu_sibling, topology_sibling_cpumask(0)) {
		if (cpumask_test_cpu(cpu_sibling, cpu_pool)) {
			pr_err("%s: CPU sibling %d for CPU 0 is in CPU pool\n",
			       ne_misc_dev.name, cpu_sibling);

			rc = -EINVAL;

			goto free_pool_cpumask;
		}
	}

	/*
	 * Check if CPU siblings are included in the provided CPU pool. The
	 * expectation is that full CPU cores are made available in the CPU pool
	 * for enclaves.
	 */
	for_each_cpu(cpu, cpu_pool) {
		for_each_cpu(cpu_sibling, topology_sibling_cpumask(cpu)) {
			if (!cpumask_test_cpu(cpu_sibling, cpu_pool)) {
				pr_err("%s: CPU %d is not in CPU pool\n",
				       ne_misc_dev.name, cpu_sibling);

				rc = -EINVAL;

				goto free_pool_cpumask;
			}
		}
	}

	/* Calculate the number of threads from a full CPU core. */
	cpu = cpumask_any(cpu_pool);
	for_each_cpu(cpu_sibling, topology_sibling_cpumask(cpu))
		ne_cpu_pool.nr_threads_per_core++;

	ne_cpu_pool.nr_parent_vm_cores = nr_cpu_ids / ne_cpu_pool.nr_threads_per_core;

	ne_cpu_pool.avail_threads_per_core = kcalloc(ne_cpu_pool.nr_parent_vm_cores,
						     sizeof(*ne_cpu_pool.avail_threads_per_core),
						     GFP_KERNEL);
	if (!ne_cpu_pool.avail_threads_per_core) {
		rc = -ENOMEM;

		goto free_pool_cpumask;
	}

	for (i = 0; i < ne_cpu_pool.nr_parent_vm_cores; i++)
		if (!zalloc_cpumask_var(&ne_cpu_pool.avail_threads_per_core[i], GFP_KERNEL)) {
			rc = -ENOMEM;

			goto free_cores_cpumask;
		}

	/*
	 * Split the NE CPU pool in threads per core to keep the CPU topology
	 * after offlining the CPUs.
	 */
	for_each_cpu(cpu, cpu_pool) {
		core_id = topology_core_id(cpu);
		if (core_id < 0 || core_id >= ne_cpu_pool.nr_parent_vm_cores) {
			pr_err("%s: Invalid core id %d for CPU %d\n",
			       ne_misc_dev.name, core_id, cpu);

			rc = -EINVAL;

			goto clear_cpumask;
		}

		cpumask_set_cpu(cpu, ne_cpu_pool.avail_threads_per_core[core_id]);
	}

	/*
	 * CPUs that are given to enclave(s) should not be considered online
	 * by Linux anymore, as the hypervisor will degrade them to floating.
	 * The physical CPUs (full cores) are carved out of the primary / parent
	 * VM and given to the enclave VM. The same number of vCPUs would run
	 * on fewer pCPUs for the primary / parent VM.
	 *
	 * We offline them here, to not degrade performance and expose correct
	 * topology to Linux and user space.
	 */
	for_each_cpu(cpu, cpu_pool) {
		rc = remove_cpu(cpu);
		if (rc != 0) {
			pr_err("%s: CPU %d is not offlined [rc=%d]\n",
			       ne_misc_dev.name, cpu, rc);

			goto online_cpus;
		}
	}

	free_cpumask_var(cpu_pool);

	ne_cpu_pool.numa_node = numa_node;

	mutex_unlock(&ne_cpu_pool.mutex);

	return 0;

online_cpus:
	for_each_cpu(cpu, cpu_pool)
		add_cpu(cpu);
clear_cpumask:
	for (i = 0; i < ne_cpu_pool.nr_parent_vm_cores; i++)
		cpumask_clear(ne_cpu_pool.avail_threads_per_core[i]);
free_cores_cpumask:
	for (i = 0; i < ne_cpu_pool.nr_parent_vm_cores; i++)
		free_cpumask_var(ne_cpu_pool.avail_threads_per_core[i]);
	kfree(ne_cpu_pool.avail_threads_per_core);
free_pool_cpumask:
	free_cpumask_var(cpu_pool);
	ne_cpu_pool.nr_parent_vm_cores = 0;
	ne_cpu_pool.nr_threads_per_core = 0;
	ne_cpu_pool.numa_node = -1;
	mutex_unlock(&ne_cpu_pool.mutex);

	return rc;
}

/**
 * ne_teardown_cpu_pool() - Online the CPUs from the NE CPU pool and cleanup the
 *			    CPU pool.
 * @void: No parameters provided.
 *
 * Context: Process context.
 */
static void ne_teardown_cpu_pool(void)
{
	unsigned int cpu = 0;
	unsigned int i = 0;
	int rc = -EINVAL;

	mutex_lock(&ne_cpu_pool.mutex);

	if (!ne_cpu_pool.nr_parent_vm_cores) {
		mutex_unlock(&ne_cpu_pool.mutex);

		return;
	}

	for (i = 0; i < ne_cpu_pool.nr_parent_vm_cores; i++) {
		for_each_cpu(cpu, ne_cpu_pool.avail_threads_per_core[i]) {
			rc = add_cpu(cpu);
			if (rc != 0)
				pr_err("%s: CPU %d is not onlined [rc=%d]\n",
				       ne_misc_dev.name, cpu, rc);
		}

		cpumask_clear(ne_cpu_pool.avail_threads_per_core[i]);

		free_cpumask_var(ne_cpu_pool.avail_threads_per_core[i]);
	}

	kfree(ne_cpu_pool.avail_threads_per_core);
	ne_cpu_pool.nr_parent_vm_cores = 0;
	ne_cpu_pool.nr_threads_per_core = 0;
	ne_cpu_pool.numa_node = -1;

	mutex_unlock(&ne_cpu_pool.mutex);
}

/**
 * ne_set_kernel_param() - Set the NE CPU pool value via the NE kernel parameter.
 * @val:	NE CPU pool string value.
 * @kp:		NE kernel parameter associated with the NE CPU pool.
 *
 * Context: Process context.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_set_kernel_param(const char *val, const struct kernel_param *kp)
{
	char error_val[] = "";
	int rc = -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (ne_check_enclaves_created()) {
		pr_err("%s: The CPU pool is used by enclave(s)\n", ne_misc_dev.name);

		return -EPERM;
	}

	ne_teardown_cpu_pool();

	rc = ne_setup_cpu_pool(val);
	if (rc < 0) {
		pr_err("%s: Error in setup CPU pool [rc=%d]\n", ne_misc_dev.name, rc);

		param_set_copystring(error_val, kp);

		return rc;
	}

	rc = param_set_copystring(val, kp);
	if (rc < 0) {
		pr_err("%s: Error in param set copystring [rc=%d]\n", ne_misc_dev.name, rc);

		ne_teardown_cpu_pool();

		param_set_copystring(error_val, kp);

		return rc;
	}

	return 0;
}

/**
 * ne_donated_cpu() - Check if the provided CPU is already used by the enclave.
 * @ne_enclave:	Private data associated with the current enclave.
 * @cpu:	CPU to check if already used.
 *
 * Context: Process context. This function is called with the ne_enclave mutex held.
 * Return:
 * * True if the provided CPU is already used by the enclave.
 * * False otherwise.
 */
static bool ne_donated_cpu(struct ne_enclave *ne_enclave, unsigned int cpu)
{
	if (cpumask_test_cpu(cpu, ne_enclave->vcpu_ids))
		return true;

	return false;
}

/**
 * ne_get_unused_core_from_cpu_pool() - Get the id of a full core from the
 *					NE CPU pool.
 * @void: No parameters provided.
 *
 * Context: Process context. This function is called with the ne_enclave and
 *	    ne_cpu_pool mutexes held.
 * Return:
 * * Core id.
 * * -1 if no CPU core available in the pool.
 */
static int ne_get_unused_core_from_cpu_pool(void)
{
	int core_id = -1;
	unsigned int i = 0;

	for (i = 0; i < ne_cpu_pool.nr_parent_vm_cores; i++)
		if (!cpumask_empty(ne_cpu_pool.avail_threads_per_core[i])) {
			core_id = i;

			break;
		}

	return core_id;
}

/**
 * ne_set_enclave_threads_per_core() - Set the threads of the provided core in
 *				       the enclave data structure.
 * @ne_enclave:	Private data associated with the current enclave.
 * @core_id:	Core id to get its threads from the NE CPU pool.
 * @vcpu_id:	vCPU id part of the provided core.
 *
 * Context: Process context. This function is called with the ne_enclave and
 *	    ne_cpu_pool mutexes held.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_set_enclave_threads_per_core(struct ne_enclave *ne_enclave,
					   int core_id, u32 vcpu_id)
{
	unsigned int cpu = 0;

	if (core_id < 0 && vcpu_id == 0) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "No CPUs available in NE CPU pool\n");

		return -NE_ERR_NO_CPUS_AVAIL_IN_POOL;
	}

	if (core_id < 0) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "CPU %d is not in NE CPU pool\n", vcpu_id);

		return -NE_ERR_VCPU_NOT_IN_CPU_POOL;
	}

	if (core_id >= ne_enclave->nr_parent_vm_cores) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "Invalid core id %d - ne_enclave\n", core_id);

		return -NE_ERR_VCPU_INVALID_CPU_CORE;
	}

	for_each_cpu(cpu, ne_cpu_pool.avail_threads_per_core[core_id])
		cpumask_set_cpu(cpu, ne_enclave->threads_per_core[core_id]);

	cpumask_clear(ne_cpu_pool.avail_threads_per_core[core_id]);

	return 0;
}

/**
 * ne_get_cpu_from_cpu_pool() - Get a CPU from the NE CPU pool, either from the
 *				remaining sibling(s) of a CPU core or the first
 *				sibling of a new CPU core.
 * @ne_enclave:	Private data associated with the current enclave.
 * @vcpu_id:	vCPU to get from the NE CPU pool.
 *
 * Context: Process context. This function is called with the ne_enclave mutex held.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_get_cpu_from_cpu_pool(struct ne_enclave *ne_enclave, u32 *vcpu_id)
{
	int core_id = -1;
	unsigned int cpu = 0;
	unsigned int i = 0;
	int rc = -EINVAL;

	/*
	 * If a thread of a core has already been allocated to this enclave,
	 * first check the remaining sibling(s) for new CPU allocations, so
	 * that full CPU cores are used for the enclave.
	 */
	for (i = 0; i < ne_enclave->nr_parent_vm_cores; i++)
		for_each_cpu(cpu, ne_enclave->threads_per_core[i])
			if (!ne_donated_cpu(ne_enclave, cpu)) {
				*vcpu_id = cpu;

				return 0;
			}

	mutex_lock(&ne_cpu_pool.mutex);

	/*
	 * If there are no remaining siblings, get a core from the NE CPU pool
	 * and keep track of all the threads in the enclave threads per core
	 * data structure.
	 */
	core_id = ne_get_unused_core_from_cpu_pool();

	rc = ne_set_enclave_threads_per_core(ne_enclave, core_id, *vcpu_id);
	if (rc < 0)
		goto unlock_mutex;

	*vcpu_id = cpumask_any(ne_enclave->threads_per_core[core_id]);

	rc = 0;

unlock_mutex:
	mutex_unlock(&ne_cpu_pool.mutex);

	return rc;
}

/**
 * ne_get_vcpu_core_from_cpu_pool() - Get from the NE CPU pool the id of the
 *				      core associated with the provided vCPU.
 * @vcpu_id:	Provided vCPU id to get its associated core id.
 *
 * Context: Process context. This function is called with the ne_enclave and
 *	    ne_cpu_pool mutexes held.
 * Return:
 * * Core id.
 * * -1 if the provided vCPU is not in the pool.
 */
static int ne_get_vcpu_core_from_cpu_pool(u32 vcpu_id)
{
	int core_id = -1;
	unsigned int i = 0;

	for (i = 0; i < ne_cpu_pool.nr_parent_vm_cores; i++)
		if (cpumask_test_cpu(vcpu_id, ne_cpu_pool.avail_threads_per_core[i])) {
			core_id = i;

			break;
		}

	return core_id;
}

/**
 * ne_check_cpu_in_cpu_pool() - Check if the given vCPU is in the available CPUs
 *				from the pool.
 * @ne_enclave:	Private data associated with the current enclave.
 * @vcpu_id:	ID of the vCPU to check if available in the NE CPU pool.
 *
 * Context: Process context. This function is called with the ne_enclave mutex held.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_check_cpu_in_cpu_pool(struct ne_enclave *ne_enclave, u32 vcpu_id)
{
	int core_id = -1;
	unsigned int i = 0;
	int rc = -EINVAL;

	if (ne_donated_cpu(ne_enclave, vcpu_id)) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "CPU %d already used\n", vcpu_id);

		return -NE_ERR_VCPU_ALREADY_USED;
	}

	/*
	 * If a thread of a core has already been allocated to this enclave,
	 * but not the full core, first check the remaining sibling(s).
	 */
	for (i = 0; i < ne_enclave->nr_parent_vm_cores; i++)
		if (cpumask_test_cpu(vcpu_id, ne_enclave->threads_per_core[i]))
			return 0;

	mutex_lock(&ne_cpu_pool.mutex);

	/*
	 * If there are no remaining siblings, get from the NE CPU pool the
	 * core associated with the vCPU and keep track of all the threads in
	 * the enclave threads per core data structure.
	 */
	core_id = ne_get_vcpu_core_from_cpu_pool(vcpu_id);

	rc = ne_set_enclave_threads_per_core(ne_enclave, core_id, vcpu_id);
	if (rc < 0)
		goto unlock_mutex;

	rc = 0;

unlock_mutex:
	mutex_unlock(&ne_cpu_pool.mutex);

	return rc;
}

/**
 * ne_add_vcpu_ioctl() - Add a vCPU to the slot associated with the current
 *			 enclave.
 * @ne_enclave:	Private data associated with the current enclave.
 * @vcpu_id:	ID of the CPU to be associated with the given slot,
 *		apic id on x86.
 *
 * Context: Process context. This function is called with the ne_enclave mutex held.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_add_vcpu_ioctl(struct ne_enclave *ne_enclave, u32 vcpu_id)
{
	struct ne_pci_dev_cmd_reply cmd_reply = {};
	struct pci_dev *pdev = ne_devs.ne_pci_dev->pdev;
	int rc = -EINVAL;
	struct slot_add_vcpu_req slot_add_vcpu_req = {};

	if (ne_enclave->mm != current->mm)
		return -EIO;

	slot_add_vcpu_req.slot_uid = ne_enclave->slot_uid;
	slot_add_vcpu_req.vcpu_id = vcpu_id;

	rc = ne_do_request(pdev, SLOT_ADD_VCPU,
			   &slot_add_vcpu_req, sizeof(slot_add_vcpu_req),
			   &cmd_reply, sizeof(cmd_reply));
	if (rc < 0) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "Error in slot add vCPU [rc=%d]\n", rc);

		return rc;
	}

	cpumask_set_cpu(vcpu_id, ne_enclave->vcpu_ids);

	ne_enclave->nr_vcpus++;

	return 0;
}

/**
 * ne_sanity_check_user_mem_region() - Sanity check the user space memory
 *				       region received during the set user
 *				       memory region ioctl call.
 * @ne_enclave:	Private data associated with the current enclave.
 * @mem_region:	User space memory region to be sanity checked.
 *
 * Context: Process context. This function is called with the ne_enclave mutex held.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_sanity_check_user_mem_region(struct ne_enclave *ne_enclave,
					   struct ne_user_memory_region mem_region)
{
	struct ne_mem_region *ne_mem_region = NULL;

	if (ne_enclave->mm != current->mm)
		return -EIO;

	if (mem_region.memory_size & (NE_MIN_MEM_REGION_SIZE - 1)) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "User space memory size is not a multiple of 2 MiB\n");

		return -NE_ERR_INVALID_MEM_REGION_SIZE;
	}

	if (!IS_ALIGNED(mem_region.userspace_addr, NE_MIN_MEM_REGION_SIZE)) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "User space address is not 2 MiB aligned\n");

		return -NE_ERR_UNALIGNED_MEM_REGION_ADDR;
	}

	if ((mem_region.userspace_addr & (NE_MIN_MEM_REGION_SIZE - 1)) ||
	    !access_ok((void __user *)(unsigned long)mem_region.userspace_addr,
		       mem_region.memory_size)) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "Invalid user space address range\n");

		return -NE_ERR_INVALID_MEM_REGION_ADDR;
	}

	list_for_each_entry(ne_mem_region, &ne_enclave->mem_regions_list,
			    mem_region_list_entry) {
		u64 memory_size = ne_mem_region->memory_size;
		u64 userspace_addr = ne_mem_region->userspace_addr;

		if ((userspace_addr <= mem_region.userspace_addr &&
		     mem_region.userspace_addr < (userspace_addr + memory_size)) ||
		    (mem_region.userspace_addr <= userspace_addr &&
		     (mem_region.userspace_addr + mem_region.memory_size) > userspace_addr)) {
			dev_err_ratelimited(ne_misc_dev.this_device,
					    "User space memory region already used\n");

			return -NE_ERR_MEM_REGION_ALREADY_USED;
		}
	}

	return 0;
}

/**
 * ne_sanity_check_user_mem_region_page() - Sanity check a page from the user space
 *					    memory region received during the set
 *					    user memory region ioctl call.
 * @ne_enclave:		Private data associated with the current enclave.
 * @mem_region_page:	Page from the user space memory region to be sanity checked.
 *
 * Context: Process context. This function is called with the ne_enclave mutex held.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_sanity_check_user_mem_region_page(struct ne_enclave *ne_enclave,
						struct page *mem_region_page)
{
	if (!PageHuge(mem_region_page)) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "Not a hugetlbfs page\n");

		return -NE_ERR_MEM_NOT_HUGE_PAGE;
	}

	if (page_size(mem_region_page) & (NE_MIN_MEM_REGION_SIZE - 1)) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "Page size is not a multiple of 2 MiB\n");

		return -NE_ERR_INVALID_PAGE_SIZE;
	}

	if (ne_enclave->numa_node != page_to_nid(mem_region_page)) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "Page is not from NUMA node %d\n",
				    ne_enclave->numa_node);

		return -NE_ERR_MEM_DIFFERENT_NUMA_NODE;
	}

	return 0;
}

/**
 * ne_sanity_check_phys_mem_region() - Sanity check the start address and the size
 *				       of a physical memory region.
 * @phys_mem_region_paddr:	Physical start address of the region to be sanity checked.
 * @phys_mem_region_size:	Length of the region to be sanity checked.
 *
 * Context: Process context. This function is called with the ne_enclave mutex held.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_sanity_check_phys_mem_region(u64 phys_mem_region_paddr,
					   u64 phys_mem_region_size)
{
	if (phys_mem_region_size & (NE_MIN_MEM_REGION_SIZE - 1)) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "Physical mem region size is not a multiple of 2 MiB\n");

		return -EINVAL;
	}

	if (!IS_ALIGNED(phys_mem_region_paddr, NE_MIN_MEM_REGION_SIZE)) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "Physical mem region address is not 2 MiB aligned\n");

		return -EINVAL;
	}

	return 0;
}

/**
 * ne_merge_phys_contig_memory_regions() - Add a memory region and merge the adjacent
 *					   regions if they are physically contiguous.
 * @phys_contig_regions:	Private data associated with the contiguous physical
 *				memory regions.
 * @page_paddr:			Physical start address of the region to be added.
 * @page_size:			Length of the region to be added.
 *
 * Context: Process context. This function is called with the ne_enclave mutex held.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int
ne_merge_phys_contig_memory_regions(struct ne_phys_contig_mem_regions *phys_contig_regions,
				    u64 page_paddr, u64 page_size)
{
	unsigned long num = phys_contig_regions->num;
	int rc = 0;

	rc = ne_sanity_check_phys_mem_region(page_paddr, page_size);
	if (rc < 0)
		return rc;

	/* Physically contiguous, just merge */
	if (num && (phys_contig_regions->regions[num - 1].end + 1) == page_paddr) {
		phys_contig_regions->regions[num - 1].end += page_size;
	} else {
		phys_contig_regions->regions[num].start = page_paddr;
		phys_contig_regions->regions[num].end = page_paddr + page_size - 1;
		phys_contig_regions->num++;
	}

	return 0;
}

/**
 * ne_set_user_memory_region_ioctl() - Add user space memory region to the slot
 *				       associated with the current enclave.
 * @ne_enclave:	Private data associated with the current enclave.
 * @mem_region:	User space memory region to be associated with the given slot.
 *
 * Context: Process context. This function is called with the ne_enclave mutex held.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_set_user_memory_region_ioctl(struct ne_enclave *ne_enclave,
					   struct ne_user_memory_region mem_region)
{
	long gup_rc = 0;
	unsigned long i = 0;
	unsigned long max_nr_pages = 0;
	unsigned long memory_size = 0;
	struct ne_mem_region *ne_mem_region = NULL;
	struct pci_dev *pdev = ne_devs.ne_pci_dev->pdev;
	struct ne_phys_contig_mem_regions phys_contig_mem_regions = {};
	int rc = -EINVAL;

	rc = ne_sanity_check_user_mem_region(ne_enclave, mem_region);
	if (rc < 0)
		return rc;

	ne_mem_region = kzalloc(sizeof(*ne_mem_region), GFP_KERNEL);
	if (!ne_mem_region)
		return -ENOMEM;

	max_nr_pages = mem_region.memory_size / NE_MIN_MEM_REGION_SIZE;

	ne_mem_region->pages = kcalloc(max_nr_pages, sizeof(*ne_mem_region->pages),
				       GFP_KERNEL);
	if (!ne_mem_region->pages) {
		rc = -ENOMEM;

		goto free_mem_region;
	}

	phys_contig_mem_regions.regions = kcalloc(max_nr_pages,
						  sizeof(*phys_contig_mem_regions.regions),
						  GFP_KERNEL);
	if (!phys_contig_mem_regions.regions) {
		rc = -ENOMEM;

		goto free_mem_region;
	}

	do {
		i = ne_mem_region->nr_pages;

		if (i == max_nr_pages) {
			dev_err_ratelimited(ne_misc_dev.this_device,
					    "Reached max nr of pages in the pages data struct\n");

			rc = -ENOMEM;

			goto put_pages;
		}

		gup_rc = get_user_pages_unlocked(mem_region.userspace_addr + memory_size, 1,
						 ne_mem_region->pages + i, FOLL_GET);

		if (gup_rc < 0) {
			rc = gup_rc;

			dev_err_ratelimited(ne_misc_dev.this_device,
					    "Error in get user pages [rc=%d]\n", rc);

			goto put_pages;
		}

		rc = ne_sanity_check_user_mem_region_page(ne_enclave, ne_mem_region->pages[i]);
		if (rc < 0)
			goto put_pages;

		rc = ne_merge_phys_contig_memory_regions(&phys_contig_mem_regions,
							 page_to_phys(ne_mem_region->pages[i]),
							 page_size(ne_mem_region->pages[i]));
		if (rc < 0)
			goto put_pages;

		memory_size += page_size(ne_mem_region->pages[i]);

		ne_mem_region->nr_pages++;
	} while (memory_size < mem_region.memory_size);

	if ((ne_enclave->nr_mem_regions + phys_contig_mem_regions.num) >
	    ne_enclave->max_mem_regions) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "Reached max memory regions %lld\n",
				    ne_enclave->max_mem_regions);

		rc = -NE_ERR_MEM_MAX_REGIONS;

		goto put_pages;
	}

	for (i = 0; i < phys_contig_mem_regions.num; i++) {
		u64 phys_region_addr = phys_contig_mem_regions.regions[i].start;
		u64 phys_region_size = range_len(&phys_contig_mem_regions.regions[i]);

		rc = ne_sanity_check_phys_mem_region(phys_region_addr, phys_region_size);
		if (rc < 0)
			goto put_pages;
	}

	ne_mem_region->memory_size = mem_region.memory_size;
	ne_mem_region->userspace_addr = mem_region.userspace_addr;

	list_add(&ne_mem_region->mem_region_list_entry, &ne_enclave->mem_regions_list);

	for (i = 0; i < phys_contig_mem_regions.num; i++) {
		struct ne_pci_dev_cmd_reply cmd_reply = {};
		struct slot_add_mem_req slot_add_mem_req = {};

		slot_add_mem_req.slot_uid = ne_enclave->slot_uid;
		slot_add_mem_req.paddr = phys_contig_mem_regions.regions[i].start;
		slot_add_mem_req.size = range_len(&phys_contig_mem_regions.regions[i]);

		rc = ne_do_request(pdev, SLOT_ADD_MEM,
				   &slot_add_mem_req, sizeof(slot_add_mem_req),
				   &cmd_reply, sizeof(cmd_reply));
		if (rc < 0) {
			dev_err_ratelimited(ne_misc_dev.this_device,
					    "Error in slot add mem [rc=%d]\n", rc);

			kfree(phys_contig_mem_regions.regions);

			/*
			 * Exit here without put pages as memory regions may
			 * have already been added.
			 */
			return rc;
		}

		ne_enclave->mem_size += slot_add_mem_req.size;
		ne_enclave->nr_mem_regions++;
	}

	kfree(phys_contig_mem_regions.regions);

	return 0;

put_pages:
	for (i = 0; i < ne_mem_region->nr_pages; i++)
		put_page(ne_mem_region->pages[i]);
free_mem_region:
	kfree(phys_contig_mem_regions.regions);
	kfree(ne_mem_region->pages);
	kfree(ne_mem_region);

	return rc;
}
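
/*
 * A minimal user space sketch for donating memory, assuming enclave_fd was
 * obtained via the NE_CREATE_VM ioctl and 2 MiB huge pages are available;
 * error handling is omitted.
 *
 *	struct ne_user_memory_region mem_region = {
 *		.flags = NE_DEFAULT_MEMORY_REGION,
 *		.memory_size = 2 * 1024 * 1024,
 *	};
 *
 *	mem_region.userspace_addr = (__u64)mmap(NULL, mem_region.memory_size,
 *						PROT_READ | PROT_WRITE,
 *						MAP_PRIVATE | MAP_ANONYMOUS |
 *						MAP_HUGETLB | MAP_HUGE_2MB,
 *						-1, 0);
 *
 *	ioctl(enclave_fd, NE_SET_USER_MEMORY_REGION, &mem_region);
 */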

/**
 * ne_start_enclave_ioctl() - Trigger enclave start after the enclave resources,
 *			      such as memory and CPU, have been set.
 * @ne_enclave:		Private data associated with the current enclave.
 * @enclave_start_info:	Enclave info that includes enclave cid and flags.
 *
 * Context: Process context. This function is called with the ne_enclave mutex held.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_start_enclave_ioctl(struct ne_enclave *ne_enclave,
				  struct ne_enclave_start_info *enclave_start_info)
{
	struct ne_pci_dev_cmd_reply cmd_reply = {};
	unsigned int cpu = 0;
	struct enclave_start_req enclave_start_req = {};
	unsigned int i = 0;
	struct pci_dev *pdev = ne_devs.ne_pci_dev->pdev;
	int rc = -EINVAL;

	if (!ne_enclave->nr_mem_regions) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "Enclave has no mem regions\n");

		return -NE_ERR_NO_MEM_REGIONS_ADDED;
	}

	if (ne_enclave->mem_size < NE_MIN_ENCLAVE_MEM_SIZE) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "Enclave memory is less than %ld\n",
				    NE_MIN_ENCLAVE_MEM_SIZE);

		return -NE_ERR_ENCLAVE_MEM_MIN_SIZE;
	}

	if (!ne_enclave->nr_vcpus) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "Enclave has no vCPUs\n");

		return -NE_ERR_NO_VCPUS_ADDED;
	}

	for (i = 0; i < ne_enclave->nr_parent_vm_cores; i++)
		for_each_cpu(cpu, ne_enclave->threads_per_core[i])
			if (!cpumask_test_cpu(cpu, ne_enclave->vcpu_ids)) {
				dev_err_ratelimited(ne_misc_dev.this_device,
						    "Full CPU cores not used\n");

				return -NE_ERR_FULL_CORES_NOT_USED;
			}

	enclave_start_req.enclave_cid = enclave_start_info->enclave_cid;
	enclave_start_req.flags = enclave_start_info->flags;
	enclave_start_req.slot_uid = ne_enclave->slot_uid;

	rc = ne_do_request(pdev, ENCLAVE_START,
			   &enclave_start_req, sizeof(enclave_start_req),
			   &cmd_reply, sizeof(cmd_reply));
	if (rc < 0) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "Error in enclave start [rc=%d]\n", rc);

		return rc;
	}

	ne_enclave->state = NE_STATE_RUNNING;

	enclave_start_info->enclave_cid = cmd_reply.enclave_cid;

	return 0;
}
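
/*
 * A minimal user space sketch for starting the enclave once memory and vCPUs
 * have been set; an enclave_cid of 0 asks for an auto-generated CID.
 *
 *	struct ne_enclave_start_info start_info = { .enclave_cid = 0 };
 *
 *	ioctl(enclave_fd, NE_START_ENCLAVE, &start_info);
 *	// start_info.enclave_cid now holds the CID to use over vsock.
 */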

/**
 * ne_enclave_ioctl() - Ioctl function provided by the enclave file.
 * @file:	File associated with this ioctl function.
 * @cmd:	The command that is set for the ioctl call.
 * @arg:	The argument that is provided for the ioctl call.
 *
 * Context: Process context.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static long ne_enclave_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct ne_enclave *ne_enclave = file->private_data;

	switch (cmd) {
	case NE_ADD_VCPU: {
		int rc = -EINVAL;
		u32 vcpu_id = 0;

		if (copy_from_user(&vcpu_id, (void __user *)arg, sizeof(vcpu_id)))
			return -EFAULT;

		mutex_lock(&ne_enclave->enclave_info_mutex);

		if (ne_enclave->state != NE_STATE_INIT) {
			dev_err_ratelimited(ne_misc_dev.this_device,
					    "Enclave is not in init state\n");

			mutex_unlock(&ne_enclave->enclave_info_mutex);

			return -NE_ERR_NOT_IN_INIT_STATE;
		}

		if (vcpu_id >= (ne_enclave->nr_parent_vm_cores *
		    ne_enclave->nr_threads_per_core)) {
			dev_err_ratelimited(ne_misc_dev.this_device,
					    "vCPU id higher than max CPU id\n");

			mutex_unlock(&ne_enclave->enclave_info_mutex);

			return -NE_ERR_INVALID_VCPU;
		}

		if (!vcpu_id) {
			/* Use the CPU pool for choosing a CPU for the enclave. */
			rc = ne_get_cpu_from_cpu_pool(ne_enclave, &vcpu_id);
			if (rc < 0) {
				dev_err_ratelimited(ne_misc_dev.this_device,
						    "Error in get CPU from pool [rc=%d]\n",
						    rc);

				mutex_unlock(&ne_enclave->enclave_info_mutex);

				return rc;
			}
		} else {
			/* Check if the provided vCPU is available in the NE CPU pool. */
			rc = ne_check_cpu_in_cpu_pool(ne_enclave, vcpu_id);
			if (rc < 0) {
				dev_err_ratelimited(ne_misc_dev.this_device,
						    "Error in check CPU %d in pool [rc=%d]\n",
						    vcpu_id, rc);

				mutex_unlock(&ne_enclave->enclave_info_mutex);

				return rc;
			}
		}

		rc = ne_add_vcpu_ioctl(ne_enclave, vcpu_id);
		if (rc < 0) {
			mutex_unlock(&ne_enclave->enclave_info_mutex);

			return rc;
		}

		mutex_unlock(&ne_enclave->enclave_info_mutex);

		if (copy_to_user((void __user *)arg, &vcpu_id, sizeof(vcpu_id)))
			return -EFAULT;

		return 0;
	}

	case NE_GET_IMAGE_LOAD_INFO: {
		struct ne_image_load_info image_load_info = {};

		if (copy_from_user(&image_load_info, (void __user *)arg, sizeof(image_load_info)))
			return -EFAULT;

		mutex_lock(&ne_enclave->enclave_info_mutex);

		if (ne_enclave->state != NE_STATE_INIT) {
			dev_err_ratelimited(ne_misc_dev.this_device,
					    "Enclave is not in init state\n");

			mutex_unlock(&ne_enclave->enclave_info_mutex);

			return -NE_ERR_NOT_IN_INIT_STATE;
		}

		mutex_unlock(&ne_enclave->enclave_info_mutex);

		if (!image_load_info.flags ||
		    image_load_info.flags >= NE_IMAGE_LOAD_MAX_FLAG_VAL) {
			dev_err_ratelimited(ne_misc_dev.this_device,
					    "Incorrect flag in enclave image load info\n");

			return -NE_ERR_INVALID_FLAG_VALUE;
		}

		if (image_load_info.flags == NE_EIF_IMAGE)
			image_load_info.memory_offset = NE_EIF_LOAD_OFFSET;

		if (copy_to_user((void __user *)arg, &image_load_info, sizeof(image_load_info)))
			return -EFAULT;

		return 0;
	}

	case NE_SET_USER_MEMORY_REGION: {
		struct ne_user_memory_region mem_region = {};
		int rc = -EINVAL;

		if (copy_from_user(&mem_region, (void __user *)arg, sizeof(mem_region)))
			return -EFAULT;

		if (mem_region.flags >= NE_MEMORY_REGION_MAX_FLAG_VAL) {
			dev_err_ratelimited(ne_misc_dev.this_device,
					    "Incorrect flag for user memory region\n");

			return -NE_ERR_INVALID_FLAG_VALUE;
		}

		mutex_lock(&ne_enclave->enclave_info_mutex);

		if (ne_enclave->state != NE_STATE_INIT) {
			dev_err_ratelimited(ne_misc_dev.this_device,
					    "Enclave is not in init state\n");

			mutex_unlock(&ne_enclave->enclave_info_mutex);

			return -NE_ERR_NOT_IN_INIT_STATE;
		}

		rc = ne_set_user_memory_region_ioctl(ne_enclave, mem_region);
		if (rc < 0) {
			mutex_unlock(&ne_enclave->enclave_info_mutex);

			return rc;
		}

		mutex_unlock(&ne_enclave->enclave_info_mutex);

		return 0;
	}

	case NE_START_ENCLAVE: {
		struct ne_enclave_start_info enclave_start_info = {};
		int rc = -EINVAL;

		if (copy_from_user(&enclave_start_info, (void __user *)arg,
				   sizeof(enclave_start_info)))
			return -EFAULT;

		if (enclave_start_info.flags >= NE_ENCLAVE_START_MAX_FLAG_VAL) {
			dev_err_ratelimited(ne_misc_dev.this_device,
					    "Incorrect flag in enclave start info\n");

			return -NE_ERR_INVALID_FLAG_VALUE;
		}

		/*
		 * Do not use well-known CIDs - 0, 1, 2 - for enclaves.
		 * VMADDR_CID_ANY = -1U
		 * VMADDR_CID_HYPERVISOR = 0
		 * VMADDR_CID_LOCAL = 1
		 * VMADDR_CID_HOST = 2
		 * Note: 0 is used as a placeholder to auto-generate an enclave CID.
		 * http://man7.org/linux/man-pages/man7/vsock.7.html
		 */
		if (enclave_start_info.enclave_cid > 0 &&
		    enclave_start_info.enclave_cid <= VMADDR_CID_HOST) {
			dev_err_ratelimited(ne_misc_dev.this_device,
					    "Well-known CID value, not to be used for enclaves\n");

			return -NE_ERR_INVALID_ENCLAVE_CID;
		}

		if (enclave_start_info.enclave_cid == U32_MAX) {
			dev_err_ratelimited(ne_misc_dev.this_device,
					    "Well-known CID value, not to be used for enclaves\n");

			return -NE_ERR_INVALID_ENCLAVE_CID;
		}

		/*
		 * Do not use the CID of the primary / parent VM for enclaves.
		 */
		if (enclave_start_info.enclave_cid == NE_PARENT_VM_CID) {
			dev_err_ratelimited(ne_misc_dev.this_device,
					    "CID of the parent VM, not to be used for enclaves\n");

			return -NE_ERR_INVALID_ENCLAVE_CID;
		}

		/* 64-bit CIDs are not yet supported for the vsock device. */
		if (enclave_start_info.enclave_cid > U32_MAX) {
			dev_err_ratelimited(ne_misc_dev.this_device,
					    "64-bit CIDs not yet supported for the vsock device\n");

			return -NE_ERR_INVALID_ENCLAVE_CID;
		}

		mutex_lock(&ne_enclave->enclave_info_mutex);

		if (ne_enclave->state != NE_STATE_INIT) {
			dev_err_ratelimited(ne_misc_dev.this_device,
					    "Enclave is not in init state\n");

			mutex_unlock(&ne_enclave->enclave_info_mutex);

			return -NE_ERR_NOT_IN_INIT_STATE;
		}

		rc = ne_start_enclave_ioctl(ne_enclave, &enclave_start_info);
		if (rc < 0) {
			mutex_unlock(&ne_enclave->enclave_info_mutex);

			return rc;
		}

		mutex_unlock(&ne_enclave->enclave_info_mutex);

		if (copy_to_user((void __user *)arg, &enclave_start_info,
				 sizeof(enclave_start_info)))
			return -EFAULT;

		return 0;
	}

	default:
		return -ENOTTY;
	}

	return 0;
}

/**
 * ne_enclave_remove_all_mem_region_entries() - Remove all memory region entries
 *						from the enclave data structure.
 * @ne_enclave:	Private data associated with the current enclave.
 *
 * Context: Process context. This function is called with the ne_enclave mutex held.
 */
static void ne_enclave_remove_all_mem_region_entries(struct ne_enclave *ne_enclave)
{
	unsigned long i = 0;
	struct ne_mem_region *ne_mem_region = NULL;
	struct ne_mem_region *ne_mem_region_tmp = NULL;

	list_for_each_entry_safe(ne_mem_region, ne_mem_region_tmp,
				 &ne_enclave->mem_regions_list,
				 mem_region_list_entry) {
		list_del(&ne_mem_region->mem_region_list_entry);

		for (i = 0; i < ne_mem_region->nr_pages; i++)
			put_page(ne_mem_region->pages[i]);

		kfree(ne_mem_region->pages);

		kfree(ne_mem_region);
	}
}

/**
 * ne_enclave_remove_all_vcpu_id_entries() - Remove all vCPU id entries from
 *					     the enclave data structure.
 * @ne_enclave:	Private data associated with the current enclave.
 *
 * Context: Process context. This function is called with the ne_enclave mutex held.
 */
static void ne_enclave_remove_all_vcpu_id_entries(struct ne_enclave *ne_enclave)
{
	unsigned int cpu = 0;
	unsigned int i = 0;

	mutex_lock(&ne_cpu_pool.mutex);

	for (i = 0; i < ne_enclave->nr_parent_vm_cores; i++) {
		for_each_cpu(cpu, ne_enclave->threads_per_core[i])
			/* Update the available NE CPU pool. */
			cpumask_set_cpu(cpu, ne_cpu_pool.avail_threads_per_core[i]);

		free_cpumask_var(ne_enclave->threads_per_core[i]);
	}

	mutex_unlock(&ne_cpu_pool.mutex);

	kfree(ne_enclave->threads_per_core);

	free_cpumask_var(ne_enclave->vcpu_ids);
}

/**
 * ne_pci_dev_remove_enclave_entry() - Remove the enclave entry from the data
 *				       structure that is part of the NE PCI
 *				       device private data.
 * @ne_enclave:	Private data associated with the current enclave.
 * @ne_pci_dev:	Private data associated with the PCI device.
 *
 * Context: Process context. This function is called with the ne_pci_dev enclave
 *	    mutex held.
 */
static void ne_pci_dev_remove_enclave_entry(struct ne_enclave *ne_enclave,
					    struct ne_pci_dev *ne_pci_dev)
{
	struct ne_enclave *ne_enclave_entry = NULL;
	struct ne_enclave *ne_enclave_entry_tmp = NULL;

	list_for_each_entry_safe(ne_enclave_entry, ne_enclave_entry_tmp,
				 &ne_pci_dev->enclaves_list, enclave_list_entry) {
		if (ne_enclave_entry->slot_uid == ne_enclave->slot_uid) {
			list_del(&ne_enclave_entry->enclave_list_entry);

			break;
		}
	}
}

/**
 * ne_enclave_release() - Release function provided by the enclave file.
 * @inode:	Inode associated with this file release function.
 * @file:	File associated with this release function.
 *
 * Context: Process context.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_enclave_release(struct inode *inode, struct file *file)
{
	struct ne_pci_dev_cmd_reply cmd_reply = {};
	struct enclave_stop_req enclave_stop_request = {};
	struct ne_enclave *ne_enclave = file->private_data;
	struct ne_pci_dev *ne_pci_dev = ne_devs.ne_pci_dev;
	struct pci_dev *pdev = ne_pci_dev->pdev;
	int rc = -EINVAL;
	struct slot_free_req slot_free_req = {};

	if (!ne_enclave)
		return 0;

	/*
	 * Early exit in case there is an error in the enclave creation logic
	 * and fput() is called on the cleanup path.
	 */
	if (!ne_enclave->slot_uid)
		return 0;

	/*
	 * Acquire the enclave list mutex before the enclave mutex
	 * in order to avoid deadlocks with @ref ne_event_work_handler.
	 */
	mutex_lock(&ne_pci_dev->enclaves_list_mutex);
	mutex_lock(&ne_enclave->enclave_info_mutex);

	if (ne_enclave->state != NE_STATE_INIT && ne_enclave->state != NE_STATE_STOPPED) {
		enclave_stop_request.slot_uid = ne_enclave->slot_uid;

		rc = ne_do_request(pdev, ENCLAVE_STOP,
				   &enclave_stop_request, sizeof(enclave_stop_request),
				   &cmd_reply, sizeof(cmd_reply));
		if (rc < 0) {
			dev_err_ratelimited(ne_misc_dev.this_device,
					    "Error in enclave stop [rc=%d]\n", rc);

			goto unlock_mutex;
		}

		memset(&cmd_reply, 0, sizeof(cmd_reply));
	}

	slot_free_req.slot_uid = ne_enclave->slot_uid;

	rc = ne_do_request(pdev, SLOT_FREE,
			   &slot_free_req, sizeof(slot_free_req),
			   &cmd_reply, sizeof(cmd_reply));
	if (rc < 0) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "Error in slot free [rc=%d]\n", rc);

		goto unlock_mutex;
	}

	ne_pci_dev_remove_enclave_entry(ne_enclave, ne_pci_dev);
	ne_enclave_remove_all_mem_region_entries(ne_enclave);
	ne_enclave_remove_all_vcpu_id_entries(ne_enclave);

	mutex_unlock(&ne_enclave->enclave_info_mutex);
	mutex_unlock(&ne_pci_dev->enclaves_list_mutex);

	kfree(ne_enclave);

	return 0;

unlock_mutex:
	mutex_unlock(&ne_enclave->enclave_info_mutex);
	mutex_unlock(&ne_pci_dev->enclaves_list_mutex);

	return rc;
}

/**
 * ne_enclave_poll() - Poll functionality used for enclave out-of-band events.
 * @file:	File associated with this poll function.
 * @wait:	Poll table data structure.
 *
 * Context: Process context.
 * Return:
 * * Poll mask.
 */
static __poll_t ne_enclave_poll(struct file *file, poll_table *wait)
{
	__poll_t mask = 0;
	struct ne_enclave *ne_enclave = file->private_data;

	poll_wait(file, &ne_enclave->eventq, wait);

	if (ne_enclave->has_event)
		mask |= EPOLLHUP;

	return mask;
}
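
/*
 * Sketch: user space can block on the enclave fd to learn about an enclave
 * exit; POLLHUP is reported once the out-of-band event has arrived.
 *
 *	struct pollfd pfd = { .fd = enclave_fd, .events = 0 };
 *
 *	poll(&pfd, 1, -1);
 *	if (pfd.revents & POLLHUP)
 *		; // the enclave stopped
 */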

static const struct file_operations ne_enclave_fops = {
	.owner = THIS_MODULE,
	.llseek = noop_llseek,
	.poll = ne_enclave_poll,
	.unlocked_ioctl = ne_enclave_ioctl,
	.release = ne_enclave_release,
};

/**
 * ne_create_vm_ioctl() - Alloc slot to be associated with an enclave. Create
 *			  enclave file descriptor to be further used for enclave
 *			  resources handling e.g. memory regions and CPUs.
 * @ne_pci_dev:	Private data associated with the PCI device.
 * @slot_uid:	User pointer to store the generated unique slot id
 *		associated with an enclave to.
 *
 * Context: Process context. This function is called with the ne_pci_dev enclave
 *	    mutex held.
 * Return:
 * * Enclave fd on success.
 * * Negative return value on failure.
 */
static int ne_create_vm_ioctl(struct ne_pci_dev *ne_pci_dev, u64 __user *slot_uid)
{
	struct ne_pci_dev_cmd_reply cmd_reply = {};
	int enclave_fd = -1;
	struct file *enclave_file = NULL;
	unsigned int i = 0;
	struct ne_enclave *ne_enclave = NULL;
	struct pci_dev *pdev = ne_pci_dev->pdev;
	int rc = -EINVAL;
	struct slot_alloc_req slot_alloc_req = {};

	mutex_lock(&ne_cpu_pool.mutex);

	for (i = 0; i < ne_cpu_pool.nr_parent_vm_cores; i++)
		if (!cpumask_empty(ne_cpu_pool.avail_threads_per_core[i]))
			break;

	if (i == ne_cpu_pool.nr_parent_vm_cores) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "No CPUs available in CPU pool\n");

		mutex_unlock(&ne_cpu_pool.mutex);

		return -NE_ERR_NO_CPUS_AVAIL_IN_POOL;
	}

	mutex_unlock(&ne_cpu_pool.mutex);

	ne_enclave = kzalloc(sizeof(*ne_enclave), GFP_KERNEL);
	if (!ne_enclave)
		return -ENOMEM;

	mutex_lock(&ne_cpu_pool.mutex);

	ne_enclave->nr_parent_vm_cores = ne_cpu_pool.nr_parent_vm_cores;
	ne_enclave->nr_threads_per_core = ne_cpu_pool.nr_threads_per_core;
	ne_enclave->numa_node = ne_cpu_pool.numa_node;

	mutex_unlock(&ne_cpu_pool.mutex);

	ne_enclave->threads_per_core = kcalloc(ne_enclave->nr_parent_vm_cores,
					       sizeof(*ne_enclave->threads_per_core),
					       GFP_KERNEL);
	if (!ne_enclave->threads_per_core) {
		rc = -ENOMEM;

		goto free_ne_enclave;
	}

	for (i = 0; i < ne_enclave->nr_parent_vm_cores; i++)
		if (!zalloc_cpumask_var(&ne_enclave->threads_per_core[i], GFP_KERNEL)) {
			rc = -ENOMEM;

			goto free_cpumask;
		}

	if (!zalloc_cpumask_var(&ne_enclave->vcpu_ids, GFP_KERNEL)) {
		rc = -ENOMEM;

		goto free_cpumask;
	}

	enclave_fd = get_unused_fd_flags(O_CLOEXEC);
	if (enclave_fd < 0) {
		rc = enclave_fd;

		dev_err_ratelimited(ne_misc_dev.this_device,
				    "Error in getting unused fd [rc=%d]\n", rc);

		goto free_cpumask;
	}

	enclave_file = anon_inode_getfile("ne-vm", &ne_enclave_fops, ne_enclave, O_RDWR);
	if (IS_ERR(enclave_file)) {
		rc = PTR_ERR(enclave_file);

		dev_err_ratelimited(ne_misc_dev.this_device,
				    "Error in anon inode get file [rc=%d]\n", rc);

		goto put_fd;
	}

	rc = ne_do_request(pdev, SLOT_ALLOC,
			   &slot_alloc_req, sizeof(slot_alloc_req),
			   &cmd_reply, sizeof(cmd_reply));
	if (rc < 0) {
		dev_err_ratelimited(ne_misc_dev.this_device,
				    "Error in slot alloc [rc=%d]\n", rc);

		goto put_file;
	}

	init_waitqueue_head(&ne_enclave->eventq);
	ne_enclave->has_event = false;
	mutex_init(&ne_enclave->enclave_info_mutex);
	ne_enclave->max_mem_regions = cmd_reply.mem_regions;
	INIT_LIST_HEAD(&ne_enclave->mem_regions_list);
	ne_enclave->mm = current->mm;
	ne_enclave->slot_uid = cmd_reply.slot_uid;
	ne_enclave->state = NE_STATE_INIT;

	list_add(&ne_enclave->enclave_list_entry, &ne_pci_dev->enclaves_list);

	if (copy_to_user(slot_uid, &ne_enclave->slot_uid, sizeof(ne_enclave->slot_uid))) {
		/*
		 * As we're holding the only reference to 'enclave_file', fput()
		 * will call ne_enclave_release() which will do a proper cleanup
		 * of all so far allocated resources, leaving only the unused fd
		 * for us to free.
		 */
		fput(enclave_file);
		put_unused_fd(enclave_fd);

		return -EFAULT;
	}

	fd_install(enclave_fd, enclave_file);

	return enclave_fd;

put_file:
	fput(enclave_file);
put_fd:
	put_unused_fd(enclave_fd);
free_cpumask:
	free_cpumask_var(ne_enclave->vcpu_ids);
	for (i = 0; i < ne_enclave->nr_parent_vm_cores; i++)
		free_cpumask_var(ne_enclave->threads_per_core[i]);
	kfree(ne_enclave->threads_per_core);
free_ne_enclave:
	kfree(ne_enclave);

	return rc;
}

/**
 * ne_ioctl() - Ioctl function provided by the NE misc device.
 * @file:	File associated with this ioctl function.
 * @cmd:	The command that is set for the ioctl call.
 * @arg:	The argument that is provided for the ioctl call.
 *
 * Context: Process context.
 * Return:
 * * Ioctl result (e.g. enclave file descriptor) on success.
 * * Negative return value on failure.
 */
static long ne_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case NE_CREATE_VM: {
		int enclave_fd = -1;
		struct ne_pci_dev *ne_pci_dev = ne_devs.ne_pci_dev;
		u64 __user *slot_uid = (void __user *)arg;

		mutex_lock(&ne_pci_dev->enclaves_list_mutex);
		enclave_fd = ne_create_vm_ioctl(ne_pci_dev, slot_uid);
		mutex_unlock(&ne_pci_dev->enclaves_list_mutex);

		return enclave_fd;
	}

	default:
		return -ENOTTY;
	}

	return 0;
}
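
/*
 * End-to-end user space sketch, assuming the EIF image is copied into the
 * donated memory at the offset returned by NE_GET_IMAGE_LOAD_INFO; error
 * handling and the full memory / vCPU setup loops are omitted.
 *
 *	int ne_dev_fd = open("/dev/nitro_enclaves", O_RDWR | O_CLOEXEC);
 *	__u64 slot_uid = 0;
 *	__u32 vcpu_id = 0;
 *
 *	int enclave_fd = ioctl(ne_dev_fd, NE_CREATE_VM, &slot_uid);
 *
 *	// Donate hugetlbfs-backed memory (NE_SET_USER_MEMORY_REGION), then
 *	// vCPUs; a vcpu_id of 0 lets the driver pick one from the NE CPU pool.
 *	ioctl(enclave_fd, NE_ADD_VCPU, &vcpu_id);
 */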

#if defined(CONFIG_NITRO_ENCLAVES_MISC_DEV_TEST)
#include "ne_misc_dev_test.c"
#endif

static int __init ne_init(void)
{
	mutex_init(&ne_cpu_pool.mutex);

	return pci_register_driver(&ne_pci_driver);
}

static void __exit ne_exit(void)
{
	pci_unregister_driver(&ne_pci_driver);

	ne_teardown_cpu_pool();
}

module_init(ne_init);
module_exit(ne_exit);

MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
MODULE_DESCRIPTION("Nitro Enclaves Driver");
MODULE_LICENSE("GPL v2");