/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _NET_RPS_H
#define _NET_RPS_H

#include <linux/types.h>
#include <linux/static_key.h>
#include <net/sock.h>
#include <net/hotdata.h>

#ifdef CONFIG_RPS

extern struct static_key_false rps_needed;
extern struct static_key_false rfs_needed;

/*
 * This structure holds an RPS map which can be of variable length. The
 * map is an array of CPUs.
 */
struct rps_map {
	unsigned int len;
	struct rcu_head rcu;
	u16 cpus[];
};
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
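
/*
 * Illustrative sketch (not part of this header): an rps_map is sized with
 * RPS_MAP_SIZE() for the number of CPUs it will hold, filled from a cpumask
 * and published with RCU, roughly as store_rps_map() in net/core/net-sysfs.c
 * does:
 *
 *	struct rps_map *map;
 *	int cpu, i = 0;
 *
 *	map = kzalloc(max_t(unsigned int,
 *			    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
 *		      GFP_KERNEL);
 *	for_each_cpu(cpu, mask)
 *		map->cpus[i++] = cpu;
 *	map->len = i;
 *	rcu_assign_pointer(queue->rps_map, map);
 *
 * Readers use rcu_dereference(), and the embedded rcu_head lets the old map
 * be released after a grace period (e.g. with kfree_rcu()).
 */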

/*
 * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
 * tail pointer for that CPU's input queue at the time of last enqueue, and
 * a hardware filter index.
 */
struct rps_dev_flow {
	u16 cpu;
	u16 filter;
	unsigned int last_qtail;
};
#define RPS_NO_FILTER 0xffff

/*
 * The rps_dev_flow_table structure contains a table of flow mappings.
 */
struct rps_dev_flow_table {
	unsigned int mask;
	struct rcu_head rcu;
	struct rps_dev_flow flows[];
};
#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
    ((_num) * sizeof(struct rps_dev_flow)))
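
/*
 * Illustrative sketch (a simplified view, in the spirit of
 * store_rps_dev_flow_table_cnt() in net/core/net-sysfs.c): the requested
 * flow count is rounded up to a power of two so that 'mask' can replace a
 * modulo when indexing by flow hash:
 *
 *	count = roundup_pow_of_two(count);
 *	table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count));
 *	table->mask = count - 1;
 *	for (i = 0; i < count; i++)
 *		table->flows[i].cpu = RPS_NO_CPU;
 *	rcu_assign_pointer(queue->rps_flow_table, table);
 *
 * A flow's entry is then table->flows[hash & table->mask].
 */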

/*
 * The rps_sock_flow_table contains mappings of flows to the last CPU
 * on which they were processed by the application (set in recvmsg).
 * Each entry is a 32-bit value. The upper bits hold the high-order bits
 * of the flow hash, the lower bits hold the CPU number.
 * rps_cpu_mask is used to partition the space, depending on the number of
 * possible CPUs: rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
 * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
 * meaning we use 32-6=26 bits for the hash.
 */
struct rps_sock_flow_table {
	u32 mask;

	u32 ents[] ____cacheline_aligned_in_smp;
};
#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))

#define RPS_NO_CPU 0xffff
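
/*
 * Illustrative sketch (based on rps_sock_flow_sysctl() in
 * net/core/sysctl_net_core.c): the global table is sized by the
 * net.core.rps_sock_flow_entries sysctl, rounded up to a power of two,
 * with every entry initially holding no CPU:
 *
 *	size = roundup_pow_of_two(size);
 *	sock_table = vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
 *	sock_table->mask = size - 1;
 *	for (i = 0; i < size; i++)
 *		sock_table->ents[i] = RPS_NO_CPU;
 *	rcu_assign_pointer(net_hotdata.rps_sock_flow_table, sock_table);
 */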

static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
					u32 hash)
{
	unsigned int index = hash & table->mask;
	u32 val = hash & ~net_hotdata.rps_cpu_mask;

	/* We only give a hint, preemption can change CPU under us */
	val |= raw_smp_processor_id();

	/* The following WRITE_ONCE() is paired with the READ_ONCE()
	 * here, and another one in get_rps_cpu().
	 */
	if (READ_ONCE(table->ents[index]) != val)
		WRITE_ONCE(table->ents[index], val);
}
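
/*
 * Illustrative sketch of the consumer side (a simplified view of
 * get_rps_cpu() in net/core/dev.c): the reader verifies the hash bits
 * before trusting the CPU bits, which guards against two flows sharing
 * a table slot:
 *
 *	ident = READ_ONCE(sock_flow_table->ents[hash & sock_flow_table->mask]);
 *	if ((ident ^ hash) & ~net_hotdata.rps_cpu_mask)
 *		goto try_rps;
 *	next_cpu = ident & net_hotdata.rps_cpu_mask;
 *
 * The READ_ONCE() there pairs with the WRITE_ONCE() above.
 */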

#endif /* CONFIG_RPS */

static inline void sock_rps_record_flow_hash(__u32 hash)
{
#ifdef CONFIG_RPS
	struct rps_sock_flow_table *sock_flow_table;

	if (!hash)
		return;
	rcu_read_lock();
	sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table);
	if (sock_flow_table)
		rps_record_sock_flow(sock_flow_table, hash);
	rcu_read_unlock();
#endif
}

static inline void sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	if (static_branch_unlikely(&rfs_needed)) {
		/* Reading sk->sk_rxhash might incur an expensive cache line
		 * miss.
		 *
		 * TCP_ESTABLISHED does cover almost all states where RFS
		 * might be useful, and is cheaper [1] than testing:
		 * IPv4: inet_sk(sk)->inet_daddr
		 * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
		 * OR an additional socket flag
		 * [1]: sk_state and sk_prot are in the same cache line.
		 */
		if (sk->sk_state == TCP_ESTABLISHED) {
			/* This READ_ONCE() is paired with the WRITE_ONCE()
			 * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
			 */
			sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
		}
	}
#endif
}
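
/*
 * Illustrative sketch of a typical call site: protocol recvmsg entry points
 * record the flow so RFS can steer future packets toward the consuming CPU,
 * roughly as inet_recvmsg() in net/ipv4/af_inet.c does:
 *
 *	int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 *			 int flags)
 *	{
 *		struct sock *sk = sock->sk;
 *
 *		...
 *		sock_rps_record_flow(sk);
 *		...
 *	}
 */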

#endif /* _NET_RPS_H */