| 1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
| 2 | #ifndef _NET_RPS_H |
| 3 | #define _NET_RPS_H |
| 4 | |
| 5 | #include <linux/types.h> |
| 6 | #include <linux/static_key.h> |
| 7 | #include <net/sock.h> |
| 8 | #include <net/hotdata.h> |
| 9 | |
| 10 | #ifdef CONFIG_RPS |
| 11 | |
/*
 * Static branch keys defined outside this header. rfs_needed is tested by
 * the socket helpers below before they touch the socket flow table;
 * rps_needed is not referenced in this header.
 */
extern struct static_key_false rps_needed;
extern struct static_key_false rfs_needed;
| 14 | |
| 15 | /* |
| 16 | * This structure holds an RPS map which can be of variable length. The |
| 17 | * map is an array of CPUs. |
| 18 | */ |
| 19 | struct rps_map { |
| 20 | unsigned int len; |
| 21 | struct rcu_head rcu; |
| 22 | u16 cpus[]; |
| 23 | }; |
| 24 | #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) |
| 25 | |
| 26 | /* |
| 27 | * The rps_dev_flow structure contains the mapping of a flow to a CPU, the |
| 28 | * tail pointer for that CPU's input queue at the time of last enqueue, and |
| 29 | * a hardware filter index. |
| 30 | */ |
| 31 | struct rps_dev_flow { |
| 32 | u16 cpu; |
| 33 | u16 filter; |
| 34 | unsigned int last_qtail; |
| 35 | }; |
| 36 | #define RPS_NO_FILTER 0xffff |
| 37 | |
| 38 | /* |
| 39 | * The rps_dev_flow_table structure contains a table of flow mappings. |
| 40 | */ |
| 41 | struct rps_dev_flow_table { |
| 42 | u8 log; |
| 43 | struct rcu_head rcu; |
| 44 | struct rps_dev_flow flows[]; |
| 45 | }; |
| 46 | #define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ |
| 47 | ((_num) * sizeof(struct rps_dev_flow))) |
| 48 | |
| 49 | /* |
| 50 | * The rps_sock_flow_table contains mappings of flows to the last CPU |
| 51 | * on which they were processed by the application (set in recvmsg). |
| 52 | * Each entry is a 32bit value. Upper part is the high-order bits |
| 53 | * of flow hash, lower part is CPU number. |
| 54 | * rps_cpu_mask is used to partition the space, depending on number of |
| 55 | * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 |
| 56 | * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f, |
| 57 | * meaning we use 32-6=26 bits for the hash. |
| 58 | */ |
| 59 | struct rps_sock_flow_table { |
| 60 | struct rcu_head rcu; |
| 61 | u32 mask; |
| 62 | |
| 63 | u32 ents[] ____cacheline_aligned_in_smp; |
| 64 | }; |
| 65 | #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) |
| 66 | |
/* Value sock_rps_delete_flow() stores in a socket flow table entry. */
#define RPS_NO_CPU 0xffff
| 68 | |
| 69 | static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, |
| 70 | u32 hash) |
| 71 | { |
| 72 | unsigned int index = hash & table->mask; |
| 73 | u32 val = hash & ~net_hotdata.rps_cpu_mask; |
| 74 | |
| 75 | /* We only give a hint, preemption can change CPU under us */ |
| 76 | val |= raw_smp_processor_id(); |
| 77 | |
| 78 | /* The following WRITE_ONCE() is paired with the READ_ONCE() |
| 79 | * here, and another one in get_rps_cpu(). |
| 80 | */ |
| 81 | if (READ_ONCE(table->ents[index]) != val) |
| 82 | WRITE_ONCE(table->ents[index], val); |
| 83 | } |
| 84 | |
| 85 | #endif /* CONFIG_RPS */ |
| 86 | |
| 87 | static inline void sock_rps_record_flow_hash(__u32 hash) |
| 88 | { |
| 89 | #ifdef CONFIG_RPS |
| 90 | struct rps_sock_flow_table *sock_flow_table; |
| 91 | |
| 92 | if (!hash) |
| 93 | return; |
| 94 | rcu_read_lock(); |
| 95 | sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table); |
| 96 | if (sock_flow_table) |
| 97 | rps_record_sock_flow(table: sock_flow_table, hash); |
| 98 | rcu_read_unlock(); |
| 99 | #endif |
| 100 | } |
| 101 | |
/*
 * Record the current CPU for the flow of socket @sk, when the rfs_needed
 * static branch is enabled and the socket is in TCP_ESTABLISHED state.
 * Without CONFIG_RPS the function is empty.
 */
static inline void sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	if (!static_branch_unlikely(&rfs_needed))
		return;

	/* Reading sk->sk_rxhash might incur an expensive cache line miss,
	 * so filter on the socket state first.
	 *
	 * TCP_ESTABLISHED covers almost all states where RFS might be
	 * useful, and testing it is cheaper [1] than testing:
	 *	IPv4: inet_sk(sk)->inet_daddr
	 *	IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
	 *	OR an additional socket flag
	 * [1] : sk_state and sk_prot are in the same cache line.
	 */
	if (sk->sk_state != TCP_ESTABLISHED)
		return;

	/* This READ_ONCE() pairs with the WRITE_ONCE() done by
	 * sock_rps_save_rxhash() and sock_rps_reset_rxhash().
	 */
	sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
#endif
}
| 125 | |
/*
 * Reset the socket flow table entry for the flow of @sk to RPS_NO_CPU.
 *
 * Does nothing when the rfs_needed static branch is disabled, when the
 * socket has no rx hash, or when no socket flow table is installed.
 * Without CONFIG_RPS the function is empty.
 */
static inline void sock_rps_delete_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	struct rps_sock_flow_table *table;
	u32 hash;

	if (!static_branch_unlikely(&rfs_needed))
		return;

	hash = READ_ONCE(sk->sk_rxhash);
	if (!hash)
		return;

	rcu_read_lock();
	table = rcu_dereference(net_hotdata.rps_sock_flow_table);
	if (table) {
		u32 *slot = &table->ents[hash & table->mask];

		/* Only store when the entry actually changes. */
		if (READ_ONCE(*slot) != RPS_NO_CPU)
			WRITE_ONCE(*slot, RPS_NO_CPU);
	}
	rcu_read_unlock();
#endif
}
| 149 | |
| 150 | static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd) |
| 151 | { |
| 152 | #ifdef CONFIG_RPS |
| 153 | return ++sd->input_queue_tail; |
| 154 | #else |
| 155 | return 0; |
| 156 | #endif |
| 157 | } |
| 158 | |
| 159 | static inline void rps_input_queue_tail_save(u32 *dest, u32 tail) |
| 160 | { |
| 161 | #ifdef CONFIG_RPS |
| 162 | WRITE_ONCE(*dest, tail); |
| 163 | #endif |
| 164 | } |
| 165 | |
/*
 * Advance sd->input_queue_head by @val. The new value is published with
 * WRITE_ONCE(). Without CONFIG_RPS the function is empty.
 */
static inline void rps_input_queue_head_add(struct softnet_data *sd, int val)
{
#ifdef CONFIG_RPS
	WRITE_ONCE(sd->input_queue_head,
		   sd->input_queue_head + val);
#endif
}
| 172 | |
/*
 * Advance sd->input_queue_head by one; shorthand for
 * rps_input_queue_head_add(sd, 1).
 */
static inline void rps_input_queue_head_incr(struct softnet_data *sd)
{
	rps_input_queue_head_add(sd, 1);
}
| 177 | |
| 178 | #endif /* _NET_RPS_H */ |
| 179 | |