1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * ip_vs_nfct.c: Netfilter connection tracking support for IPVS |
4 | * |
5 | * Portions Copyright (C) 2001-2002 |
6 | * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. |
7 | * |
8 | * Portions Copyright (C) 2003-2010 |
9 | * Julian Anastasov |
10 | * |
11 | * Authors: |
12 | * Ben North <ben@redfrontdoor.org> |
13 | * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels |
14 | * Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match |
15 | * |
16 | * Current status: |
17 | * |
18 | * - provide conntrack confirmation for new and related connections, by |
19 | * this way we can see their proper conntrack state in all hooks |
20 | * - support for all forwarding methods, not only NAT |
21 | * - FTP support (NAT), ability to support other NAT apps with expectations |
22 | * - to correctly create expectations for related NAT connections the proper |
23 | * NF conntrack support must be already installed, eg. ip_vs_ftp requires |
24 | * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables |
25 | * NAT rules are needed) |
26 | * - alter reply for NAT when forwarding packet in original direction: |
27 | * conntrack from client in NEW or RELATED (Passive FTP DATA) state or |
28 | * when RELATED conntrack is created from real server (Active FTP DATA) |
29 | * - if iptables_nat is not loaded the Passive FTP will not work (the |
30 | * PASV response can not be NAT-ed) but Active FTP should work |
31 | */ |
32 | |
33 | #define KMSG_COMPONENT "IPVS" |
34 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
35 | |
36 | #include <linux/module.h> |
37 | #include <linux/types.h> |
38 | #include <linux/kernel.h> |
39 | #include <linux/errno.h> |
40 | #include <linux/compiler.h> |
41 | #include <linux/vmalloc.h> |
42 | #include <linux/skbuff.h> |
43 | #include <net/ip.h> |
44 | #include <linux/netfilter.h> |
45 | #include <linux/netfilter_ipv4.h> |
46 | #include <net/ip_vs.h> |
47 | #include <net/netfilter/nf_conntrack_core.h> |
48 | #include <net/netfilter/nf_conntrack_expect.h> |
49 | #include <net/netfilter/nf_conntrack_seqadj.h> |
50 | #include <net/netfilter/nf_conntrack_helper.h> |
51 | #include <net/netfilter/nf_conntrack_zones.h> |
52 | |
53 | |
/*
 * Debug helpers for printing a conntrack tuple.
 *
 * FMT_TUPLE is the format string, ARG_TUPLE(tp) the matching argument list
 * for a pointer to a tuple: source address, source port, destination
 * address, destination port and protocol number.  Both addresses are
 * formatted using the family stored in src.l3num; ports are converted
 * with ntohs() before printing.
 */
#define FMT_TUPLE	"%s:%u->%s:%u/%u"
#define ARG_TUPLE(tp)						\
	IP_VS_DBG_ADDR((tp)->src.l3num, &(tp)->src.u3),		\
	ntohs((tp)->src.u.all),					\
	IP_VS_DBG_ADDR((tp)->src.l3num, &(tp)->dst.u3),		\
	ntohs((tp)->dst.u.all),					\
	(tp)->dst.protonum

/*
 * Debug helpers for printing an IPVS connection.
 *
 * FMT_CONN is the format string, ARG_CONN(conn) the matching argument list:
 * caddr:cport, vaddr:vport, daddr:dport, then protocol and state.
 * caddr and vaddr are formatted with conn->af, daddr with conn->daf.
 */
#define FMT_CONN	"%s:%u->%s:%u->%s:%u/%u:%u"
#define ARG_CONN(conn)						\
	IP_VS_DBG_ADDR((conn)->af, &((conn)->caddr)),		\
	ntohs((conn)->cport),					\
	IP_VS_DBG_ADDR((conn)->af, &((conn)->vaddr)),		\
	ntohs((conn)->vport),					\
	IP_VS_DBG_ADDR((conn)->daf, &((conn)->daddr)),		\
	ntohs((conn)->dport),					\
	(conn)->protocol, (conn)->state
69 | |
70 | void |
71 | ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) |
72 | { |
73 | enum ip_conntrack_info ctinfo; |
74 | struct nf_conn *ct = nf_ct_get(skb, ctinfo: &ctinfo); |
75 | struct nf_conntrack_tuple new_tuple; |
76 | |
77 | if (ct == NULL || nf_ct_is_confirmed(ct) || |
78 | nf_ct_is_dying(ct)) |
79 | return; |
80 | |
81 | /* Never alter conntrack for non-NAT conns */ |
82 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) |
83 | return; |
84 | |
85 | /* Never alter conntrack for OPS conns (no reply is expected) */ |
86 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) |
87 | return; |
88 | |
89 | /* Alter reply only in original direction */ |
90 | if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) |
91 | return; |
92 | |
93 | /* Applications may adjust TCP seqs */ |
94 | if (cp->app && nf_ct_protonum(ct) == IPPROTO_TCP && |
95 | !nfct_seqadj(ct) && !nfct_seqadj_ext_add(ct)) |
96 | return; |
97 | |
98 | /* |
99 | * The connection is not yet in the hashtable, so we update it. |
100 | * CIP->VIP will remain the same, so leave the tuple in |
101 | * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the |
102 | * real-server we will see RIP->DIP. |
103 | */ |
104 | new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; |
105 | /* |
106 | * This will also take care of UDP and other protocols. |
107 | */ |
108 | if (outin) { |
109 | new_tuple.src.u3 = cp->daddr; |
110 | if (new_tuple.dst.protonum != IPPROTO_ICMP && |
111 | new_tuple.dst.protonum != IPPROTO_ICMPV6) |
112 | new_tuple.src.u.tcp.port = cp->dport; |
113 | } else { |
114 | new_tuple.dst.u3 = cp->vaddr; |
115 | if (new_tuple.dst.protonum != IPPROTO_ICMP && |
116 | new_tuple.dst.protonum != IPPROTO_ICMPV6) |
117 | new_tuple.dst.u.tcp.port = cp->vport; |
118 | } |
119 | IP_VS_DBG_BUF(7, "%s: Updating conntrack ct=%p, status=0x%lX, " |
120 | "ctinfo=%d, old reply=" FMT_TUPLE "\n" , |
121 | __func__, ct, ct->status, ctinfo, |
122 | ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple)); |
123 | IP_VS_DBG_BUF(7, "%s: Updating conntrack ct=%p, status=0x%lX, " |
124 | "ctinfo=%d, new reply=" FMT_TUPLE "\n" , |
125 | __func__, ct, ct->status, ctinfo, |
126 | ARG_TUPLE(&new_tuple)); |
127 | nf_conntrack_alter_reply(ct, newreply: &new_tuple); |
128 | IP_VS_DBG_BUF(7, "%s: Updated conntrack ct=%p for cp=" FMT_CONN "\n" , |
129 | __func__, ct, ARG_CONN(cp)); |
130 | } |
131 | |
/*
 * ip_vs_confirm_conntrack - hand @skb to nf_conntrack_confirm().
 *
 * Returns whatever nf_conntrack_confirm() returns, unchanged.
 */
int ip_vs_confirm_conntrack(struct sk_buff *skb)
{
	int ret;

	ret = nf_conntrack_confirm(skb);

	return ret;
}
136 | |
137 | /* |
138 | * Called from init_conntrack() as expectfn handler. |
139 | */ |
140 | static void ip_vs_nfct_expect_callback(struct nf_conn *ct, |
141 | struct nf_conntrack_expect *exp) |
142 | { |
143 | struct nf_conntrack_tuple *orig, new_reply; |
144 | struct ip_vs_conn *cp; |
145 | struct ip_vs_conn_param p; |
146 | struct net *net = nf_ct_net(ct); |
147 | |
148 | /* |
149 | * We assume that no NF locks are held before this callback. |
150 | * ip_vs_conn_out_get and ip_vs_conn_in_get should match their |
151 | * expectations even if they use wildcard values, now we provide the |
152 | * actual values from the newly created original conntrack direction. |
153 | * The conntrack is confirmed when packet reaches IPVS hooks. |
154 | */ |
155 | |
156 | /* RS->CLIENT */ |
157 | orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; |
158 | ip_vs_conn_fill_param(ipvs: net_ipvs(net), af: exp->tuple.src.l3num, protocol: orig->dst.protonum, |
159 | caddr: &orig->src.u3, cport: orig->src.u.tcp.port, |
160 | vaddr: &orig->dst.u3, vport: orig->dst.u.tcp.port, p: &p); |
161 | cp = ip_vs_conn_out_get(p: &p); |
162 | if (cp) { |
163 | /* Change reply CLIENT->RS to CLIENT->VS */ |
164 | IP_VS_DBG_BUF(7, "%s: for ct=%p, status=0x%lX found inout cp=" |
165 | FMT_CONN "\n" , |
166 | __func__, ct, ct->status, ARG_CONN(cp)); |
167 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; |
168 | IP_VS_DBG_BUF(7, "%s: ct=%p before alter: reply tuple=" |
169 | FMT_TUPLE "\n" , |
170 | __func__, ct, ARG_TUPLE(&new_reply)); |
171 | new_reply.dst.u3 = cp->vaddr; |
172 | new_reply.dst.u.tcp.port = cp->vport; |
173 | goto alter; |
174 | } |
175 | |
176 | /* CLIENT->VS */ |
177 | cp = ip_vs_conn_in_get(p: &p); |
178 | if (cp) { |
179 | /* Change reply VS->CLIENT to RS->CLIENT */ |
180 | IP_VS_DBG_BUF(7, "%s: for ct=%p, status=0x%lX found outin cp=" |
181 | FMT_CONN "\n" , |
182 | __func__, ct, ct->status, ARG_CONN(cp)); |
183 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; |
184 | IP_VS_DBG_BUF(7, "%s: ct=%p before alter: reply tuple=" |
185 | FMT_TUPLE "\n" , |
186 | __func__, ct, ARG_TUPLE(&new_reply)); |
187 | new_reply.src.u3 = cp->daddr; |
188 | new_reply.src.u.tcp.port = cp->dport; |
189 | goto alter; |
190 | } |
191 | |
192 | IP_VS_DBG_BUF(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE |
193 | " - unknown expect\n" , |
194 | __func__, ct, ct->status, ARG_TUPLE(orig)); |
195 | return; |
196 | |
197 | alter: |
198 | /* Never alter conntrack for non-NAT conns */ |
199 | if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) |
200 | nf_conntrack_alter_reply(ct, newreply: &new_reply); |
201 | ip_vs_conn_put(cp); |
202 | return; |
203 | } |
204 | |
205 | /* |
206 | * Create NF conntrack expectation with wildcard (optional) source port. |
207 | * Then the default callback function will alter the reply and will confirm |
208 | * the conntrack entry when the first packet comes. |
209 | * Use port 0 to expect connection from any port. |
210 | */ |
211 | void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, |
212 | struct ip_vs_conn *cp, u_int8_t proto, |
213 | const __be16 port, int from_rs) |
214 | { |
215 | struct nf_conntrack_expect *exp; |
216 | |
217 | if (ct == NULL) |
218 | return; |
219 | |
220 | exp = nf_ct_expect_alloc(me: ct); |
221 | if (!exp) |
222 | return; |
223 | |
224 | nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), |
225 | from_rs ? &cp->daddr : &cp->caddr, |
226 | from_rs ? &cp->caddr : &cp->vaddr, |
227 | proto, port ? &port : NULL, |
228 | from_rs ? &cp->cport : &cp->vport); |
229 | |
230 | exp->expectfn = ip_vs_nfct_expect_callback; |
231 | |
232 | IP_VS_DBG_BUF(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n" , |
233 | __func__, ct, ARG_TUPLE(&exp->tuple)); |
234 | nf_ct_expect_related(expect: exp, flags: 0); |
235 | nf_ct_expect_put(exp); |
236 | } |
237 | EXPORT_SYMBOL(ip_vs_nfct_expect_related); |
238 | |
239 | /* |
240 | * Our connection was terminated, try to drop the conntrack immediately |
241 | */ |
242 | void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) |
243 | { |
244 | struct nf_conntrack_tuple_hash *h; |
245 | struct nf_conn *ct; |
246 | struct nf_conntrack_tuple tuple; |
247 | |
248 | if (!cp->cport) |
249 | return; |
250 | |
251 | tuple = (struct nf_conntrack_tuple) { |
252 | .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } }; |
253 | tuple.src.u3 = cp->caddr; |
254 | tuple.src.u.all = cp->cport; |
255 | tuple.src.l3num = cp->af; |
256 | tuple.dst.u3 = cp->vaddr; |
257 | tuple.dst.u.all = cp->vport; |
258 | |
259 | IP_VS_DBG_BUF(7, "%s: dropping conntrack for conn " FMT_CONN "\n" , |
260 | __func__, ARG_CONN(cp)); |
261 | |
262 | h = nf_conntrack_find_get(net: cp->ipvs->net, zone: &nf_ct_zone_dflt, tuple: &tuple); |
263 | if (h) { |
264 | ct = nf_ct_tuplehash_to_ctrack(hash: h); |
265 | if (nf_ct_kill(ct)) { |
266 | IP_VS_DBG_BUF(7, "%s: ct=%p deleted for tuple=" |
267 | FMT_TUPLE "\n" , |
268 | __func__, ct, ARG_TUPLE(&tuple)); |
269 | } else { |
270 | IP_VS_DBG_BUF(7, "%s: ct=%p, no conntrack for tuple=" |
271 | FMT_TUPLE "\n" , |
272 | __func__, ct, ARG_TUPLE(&tuple)); |
273 | } |
274 | nf_ct_put(ct); |
275 | } else { |
276 | IP_VS_DBG_BUF(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n" , |
277 | __func__, ARG_TUPLE(&tuple)); |
278 | } |
279 | } |
280 | |
281 | |