2022-10-05 09:44:45 |
Chengen Du |
description |
[Impact]
Environment: v4.15.0-177-generic Xenial ESM
Using NFQUEUE to delegate the decision on TCP packets to userspace processes will cause memory leak.
The symptom is that TCP slab objects will accumulate and eventually cause OOM.
[Fix]
There is a discrepancy between backport and upstream commit.
[Upstream Commit c3873070247d9e3c7a6b0cf9bf9b45e8018427b1]
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 9eed51e920e8..980daa6e1e3a 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -37,7 +37,7 @@ void nf_register_queue_handler(const struct nf_queue_handler *qh);
void nf_unregister_queue_handler(void);
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
-void nf_queue_entry_get_refs(struct nf_queue_entry *entry);
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
void nf_queue_entry_free(struct nf_queue_entry *entry);
static inline void init_hashrandom(u32 *jhash_initval)
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 5ab0680db445..e39549c55945 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -96,19 +96,21 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry)
}
/* Bump dev refs so they don't vanish while packet is out */
-void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
+ if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt))
+ return false;
+
dev_hold(state->in);
dev_hold(state->out);
- if (state->sk)
- sock_hold(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
dev_hold(entry->physin);
dev_hold(entry->physout);
#endif
+ return true;
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
@@ -196,7 +198,10 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
__nf_queue_entry_init_physdevs(entry);
- nf_queue_entry_get_refs(entry);
+ if (!nf_queue_entry_get_refs(entry)) {
+ kfree(entry);
+ return -ENOTCONN;
+ }
switch (entry->state.pf) {
case AF_INET:
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index ea2d9c2a44cf..64a6acb6aeae 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -710,9 +710,15 @@ static struct nf_queue_entry *
nf_queue_entry_dup(struct nf_queue_entry *e)
{
struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
- if (entry)
- nf_queue_entry_get_refs(entry);
- return entry;
+
+ if (!entry)
+ return NULL;
+
+ if (nf_queue_entry_get_refs(entry))
+ return entry;
+
+ kfree(entry);
+ return NULL;
}
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
[Backport Commit 4d032d60432327f068f03b516f5b6c51ceb17d15]
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 814058d0f167..f38cc6092c5a 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -32,7 +32,7 @@ void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *q
void nf_unregister_queue_handler(struct net *net);
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
-void nf_queue_entry_get_refs(struct nf_queue_entry *entry);
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
static inline void init_hashrandom(u32 *jhash_initval)
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 59340b3ef7ef..dbc45165c533 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -80,10 +80,13 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
/* Bump dev refs so they don't vanish while packet is out */
-void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
+ if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt))
+ return false;
+
if (state->in)
dev_hold(state->in);
if (state->out)
@@ -102,6 +105,7 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
dev_hold(physdev);
}
#endif
+ return true;
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
@@ -159,7 +163,11 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
.size = sizeof(*entry) + afinfo->route_key_size,
};
- nf_queue_entry_get_refs(entry);
+ if (!nf_queue_entry_get_refs(entry)) {
+ kfree(entry);
+ return -ENOTCONN;
+ }
+
afinfo->saveroute(skb, entry);
status = qh->outfn(entry, queuenum);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 48ed30e1b405..abad8bf6fa38 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -693,9 +693,15 @@ static struct nf_queue_entry *
nf_queue_entry_dup(struct nf_queue_entry *e)
{
struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
- if (entry)
- nf_queue_entry_get_refs(entry);
- return entry;
+
+ if (!entry)
+ return NULL;
+
+ if (nf_queue_entry_get_refs(entry))
+ return entry;
+
+ kfree(entry);
+ return NULL;
}
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
[Difference between Commits]
50,57c57,62
< dev_hold(state->in);
< dev_hold(state->out);
< - if (state->sk)
< - sock_hold(state->sk);
<
< #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
< dev_hold(entry->physin);
< dev_hold(entry->physout);
---
> if (state->in)
> dev_hold(state->in);
> if (state->out)
> @@ -102,6 +105,7 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
> dev_hold(physdev);
> }
The sock_hold() logic still remains in the backport commit, which will affect the reference count and result in memory leak.
The fix aligns the logic with the upstream commit.
[Test Plan]
1. Prepare a VM and run a TCP server on host.
2. Enter into the VM and set up a iptables rule.
iptables -I OUTPUT -p tcp --dst <-TCP_SERVER_IP-> -j NFQUEUE --queue-num=1 --queue-bypass
3. Run a nfnetlink client (should listen on NF queue 1) on VM.
4. Keep connecting the TCP server from VM.
while true; do netcat <-TCP_SERVER_IP-> 8080; done
5. The VM's TCP slab objects will accumulate and eventually encounter OOM situation.
cat /proc/slabinfo | grep TCP
[Where problems could occur]
The fix just aligns the logic with the upstream commit, so the regression can be considered as low. |
[Impact]
Environment: v4.15.0-177-generic Xenial ESM
Using NFQUEUE to delegate the decision on TCP packets to userspace processes will cause memory leak.
The symptom is that TCP slab objects will accumulate and eventually cause OOM.
[Fix]
There is a discrepancy between backport and upstream commit.
[Upstream Commit c3873070247d9e3c7a6b0cf9bf9b45e8018427b1]
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 9eed51e920e8..980daa6e1e3a 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -37,7 +37,7 @@ void nf_register_queue_handler(const struct nf_queue_handler *qh);
void nf_unregister_queue_handler(void);
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
-void nf_queue_entry_get_refs(struct nf_queue_entry *entry);
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
void nf_queue_entry_free(struct nf_queue_entry *entry);
static inline void init_hashrandom(u32 *jhash_initval)
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 5ab0680db445..e39549c55945 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -96,19 +96,21 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry)
}
/* Bump dev refs so they don't vanish while packet is out */
-void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
+ if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt))
+ return false;
+
dev_hold(state->in);
dev_hold(state->out);
- if (state->sk)
- sock_hold(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
dev_hold(entry->physin);
dev_hold(entry->physout);
#endif
+ return true;
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
@@ -196,7 +198,10 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
__nf_queue_entry_init_physdevs(entry);
- nf_queue_entry_get_refs(entry);
+ if (!nf_queue_entry_get_refs(entry)) {
+ kfree(entry);
+ return -ENOTCONN;
+ }
switch (entry->state.pf) {
case AF_INET:
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index ea2d9c2a44cf..64a6acb6aeae 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -710,9 +710,15 @@ static struct nf_queue_entry *
nf_queue_entry_dup(struct nf_queue_entry *e)
{
struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
- if (entry)
- nf_queue_entry_get_refs(entry);
- return entry;
+
+ if (!entry)
+ return NULL;
+
+ if (nf_queue_entry_get_refs(entry))
+ return entry;
+
+ kfree(entry);
+ return NULL;
}
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
[Backport Commit 4d032d60432327f068f03b516f5b6c51ceb17d15]
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 814058d0f167..f38cc6092c5a 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -32,7 +32,7 @@ void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *q
void nf_unregister_queue_handler(struct net *net);
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
-void nf_queue_entry_get_refs(struct nf_queue_entry *entry);
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
static inline void init_hashrandom(u32 *jhash_initval)
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 59340b3ef7ef..dbc45165c533 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -80,10 +80,13 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
/* Bump dev refs so they don't vanish while packet is out */
-void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
+ if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt))
+ return false;
+
if (state->in)
dev_hold(state->in);
if (state->out)
@@ -102,6 +105,7 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
dev_hold(physdev);
}
#endif
+ return true;
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
@@ -159,7 +163,11 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
.size = sizeof(*entry) + afinfo->route_key_size,
};
- nf_queue_entry_get_refs(entry);
+ if (!nf_queue_entry_get_refs(entry)) {
+ kfree(entry);
+ return -ENOTCONN;
+ }
+
afinfo->saveroute(skb, entry);
status = qh->outfn(entry, queuenum);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 48ed30e1b405..abad8bf6fa38 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -693,9 +693,15 @@ static struct nf_queue_entry *
nf_queue_entry_dup(struct nf_queue_entry *e)
{
struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
- if (entry)
- nf_queue_entry_get_refs(entry);
- return entry;
+
+ if (!entry)
+ return NULL;
+
+ if (nf_queue_entry_get_refs(entry))
+ return entry;
+
+ kfree(entry);
+ return NULL;
}
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
[Difference between Commits]
50,57c57,62
< dev_hold(state->in);
< dev_hold(state->out);
< - if (state->sk)
< - sock_hold(state->sk);
<
< #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
< dev_hold(entry->physin);
< dev_hold(entry->physout);
---
> if (state->in)
> dev_hold(state->in);
> if (state->out)
> @@ -102,6 +105,7 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
> dev_hold(physdev);
> }
The sock_hold() logic still remains in the backport commit, which will affect the reference count and result in memory leak.
The fix aligns the logic with the upstream commit.
[Test Plan]
1. Prepare a VM and run a TCP server on host.
2. Enter into the VM and set up a iptables rule.
iptables -I OUTPUT -p tcp --dst <-TCP_SERVER_IP-> -j NFQUEUE --queue-num=1 --queue-bypass
3. Run a nfnetlink client (should listen on NF queue 1) on VM.
4. Keep connecting the TCP server from VM.
while true; do netcat <-TCP_SERVER_IP-> 8080; done
5. The VM's TCP slab objects will accumulate and eventually encounter OOM situation.
cat /proc/slabinfo | grep TCP
[Where problems could occur]
The fix just aligns the logic with the upstream commit, so the regression can be considered as low. |
|