@@ -93,12 +93,29 @@ nf_conntrack_log_invalid - INTEGER
Log invalid packets of a type specified by value.
nf_conntrack_max - INTEGER
- Maximum number of allowed connection tracking entries. This value is set
- to nf_conntrack_buckets by default.
- Note that connection tracking entries are added to the table twice -- once
- for the original direction and once for the reply direction (i.e., with
- the reversed address). This means that with default settings a maxed-out
- table will have a average hash chain length of 2, not 1.
+ - 0 - disabled (unlimited)
+ - not 0 - enabled
+
+ Maximum number of allowed connection tracking entries per netns. This value
+ is set to nf_conntrack_buckets by default.
+
+ Note that connection tracking entries are added to the table twice -- once
+ for the original direction and once for the reply direction (i.e., with
+ the reversed address). This means that with default settings a maxed-out
+ table will have an average hash chain length of 2, not 1.
+
+ The effective limit in a non-init_net netns can never exceed the init_net limit:
+ +----------------+-------------+----------------+
+ | init_net netns | other netns | limit behavior |
+ +----------------+-------------+----------------+
+ | 0 | 0 | unlimited |
+ +----------------+-------------+----------------+
+ | 0 | not 0 | other |
+ +----------------+-------------+----------------+
+ | not 0 | 0 | init_net |
+ +----------------+-------------+----------------+
+ | not 0 | not 0 | min |
+ +----------------+-------------+----------------+
nf_conntrack_tcp_be_liberal - BOOLEAN
- 0 - disabled (default)
@@ -320,7 +320,6 @@ int nf_conntrack_hash_resize(unsigned int hashsize);
extern struct hlist_nulls_head *nf_conntrack_hash;
extern unsigned int nf_conntrack_htable_size;
extern seqcount_spinlock_t nf_conntrack_generation;
-extern unsigned int nf_conntrack_max;
/* must be called with rcu read lock held */
static inline void
@@ -360,6 +359,17 @@ static inline struct nf_conntrack_net *nf_ct_pernet(const struct net *net)
return net_generic(net, nf_conntrack_net_id);
}
+static inline unsigned int nf_conntrack_max(const struct net *net)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ return likely(init_net.ct.sysctl_max && net->ct.sysctl_max) ?
+ min(init_net.ct.sysctl_max, net->ct.sysctl_max) :
+ max(init_net.ct.sysctl_max, net->ct.sysctl_max);
+#else
+ return 0;
+#endif
+}
+
int nf_ct_skb_network_trim(struct sk_buff *skb, int family);
int nf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
u16 zone, u8 family, u8 *proto, u16 *mru);
@@ -102,6 +102,7 @@ struct netns_ct {
u8 sysctl_acct;
u8 sysctl_tstamp;
u8 sysctl_checksum;
+ unsigned int sysctl_max;
struct ip_conntrack_stat __percpu *stat;
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
@@ -202,8 +202,6 @@ static void nf_conntrack_all_unlock(void)
unsigned int nf_conntrack_htable_size __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
-unsigned int nf_conntrack_max __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_max);
seqcount_spinlock_t nf_conntrack_generation __read_mostly;
static siphash_aligned_key_t nf_conntrack_hash_rnd;
@@ -1498,7 +1496,7 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
static void gc_worker(struct work_struct *work)
{
- unsigned int i, hashsz, nf_conntrack_max95 = 0;
+ unsigned int i, hashsz;
u32 end_time, start_time = nfct_time_stamp;
struct conntrack_gc_work *gc_work;
unsigned int expired_count = 0;
@@ -1509,8 +1507,6 @@ static void gc_worker(struct work_struct *work)
gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
i = gc_work->next_bucket;
- if (gc_work->early_drop)
- nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
if (i == 0) {
gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
@@ -1538,6 +1534,7 @@ static void gc_worker(struct work_struct *work)
}
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
+ unsigned int nf_conntrack_max95 = 0;
struct nf_conntrack_net *cnet;
struct net *net;
long expires;
@@ -1567,11 +1564,14 @@ static void gc_worker(struct work_struct *work)
expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
expires = (expires - (long)next_run) / ++count;
next_run += expires;
+ net = nf_ct_net(tmp);
+
+ if (gc_work->early_drop)
+ nf_conntrack_max95 = nf_conntrack_max(net) / 100u * 95u;
if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
continue;
- net = nf_ct_net(tmp);
cnet = nf_ct_pernet(net);
if (atomic_read(&cnet->count) < nf_conntrack_max95)
continue;
@@ -1648,13 +1648,14 @@ __nf_conntrack_alloc(struct net *net,
gfp_t gfp, u32 hash)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
- unsigned int ct_count;
+ unsigned int ct_max, ct_count;
struct nf_conn *ct;
/* We don't want any race condition at early drop stage */
ct_count = atomic_inc_return(&cnet->count);
+ ct_max = nf_conntrack_max(net);
- if (nf_conntrack_max && unlikely(ct_count > nf_conntrack_max)) {
+ if (ct_max && unlikely(ct_count > ct_max)) {
if (!early_drop(net, hash)) {
if (!conntrack_gc_work.early_drop)
conntrack_gc_work.early_drop = true;
@@ -2650,7 +2651,7 @@ int nf_conntrack_init_start(void)
if (!nf_conntrack_hash)
return -ENOMEM;
- nf_conntrack_max = max_factor * nf_conntrack_htable_size;
+ init_net.ct.sysctl_max = max_factor * nf_conntrack_htable_size;
nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
sizeof(struct nf_conn),
@@ -2608,7 +2608,7 @@ ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks)))
goto nla_put_failure;
- if (nla_put_be32(skb, CTA_STATS_GLOBAL_MAX_ENTRIES, htonl(nf_conntrack_max)))
+ if (nla_put_be32(skb, CTA_STATS_GLOBAL_MAX_ENTRIES, htonl(nf_conntrack_max(net))))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -615,7 +615,7 @@ enum nf_ct_sysctl_index {
static struct ctl_table nf_ct_sysctl_table[] = {
[NF_SYSCTL_CT_MAX] = {
.procname = "nf_conntrack_max",
- .data = &nf_conntrack_max,
+ .data = &init_net.ct.sysctl_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -948,7 +948,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
static struct ctl_table nf_ct_netfilter_table[] = {
{
.procname = "nf_conntrack_max",
- .data = &nf_conntrack_max,
+ .data = &init_net.ct.sysctl_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -1063,6 +1063,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
table[NF_SYSCTL_CT_COUNT].data = &cnet->count;
table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum;
+ table[NF_SYSCTL_CT_MAX].data = &net->ct.sysctl_max;
table[NF_SYSCTL_CT_LOG_INVALID].data = &net->ct.sysctl_log_invalid;
table[NF_SYSCTL_CT_ACCT].data = &net->ct.sysctl_acct;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
@@ -1087,7 +1088,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
/* Don't allow non-init_net ns to alter global sysctls */
if (!net_eq(&init_net, net)) {
- table[NF_SYSCTL_CT_MAX].mode = 0444;
table[NF_SYSCTL_CT_EXPECT_MAX].mode = 0444;
table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
}
@@ -1139,6 +1139,7 @@ static int nf_conntrack_pernet_init(struct net *net)
int ret;
net->ct.sysctl_checksum = 1;
+ net->ct.sysctl_max = init_net.ct.sysctl_max;
ret = nf_conntrack_standalone_init_sysctl(net);
if (ret < 0)