From 6c67ed044036c5268295fceb5c8dbb2c8ecc5b73 Mon Sep 17 00:00:00 2001 From: emanuele-f Date: Fri, 16 Aug 2019 16:55:40 +0200 Subject: [PATCH] MTU changes to avoid connection stall on PMTU discovery failures --- doc/MTU.md | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++ edge.c | 17 ++++++++++++++- edge_utils.c | 10 +++++++++ n2n.h | 8 ++++--- 4 files changed, 90 insertions(+), 4 deletions(-) create mode 100644 doc/MTU.md diff --git a/doc/MTU.md b/doc/MTU.md new file mode 100644 index 0000000..ceccb7b --- /dev/null +++ b/doc/MTU.md @@ -0,0 +1,59 @@ +MTU +--- + +The MTU of the VPN interface is set to a lower value (rather than the standard +1500 B value) to avoid excessive fragmentation of the datagrams sent over the internet. +This is required because n2n adds additional headers to the packets received from +the VPN interface. The final frame sent through the internet interface +must have a size <= the internet interface MTU (usually 1500 B). + +As a fragmentation example, suppose that a 3000 B TCP segment should be sent through +the VPN. If the VPN interface MTU is set to 1500, the packet will be split into two +fragments of 1500 B each. However, n2n will add its headers to each fragment, so +each fragment becomes a 1540 B packet. Since the internet interface MTU is 1500 B, +each packet will be fragmented again into two further fragments (e.g. 1500 + 40 B), so a +total of 4 fragments will be sent over the internet. On the other hand, if the VPN interface +MTU was set to 1460, only 3 fragments would be sent, as the initial segment of +3000 B would be split into 1460 + 1460 + 80 B, none of which would be further fragmented. + +IP packet fragmentation in general is something to avoid, as described in +http://www.hpl.hp.com/techreports/Compaq-DEC/WRL-87-3.pdf . When possible, +the fragmentation should be moved to the TCP layer by setting a proper MSS value.
+This can be forced by mangling the packet MSS, which is called "MSS clamping" (currently not +implemented in n2n). See https://github.com/gsliepen/tinc/blob/228a03aaa707a5fcced9dd5148a4bdb7e5ef025b/src/route.c#L386 . + +The exact value to use as a clamp value, however, depends on the PMTU, which is the minimum +MTU of the path between two hosts. Knowing the PMTU is also useful for a sender in order to +avoid fragmentation at the IP level. Trying to find the biggest MTU is useful since it allows +maximizing bandwidth. + +PMTU Discovery Failures +----------------------- + +Most operating systems try to periodically discover the PMTU by using a PMTU discovery algorithm. +This involves setting the DF (don't fragment) flag on the IP packets. When a large IP packet exceeds +the MTU of a router in the path, an "ICMP Fragmentation Needed" message should be received, which will +help the OS tune the size of the next IP packets. However, some routers do not send such ICMP messages, +which results in packets being silently dropped. The `tracepath` tool can be used to detect the PMTU. + +The main problem when this situation occurs is that the actual PMTU is unknown, so an automatic +solution is not applicable. The user must manually specify a lower MTU for the VPN interface +in order to solve the issue. + +n2n and MTU +----------- + +n2n should work by default in different environments. For this reason, the following solution +has been provided: + +- PMTU discovery is disabled when possible (via the IP_MTU_DISCOVER socket option). This avoids + silently dropping an oversized packet due to the DF flag; however, it may increase fragmentation on the path. + +- As explained above, a lower MTU is set on the VPN interface, thus removing excessive fragmentation on + the sender. + +- 1400 B is used instead of 1500 B as the reference value for the internet interface MTU. + This essentially avoids fragmentation as long as the PMTU is at least 1400 B.
+ +This is a conservative solution which should make n2n work by default. The user can manually +specify the MTU and re-enable PMTU discovery via the CLI options. diff --git a/edge.c b/edge.c index 3cc6ce3..d5d855e 100644 --- a/edge.c +++ b/edge.c @@ -134,12 +134,15 @@ static void help() { "[-f]" #endif /* #ifndef WIN32 */ #ifdef __linux__ -"[-T ]" + "[-T ]" #endif "[-m ] " "-l \n" " " "[-p ] [-M ] " +#ifdef __linux__ + "[-D] " +#endif "[-r] [-E] [-v] [-i ] [-t ] [-b] [-A] [-h]\n\n"); #if defined(N2N_CAN_NAME_IFACE) @@ -165,6 +168,10 @@ static void help() { printf("-m | Fix MAC address for the TAP interface (otherwise it may be random)\n" " | eg. -m 01:02:03:04:05:06\n"); printf("-M | Specify n2n MTU of edge interface (default %d).\n", DEFAULT_MTU); +#ifdef __linux__ + printf("-D | Enable PMTU discovery. PMTU discovery can reduce fragmentation but" + " | causes connections stall when not properly supported.\n"); +#endif printf("-r | Enable packet forwarding through n2n community.\n"); #ifdef N2N_HAVE_AES printf("-A | Use AES CBC for encryption (default=use twofish).\n"); @@ -252,6 +259,14 @@ static int setOption(int optkey, char *optargument, n2n_priv_config_t *ec, n2n_e break; } +#ifdef __linux__ + case 'D' : /* enable PMTU discovery */ + { + conf->disable_pmtu_discovery = 0; + break; + } +#endif + case 'k': /* encrypt key */ { if(conf->encrypt_key) free(conf->encrypt_key); diff --git a/edge_utils.c b/edge_utils.c index 8cb1566..585b4a5 100644 --- a/edge_utils.c +++ b/edge_utils.c @@ -1818,6 +1818,15 @@ static int edge_init_sockets(n2n_edge_t *eee, int udp_local_port, int mgmt_port, else traceEvent(TRACE_ERROR, "Could not set TOS 0x%x[%d]: %s", tos, errno, strerror(errno)); } + + if(eee->conf.disable_pmtu_discovery) { + int sockopt = 0; + + if(setsockopt(eee->udp_sock, IPPROTO_IP, IP_MTU_DISCOVER, &sockopt, sizeof(sockopt)) < 0) + traceEvent(TRACE_WARNING, "Could not disable PMTU discovery[%d]: %s", errno, strerror(errno)); + else + traceEvent(TRACE_DEBUG, "PMTU 
discovery disabled"); + } #endif eee->udp_mgmt_sock = open_socket(mgmt_port, 0 /* bind LOOPBACK */); @@ -1862,6 +1871,7 @@ void edge_init_conf_defaults(n2n_edge_conf_t *conf) { conf->transop_id = N2N_TRANSFORM_ID_NULL; conf->drop_multicast = 1; conf->allow_p2p = 1; + conf->disable_pmtu_discovery = 1; conf->register_interval = REGISTER_SUPER_INTERVAL_DFL; if(getenv("N2N_KEY")) { diff --git a/n2n.h b/n2n.h index e57be34..a99ff33 100644 --- a/n2n.h +++ b/n2n.h @@ -164,10 +164,11 @@ typedef struct tuntap_dev { * same value if they are to understand each other. */ #define N2N_COMPRESSION_ENABLED 1 -#define DEFAULT_MTU 1390 +#define DEFAULT_MTU 1290 -/** Uncomment this to enable the MTU check */ -//#define MTU_ASSERT_VALUE 1500 +/** Uncomment this to enable the MTU check, then try to ssh to generate a fragmented packet. */ +/** NOTE: see doc/MTU.md for an explanation on the 1400 value */ +//#define MTU_ASSERT_VALUE 1400 /** Common type used to hold stringified IP addresses. */ typedef char ipstr_t[32]; @@ -209,6 +210,7 @@ typedef struct n2n_edge_conf { uint8_t dyn_ip_mode; /**< Interface IP address is dynamically allocated, eg. DHCP. */ uint8_t allow_routing; /**< Accept packet no to interface address. */ uint8_t drop_multicast; /**< Multicast ethernet addresses. */ + uint8_t disable_pmtu_discovery; /**< Disable the Path MTU discovery. */ uint8_t allow_p2p; /**< Allow P2P connection */ uint8_t sn_num; /**< Number of supernode addresses defined. */ uint8_t tos; /** TOS for sent packets */