tcp.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. /*
  2. Copyright (c) 2007-2016 Contributors as noted in the AUTHORS file
  3. This file is part of libzmq, the ZeroMQ core engine in C++.
  4. libzmq is free software; you can redistribute it and/or modify it under
  5. the terms of the GNU Lesser General Public License (LGPL) as published
  6. by the Free Software Foundation; either version 3 of the License, or
  7. (at your option) any later version.
  8. As a special exception, the Contributors give you permission to link
  9. this library with independent modules to produce an executable,
  10. regardless of the license terms of these independent modules, and to
  11. copy and distribute the resulting executable under terms of your choice,
  12. provided that you also meet, for each linked independent module, the
  13. terms and conditions of the license of that module. An independent
  14. module is a module which is not derived from or based on this library.
  15. If you modify this library, you must extend this exception to your
  16. version of the library.
  17. libzmq is distributed in the hope that it will be useful, but WITHOUT
  18. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  19. FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
  20. License for more details.
  21. You should have received a copy of the GNU Lesser General Public License
  22. along with this program. If not, see <http://www.gnu.org/licenses/>.
  23. */
  24. #include "precompiled.hpp"
  25. #include "macros.hpp"
  26. #include "ip.hpp"
  27. #include "tcp.hpp"
  28. #include "err.hpp"
  29. #include "options.hpp"
  30. #if !defined ZMQ_HAVE_WINDOWS
  31. #include <fcntl.h>
  32. #include <sys/types.h>
  33. #include <sys/socket.h>
  34. #include <netinet/in.h>
  35. #include <netinet/tcp.h>
  36. #include <unistd.h>
  37. #ifdef ZMQ_HAVE_VXWORKS
  38. #include <sockLib.h>
  39. #endif
  40. #endif
  41. #if defined ZMQ_HAVE_OPENVMS
  42. #include <ioctl.h>
  43. #endif
  44. #ifdef __APPLE__
  45. #include <TargetConditionals.h>
  46. #endif
  47. int zmq::tune_tcp_socket (fd_t s_)
  48. {
  49. // Disable Nagle's algorithm. We are doing data batching on 0MQ level,
  50. // so using Nagle wouldn't improve throughput in anyway, but it would
  51. // hurt latency.
  52. int nodelay = 1;
  53. const int rc =
  54. setsockopt (s_, IPPROTO_TCP, TCP_NODELAY,
  55. reinterpret_cast<char *> (&nodelay), sizeof (int));
  56. assert_success_or_recoverable (s_, rc);
  57. if (rc != 0)
  58. return rc;
  59. #ifdef ZMQ_HAVE_OPENVMS
  60. // Disable delayed acknowledgements as they hurt latency significantly.
  61. int nodelack = 1;
  62. rc = setsockopt (s_, IPPROTO_TCP, TCP_NODELACK, (char *) &nodelack,
  63. sizeof (int));
  64. assert_success_or_recoverable (s_, rc);
  65. #endif
  66. return rc;
  67. }
  68. int zmq::set_tcp_send_buffer (fd_t sockfd_, int bufsize_)
  69. {
  70. const int rc =
  71. setsockopt (sockfd_, SOL_SOCKET, SO_SNDBUF,
  72. reinterpret_cast<char *> (&bufsize_), sizeof bufsize_);
  73. assert_success_or_recoverable (sockfd_, rc);
  74. return rc;
  75. }
  76. int zmq::set_tcp_receive_buffer (fd_t sockfd_, int bufsize_)
  77. {
  78. const int rc =
  79. setsockopt (sockfd_, SOL_SOCKET, SO_RCVBUF,
  80. reinterpret_cast<char *> (&bufsize_), sizeof bufsize_);
  81. assert_success_or_recoverable (sockfd_, rc);
  82. return rc;
  83. }
  84. int zmq::tune_tcp_keepalives (fd_t s_,
  85. int keepalive_,
  86. int keepalive_cnt_,
  87. int keepalive_idle_,
  88. int keepalive_intvl_)
  89. {
  90. // These options are used only under certain #ifdefs below.
  91. LIBZMQ_UNUSED (keepalive_);
  92. LIBZMQ_UNUSED (keepalive_cnt_);
  93. LIBZMQ_UNUSED (keepalive_idle_);
  94. LIBZMQ_UNUSED (keepalive_intvl_);
  95. // If none of the #ifdefs apply, then s_ is unused.
  96. LIBZMQ_UNUSED (s_);
  97. // Tuning TCP keep-alives if platform allows it
  98. // All values = -1 means skip and leave it for OS
  99. #ifdef ZMQ_HAVE_WINDOWS
  100. if (keepalive_ != -1) {
  101. tcp_keepalive keepalive_opts;
  102. keepalive_opts.onoff = keepalive_;
  103. keepalive_opts.keepalivetime =
  104. keepalive_idle_ != -1 ? keepalive_idle_ * 1000 : 7200000;
  105. keepalive_opts.keepaliveinterval =
  106. keepalive_intvl_ != -1 ? keepalive_intvl_ * 1000 : 1000;
  107. DWORD num_bytes_returned;
  108. const int rc = WSAIoctl (s_, SIO_KEEPALIVE_VALS, &keepalive_opts,
  109. sizeof (keepalive_opts), NULL, 0,
  110. &num_bytes_returned, NULL, NULL);
  111. assert_success_or_recoverable (s_, rc);
  112. if (rc == SOCKET_ERROR)
  113. return rc;
  114. }
  115. #else
  116. #ifdef ZMQ_HAVE_SO_KEEPALIVE
  117. if (keepalive_ != -1) {
  118. int rc =
  119. setsockopt (s_, SOL_SOCKET, SO_KEEPALIVE,
  120. reinterpret_cast<char *> (&keepalive_), sizeof (int));
  121. assert_success_or_recoverable (s_, rc);
  122. if (rc != 0)
  123. return rc;
  124. #ifdef ZMQ_HAVE_TCP_KEEPCNT
  125. if (keepalive_cnt_ != -1) {
  126. int rc = setsockopt (s_, IPPROTO_TCP, TCP_KEEPCNT, &keepalive_cnt_,
  127. sizeof (int));
  128. assert_success_or_recoverable (s_, rc);
  129. if (rc != 0)
  130. return rc;
  131. }
  132. #endif // ZMQ_HAVE_TCP_KEEPCNT
  133. #ifdef ZMQ_HAVE_TCP_KEEPIDLE
  134. if (keepalive_idle_ != -1) {
  135. int rc = setsockopt (s_, IPPROTO_TCP, TCP_KEEPIDLE,
  136. &keepalive_idle_, sizeof (int));
  137. assert_success_or_recoverable (s_, rc);
  138. if (rc != 0)
  139. return rc;
  140. }
  141. #else // ZMQ_HAVE_TCP_KEEPIDLE
  142. #ifdef ZMQ_HAVE_TCP_KEEPALIVE
  143. if (keepalive_idle_ != -1) {
  144. int rc = setsockopt (s_, IPPROTO_TCP, TCP_KEEPALIVE,
  145. &keepalive_idle_, sizeof (int));
  146. assert_success_or_recoverable (s_, rc);
  147. if (rc != 0)
  148. return rc;
  149. }
  150. #endif // ZMQ_HAVE_TCP_KEEPALIVE
  151. #endif // ZMQ_HAVE_TCP_KEEPIDLE
  152. #ifdef ZMQ_HAVE_TCP_KEEPINTVL
  153. if (keepalive_intvl_ != -1) {
  154. int rc = setsockopt (s_, IPPROTO_TCP, TCP_KEEPINTVL,
  155. &keepalive_intvl_, sizeof (int));
  156. assert_success_or_recoverable (s_, rc);
  157. if (rc != 0)
  158. return rc;
  159. }
  160. #endif // ZMQ_HAVE_TCP_KEEPINTVL
  161. }
  162. #endif // ZMQ_HAVE_SO_KEEPALIVE
  163. #endif // ZMQ_HAVE_WINDOWS
  164. return 0;
  165. }
  166. int zmq::tune_tcp_maxrt (fd_t sockfd_, int timeout_)
  167. {
  168. if (timeout_ <= 0)
  169. return 0;
  170. LIBZMQ_UNUSED (sockfd_);
  171. #if defined(ZMQ_HAVE_WINDOWS) && defined(TCP_MAXRT)
  172. // msdn says it's supported in >= Vista, >= Windows Server 2003
  173. timeout_ /= 1000; // in seconds
  174. const int rc =
  175. setsockopt (sockfd_, IPPROTO_TCP, TCP_MAXRT,
  176. reinterpret_cast<char *> (&timeout_), sizeof (timeout_));
  177. assert_success_or_recoverable (sockfd_, rc);
  178. return rc;
  179. // FIXME: should be ZMQ_HAVE_TCP_USER_TIMEOUT
  180. #elif defined(TCP_USER_TIMEOUT)
  181. int rc = setsockopt (sockfd_, IPPROTO_TCP, TCP_USER_TIMEOUT, &timeout_,
  182. sizeof (timeout_));
  183. assert_success_or_recoverable (sockfd_, rc);
  184. return rc;
  185. #else
  186. return 0;
  187. #endif
  188. }
  189. int zmq::tcp_write (fd_t s_, const void *data_, size_t size_)
  190. {
  191. #ifdef ZMQ_HAVE_WINDOWS
  192. const int nbytes = send (s_, (char *) data_, static_cast<int> (size_), 0);
  193. // If not a single byte can be written to the socket in non-blocking mode
  194. // we'll get an error (this may happen during the speculative write).
  195. const int last_error = WSAGetLastError ();
  196. if (nbytes == SOCKET_ERROR && last_error == WSAEWOULDBLOCK)
  197. return 0;
  198. // Signalise peer failure.
  199. if (nbytes == SOCKET_ERROR
  200. && (last_error == WSAENETDOWN || last_error == WSAENETRESET
  201. || last_error == WSAEHOSTUNREACH || last_error == WSAECONNABORTED
  202. || last_error == WSAETIMEDOUT || last_error == WSAECONNRESET))
  203. return -1;
  204. // Circumvent a Windows bug:
  205. // See https://support.microsoft.com/en-us/kb/201213
  206. // See https://zeromq.jira.com/browse/LIBZMQ-195
  207. if (nbytes == SOCKET_ERROR && last_error == WSAENOBUFS)
  208. return 0;
  209. wsa_assert (nbytes != SOCKET_ERROR);
  210. return nbytes;
  211. #else
  212. ssize_t nbytes = send (s_, static_cast<const char *> (data_), size_, 0);
  213. // Several errors are OK. When speculative write is being done we may not
  214. // be able to write a single byte from the socket. Also, SIGSTOP issued
  215. // by a debugging tool can result in EINTR error.
  216. if (nbytes == -1
  217. && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR))
  218. return 0;
  219. // Signalise peer failure.
  220. if (nbytes == -1) {
  221. #if !defined(TARGET_OS_IPHONE) || !TARGET_OS_IPHONE
  222. errno_assert (errno != EACCES && errno != EBADF && errno != EDESTADDRREQ
  223. && errno != EFAULT && errno != EISCONN
  224. && errno != EMSGSIZE && errno != ENOMEM
  225. && errno != ENOTSOCK && errno != EOPNOTSUPP);
  226. #else
  227. errno_assert (errno != EACCES && errno != EDESTADDRREQ
  228. && errno != EFAULT && errno != EISCONN
  229. && errno != EMSGSIZE && errno != ENOMEM
  230. && errno != ENOTSOCK && errno != EOPNOTSUPP);
  231. #endif
  232. return -1;
  233. }
  234. return static_cast<int> (nbytes);
  235. #endif
  236. }
  237. int zmq::tcp_read (fd_t s_, void *data_, size_t size_)
  238. {
  239. #ifdef ZMQ_HAVE_WINDOWS
  240. const int rc =
  241. recv (s_, static_cast<char *> (data_), static_cast<int> (size_), 0);
  242. // If not a single byte can be read from the socket in non-blocking mode
  243. // we'll get an error (this may happen during the speculative read).
  244. if (rc == SOCKET_ERROR) {
  245. const int last_error = WSAGetLastError ();
  246. if (last_error == WSAEWOULDBLOCK) {
  247. errno = EAGAIN;
  248. } else {
  249. wsa_assert (
  250. last_error == WSAENETDOWN || last_error == WSAENETRESET
  251. || last_error == WSAECONNABORTED || last_error == WSAETIMEDOUT
  252. || last_error == WSAECONNRESET || last_error == WSAECONNREFUSED
  253. || last_error == WSAENOTCONN || last_error == WSAENOBUFS);
  254. errno = wsa_error_to_errno (last_error);
  255. }
  256. }
  257. return rc == SOCKET_ERROR ? -1 : rc;
  258. #else
  259. const ssize_t rc = recv (s_, static_cast<char *> (data_), size_, 0);
  260. // Several errors are OK. When speculative read is being done we may not
  261. // be able to read a single byte from the socket. Also, SIGSTOP issued
  262. // by a debugging tool can result in EINTR error.
  263. if (rc == -1) {
  264. #if !defined(TARGET_OS_IPHONE) || !TARGET_OS_IPHONE
  265. errno_assert (errno != EBADF && errno != EFAULT && errno != ENOMEM
  266. && errno != ENOTSOCK);
  267. #else
  268. errno_assert (errno != EFAULT && errno != ENOMEM && errno != ENOTSOCK);
  269. #endif
  270. if (errno == EWOULDBLOCK || errno == EINTR)
  271. errno = EAGAIN;
  272. }
  273. return static_cast<int> (rc);
  274. #endif
  275. }
  276. void zmq::tcp_tune_loopback_fast_path (const fd_t socket_)
  277. {
  278. #if defined ZMQ_HAVE_WINDOWS && defined SIO_LOOPBACK_FAST_PATH
  279. int sio_loopback_fastpath = 1;
  280. DWORD number_of_bytes_returned = 0;
  281. const int rc = WSAIoctl (
  282. socket_, SIO_LOOPBACK_FAST_PATH, &sio_loopback_fastpath,
  283. sizeof sio_loopback_fastpath, NULL, 0, &number_of_bytes_returned, 0, 0);
  284. if (SOCKET_ERROR == rc) {
  285. const DWORD last_error = ::WSAGetLastError ();
  286. if (WSAEOPNOTSUPP == last_error) {
  287. // This system is not Windows 8 or Server 2012, and the call is not supported.
  288. } else {
  289. wsa_assert (false);
  290. }
  291. }
  292. #else
  293. LIBZMQ_UNUSED (socket_);
  294. #endif
  295. }
  296. zmq::fd_t zmq::tcp_open_socket (const char *address_,
  297. const zmq::options_t &options_,
  298. bool local_,
  299. bool fallback_to_ipv4_,
  300. zmq::tcp_address_t *out_tcp_addr_)
  301. {
  302. // Convert the textual address into address structure.
  303. int rc = out_tcp_addr_->resolve (address_, local_, options_.ipv6);
  304. if (rc != 0)
  305. return retired_fd;
  306. // Create the socket.
  307. fd_t s = open_socket (out_tcp_addr_->family (), SOCK_STREAM, IPPROTO_TCP);
  308. // IPv6 address family not supported, try automatic downgrade to IPv4.
  309. if (s == retired_fd && fallback_to_ipv4_
  310. && out_tcp_addr_->family () == AF_INET6 && errno == EAFNOSUPPORT
  311. && options_.ipv6) {
  312. rc = out_tcp_addr_->resolve (address_, local_, false);
  313. if (rc != 0) {
  314. return retired_fd;
  315. }
  316. s = open_socket (AF_INET, SOCK_STREAM, IPPROTO_TCP);
  317. }
  318. if (s == retired_fd) {
  319. return retired_fd;
  320. }
  321. // On some systems, IPv4 mapping in IPv6 sockets is disabled by default.
  322. // Switch it on in such cases.
  323. if (out_tcp_addr_->family () == AF_INET6)
  324. enable_ipv4_mapping (s);
  325. // Set the IP Type-Of-Service priority for this socket
  326. if (options_.tos != 0)
  327. set_ip_type_of_service (s, options_.tos);
  328. // Set the socket to loopback fastpath if configured.
  329. if (options_.loopback_fastpath)
  330. tcp_tune_loopback_fast_path (s);
  331. // Bind the socket to a device if applicable
  332. if (!options_.bound_device.empty ())
  333. if (bind_to_device (s, options_.bound_device) == -1)
  334. goto setsockopt_error;
  335. // Set the socket buffer limits for the underlying socket.
  336. if (options_.sndbuf >= 0)
  337. set_tcp_send_buffer (s, options_.sndbuf);
  338. if (options_.rcvbuf >= 0)
  339. set_tcp_receive_buffer (s, options_.rcvbuf);
  340. return s;
  341. setsockopt_error:
  342. #ifdef ZMQ_HAVE_WINDOWS
  343. rc = closesocket (s);
  344. wsa_assert (rc != SOCKET_ERROR);
  345. #else
  346. rc = ::close (s);
  347. errno_assert (rc == 0);
  348. #endif
  349. return retired_fd;
  350. }