
/*
 * wepoll - epoll for Windows
 * https://github.com/piscisaureus/wepoll
 *
 * Copyright 2012-2018, Bert Belder <bertbelder@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef WEPOLL_EXPORT
#define WEPOLL_EXPORT
#endif

#include <stdint.h>

/* clang-format off */

enum EPOLL_EVENTS {
  EPOLLIN      = (int) (1U <<  0),
  EPOLLPRI     = (int) (1U <<  1),
  EPOLLOUT     = (int) (1U <<  2),
  EPOLLERR     = (int) (1U <<  3),
  EPOLLHUP     = (int) (1U <<  4),
  EPOLLRDNORM  = (int) (1U <<  6),
  EPOLLRDBAND  = (int) (1U <<  7),
  EPOLLWRNORM  = (int) (1U <<  8),
  EPOLLWRBAND  = (int) (1U <<  9),
  EPOLLMSG     = (int) (1U << 10), /* Never reported. */
  EPOLLRDHUP   = (int) (1U << 13),
  EPOLLONESHOT = (int) (1U << 31)
};

#define EPOLLIN      (1U <<  0)
#define EPOLLPRI     (1U <<  1)
#define EPOLLOUT     (1U <<  2)
#define EPOLLERR     (1U <<  3)
#define EPOLLHUP     (1U <<  4)
#define EPOLLRDNORM  (1U <<  6)
#define EPOLLRDBAND  (1U <<  7)
#define EPOLLWRNORM  (1U <<  8)
#define EPOLLWRBAND  (1U <<  9)
#define EPOLLMSG     (1U << 10)
#define EPOLLRDHUP   (1U << 13)
#define EPOLLONESHOT (1U << 31)

#define EPOLL_CTL_ADD 1
#define EPOLL_CTL_MOD 2
#define EPOLL_CTL_DEL 3

/* clang-format on */

typedef void* HANDLE;
typedef uintptr_t SOCKET;

typedef union epoll_data {
  void* ptr;
  int fd;
  uint32_t u32;
  uint64_t u64;
  SOCKET sock; /* Windows specific */
  HANDLE hnd;  /* Windows specific */
} epoll_data_t;

struct epoll_event {
  uint32_t events;   /* Epoll events and flags */
  epoll_data_t data; /* User data variable */
};

#ifdef __cplusplus
extern "C" {
#endif

WEPOLL_EXPORT HANDLE epoll_create(int size);
WEPOLL_EXPORT HANDLE epoll_create1(int flags);

WEPOLL_EXPORT int epoll_close(HANDLE ephnd);

WEPOLL_EXPORT int epoll_ctl(HANDLE ephnd,
                            int op,
                            SOCKET sock,
                            struct epoll_event* event);

WEPOLL_EXPORT int epoll_wait(HANDLE ephnd,
                             struct epoll_event* events,
                             int maxevents,
                             int timeout);

#ifdef __cplusplus
} /* extern "C" */
#endif
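
/* Added commentary, illustrative only: a minimal sketch of how the public API
 * declared above is meant to be driven from application code. This block is
 * not part of wepoll itself; `example_event_loop` and the `listener` socket
 * are assumptions made for the example. */
#if 0
#include <stdio.h>

static void example_event_loop(SOCKET listener) {
  struct epoll_event ev, out[8];
  HANDLE ephnd;
  int i, n;

  ephnd = epoll_create1(0);
  if (ephnd == NULL)
    return;

  ev.events = EPOLLIN; /* Wake up when `listener` becomes readable. */
  ev.data.sock = listener;
  if (epoll_ctl(ephnd, EPOLL_CTL_ADD, listener, &ev) < 0)
    goto done;

  for (;;) {
    n = epoll_wait(ephnd, out, 8, 1000 /* ms; -1 would wait forever */);
    if (n < 0)
      break; /* errno / GetLastError() describe the failure. */
    for (i = 0; i < n; i++)
      printf("socket %p ready, events 0x%x\n",
             (void*) out[i].data.sock,
             (unsigned) out[i].events);
  }

done:
  epoll_close(ephnd);
}
#endif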
#include <malloc.h>
#include <stdlib.h>

#define WEPOLL_INTERNAL static
#define WEPOLL_INTERNAL_VAR static

#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wreserved-id-macro"
#endif

#ifdef _WIN32_WINNT
#undef _WIN32_WINNT
#endif

#define _WIN32_WINNT 0x0600

#ifdef __clang__
#pragma clang diagnostic pop
#endif

#ifndef __GNUC__
#pragma warning(push, 1)
#endif

#include <WS2tcpip.h>
#include <WinSock2.h>
#include <Windows.h>

#ifndef __GNUC__
#pragma warning(pop)
#endif

WEPOLL_INTERNAL int nt_global_init(void);

typedef LONG NTSTATUS;
typedef NTSTATUS* PNTSTATUS;

#ifndef NT_SUCCESS
#define NT_SUCCESS(status) (((NTSTATUS)(status)) >= 0)
#endif

#ifndef STATUS_SUCCESS
#define STATUS_SUCCESS ((NTSTATUS) 0x00000000L)
#endif

#ifndef STATUS_PENDING
#define STATUS_PENDING ((NTSTATUS) 0x00000103L)
#endif

#ifndef STATUS_CANCELLED
#define STATUS_CANCELLED ((NTSTATUS) 0xC0000120L)
#endif

typedef struct _IO_STATUS_BLOCK {
  NTSTATUS Status;
  ULONG_PTR Information;
} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK;

typedef VOID(NTAPI* PIO_APC_ROUTINE)(PVOID ApcContext,
                                     PIO_STATUS_BLOCK IoStatusBlock,
                                     ULONG Reserved);

typedef struct _LSA_UNICODE_STRING {
  USHORT Length;
  USHORT MaximumLength;
  PWSTR Buffer;
} LSA_UNICODE_STRING, *PLSA_UNICODE_STRING, UNICODE_STRING, *PUNICODE_STRING;

#define RTL_CONSTANT_STRING(s) \
  { sizeof(s) - sizeof((s)[0]), sizeof(s), s }

typedef struct _OBJECT_ATTRIBUTES {
  ULONG Length;
  HANDLE RootDirectory;
  PUNICODE_STRING ObjectName;
  ULONG Attributes;
  PVOID SecurityDescriptor;
  PVOID SecurityQualityOfService;
} OBJECT_ATTRIBUTES, *POBJECT_ATTRIBUTES;

#define RTL_CONSTANT_OBJECT_ATTRIBUTES(ObjectName, Attributes) \
  { sizeof(OBJECT_ATTRIBUTES), NULL, ObjectName, Attributes, NULL, NULL }

#ifndef FILE_OPEN
#define FILE_OPEN 0x00000001UL
#endif

#define NT_NTDLL_IMPORT_LIST(X)             \
  X(NTSTATUS,                               \
    NTAPI,                                  \
    NtCreateFile,                           \
    (PHANDLE FileHandle,                    \
     ACCESS_MASK DesiredAccess,             \
     POBJECT_ATTRIBUTES ObjectAttributes,   \
     PIO_STATUS_BLOCK IoStatusBlock,        \
     PLARGE_INTEGER AllocationSize,         \
     ULONG FileAttributes,                  \
     ULONG ShareAccess,                     \
     ULONG CreateDisposition,               \
     ULONG CreateOptions,                   \
     PVOID EaBuffer,                        \
     ULONG EaLength))                       \
\
  X(NTSTATUS,                               \
    NTAPI,                                  \
    NtDeviceIoControlFile,                  \
    (HANDLE FileHandle,                     \
     HANDLE Event,                          \
     PIO_APC_ROUTINE ApcRoutine,            \
     PVOID ApcContext,                      \
     PIO_STATUS_BLOCK IoStatusBlock,        \
     ULONG IoControlCode,                   \
     PVOID InputBuffer,                     \
     ULONG InputBufferLength,               \
     PVOID OutputBuffer,                    \
     ULONG OutputBufferLength))             \
\
  X(ULONG, WINAPI, RtlNtStatusToDosError, (NTSTATUS Status)) \
\
  X(NTSTATUS,                               \
    NTAPI,                                  \
    NtCreateKeyedEvent,                     \
    (PHANDLE handle,                        \
     ACCESS_MASK access,                    \
     POBJECT_ATTRIBUTES attr,               \
     ULONG flags))                          \
\
  X(NTSTATUS,                               \
    NTAPI,                                  \
    NtWaitForKeyedEvent,                    \
    (HANDLE handle, PVOID key, BOOLEAN alertable, PLARGE_INTEGER mstimeout)) \
\
  X(NTSTATUS,                               \
    NTAPI,                                  \
    NtReleaseKeyedEvent,                    \
    (HANDLE handle, PVOID key, BOOLEAN alertable, PLARGE_INTEGER mstimeout))

#define X(return_type, attributes, name, parameters) \
  WEPOLL_INTERNAL_VAR return_type(attributes* name) parameters;
NT_NTDLL_IMPORT_LIST(X)
#undef X
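
/* Added commentary: NT_NTDLL_IMPORT_LIST() is an X-macro. The expansion
 * directly above turns each entry into a static function-pointer variable
 * (e.g. `NtCreateFile`); nt_global_init() further down expands the same list
 * a second time to resolve those pointers with GetProcAddress() against
 * ntdll.dll. Adding an ntdll import therefore only requires adding one entry
 * to the list. */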
#include <assert.h>
#include <stddef.h>

#ifndef _SSIZE_T_DEFINED
typedef intptr_t ssize_t;
#endif

#define array_count(a) (sizeof(a) / (sizeof((a)[0])))

/* clang-format off */
#define container_of(ptr, type, member) \
  ((type*) ((uintptr_t) (ptr) - offsetof(type, member)))
/* clang-format on */
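
/* Added commentary: container_of() recovers a pointer to an enclosing struct
 * from a pointer to one of its members by subtracting the member's offset.
 * It is what lets the intrusive queue and tree nodes below be embedded
 * directly in larger state objects. For example (illustrative), given the
 * sock_state_t defined later in this file, which embeds a queue_node_t named
 * `queue_node`:
 *
 *   sock_state_t* ss = container_of(node, sock_state_t, queue_node);
 *
 * where `node` is assumed to point at that embedded member. */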
#define unused_var(v) ((void) (v))

/* Polyfill `inline` for older versions of msvc (up to Visual Studio 2013) */
#if defined(_MSC_VER) && _MSC_VER < 1900
#define inline __inline
#endif

/* clang-format off */
#define AFD_POLL_RECEIVE           0x0001
#define AFD_POLL_RECEIVE_EXPEDITED 0x0002
#define AFD_POLL_SEND              0x0004
#define AFD_POLL_DISCONNECT        0x0008
#define AFD_POLL_ABORT             0x0010
#define AFD_POLL_LOCAL_CLOSE       0x0020
#define AFD_POLL_ACCEPT            0x0080
#define AFD_POLL_CONNECT_FAIL      0x0100
/* clang-format on */

typedef struct _AFD_POLL_HANDLE_INFO {
  HANDLE Handle;
  ULONG Events;
  NTSTATUS Status;
} AFD_POLL_HANDLE_INFO, *PAFD_POLL_HANDLE_INFO;

typedef struct _AFD_POLL_INFO {
  LARGE_INTEGER Timeout;
  ULONG NumberOfHandles;
  ULONG Exclusive;
  AFD_POLL_HANDLE_INFO Handles[1];
} AFD_POLL_INFO, *PAFD_POLL_INFO;

WEPOLL_INTERNAL int afd_create_helper_handle(HANDLE iocp,
                                             HANDLE* afd_helper_handle_out);

WEPOLL_INTERNAL int afd_poll(HANDLE afd_helper_handle,
                             AFD_POLL_INFO* poll_info,
                             OVERLAPPED* overlapped);

#define return_map_error(value) \
  do {                          \
    err_map_win_error();        \
    return (value);             \
  } while (0)

#define return_set_error(value, error) \
  do {                                 \
    err_set_win_error(error);          \
    return (value);                    \
  } while (0)

WEPOLL_INTERNAL void err_map_win_error(void);
WEPOLL_INTERNAL void err_set_win_error(DWORD error);
WEPOLL_INTERNAL int err_check_handle(HANDLE handle);

WEPOLL_INTERNAL int ws_global_init(void);
WEPOLL_INTERNAL SOCKET ws_get_base_socket(SOCKET socket);

#define IOCTL_AFD_POLL 0x00012024

static UNICODE_STRING afd__helper_name =
    RTL_CONSTANT_STRING(L"\\Device\\Afd\\Wepoll");

static OBJECT_ATTRIBUTES afd__helper_attributes =
    RTL_CONSTANT_OBJECT_ATTRIBUTES(&afd__helper_name, 0);

int afd_create_helper_handle(HANDLE iocp, HANDLE* afd_helper_handle_out) {
  HANDLE afd_helper_handle;
  IO_STATUS_BLOCK iosb;
  NTSTATUS status;

  /* By opening \Device\Afd without specifying any extended attributes, we'll
   * get a handle that lets us talk to the AFD driver, but that doesn't have an
   * associated endpoint (so it's not a socket). */
  status = NtCreateFile(&afd_helper_handle,
                        SYNCHRONIZE,
                        &afd__helper_attributes,
                        &iosb,
                        NULL,
                        0,
                        FILE_SHARE_READ | FILE_SHARE_WRITE,
                        FILE_OPEN,
                        0,
                        NULL,
                        0);
  if (status != STATUS_SUCCESS)
    return_set_error(-1, RtlNtStatusToDosError(status));

  if (CreateIoCompletionPort(afd_helper_handle, iocp, 0, 0) == NULL)
    goto error;

  if (!SetFileCompletionNotificationModes(afd_helper_handle,
                                          FILE_SKIP_SET_EVENT_ON_HANDLE))
    goto error;

  *afd_helper_handle_out = afd_helper_handle;
  return 0;

error:
  CloseHandle(afd_helper_handle);
  return_map_error(-1);
}
int afd_poll(HANDLE afd_helper_handle,
             AFD_POLL_INFO* poll_info,
             OVERLAPPED* overlapped) {
  IO_STATUS_BLOCK* iosb;
  HANDLE event;
  void* apc_context;
  NTSTATUS status;

  /* Blocking operation is not supported. */
  assert(overlapped != NULL);

  iosb = (IO_STATUS_BLOCK*) &overlapped->Internal;
  event = overlapped->hEvent;

  /* Do what other Windows APIs would do: if hEvent has its lowest bit set,
   * don't post a completion to the completion port. */
  if ((uintptr_t) event & 1) {
    event = (HANDLE)((uintptr_t) event & ~(uintptr_t) 1);
    apc_context = NULL;
  } else {
    apc_context = overlapped;
  }

  iosb->Status = STATUS_PENDING;
  status = NtDeviceIoControlFile(afd_helper_handle,
                                 event,
                                 NULL,
                                 apc_context,
                                 iosb,
                                 IOCTL_AFD_POLL,
                                 poll_info,
                                 sizeof *poll_info,
                                 poll_info,
                                 sizeof *poll_info);

  if (status == STATUS_SUCCESS)
    return 0;
  else if (status == STATUS_PENDING)
    return_set_error(-1, ERROR_IO_PENDING);
  else
    return_set_error(-1, RtlNtStatusToDosError(status));
}
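
/* Added commentary: afd_poll() always runs asynchronously here. The
 * IO_STATUS_BLOCK is aliased onto overlapped->Internal, so when the AFD
 * driver finishes the poll, the completion packet surfaces on the I/O
 * completion port that afd_create_helper_handle() associated with the helper
 * handle. Elsewhere in this file the OVERLAPPED lives inside a sock_state_t,
 * which is how a dequeued completion can be handed to sock_feed_event() and
 * related back to the socket it belongs to. */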
WEPOLL_INTERNAL int epoll_global_init(void);

WEPOLL_INTERNAL int init(void);

#include <stdbool.h>

typedef struct queue_node queue_node_t;

typedef struct queue_node {
  queue_node_t* prev;
  queue_node_t* next;
} queue_node_t;

typedef struct queue {
  queue_node_t head;
} queue_t;

WEPOLL_INTERNAL void queue_init(queue_t* queue);
WEPOLL_INTERNAL void queue_node_init(queue_node_t* node);

WEPOLL_INTERNAL queue_node_t* queue_first(const queue_t* queue);
WEPOLL_INTERNAL queue_node_t* queue_last(const queue_t* queue);
WEPOLL_INTERNAL void queue_prepend(queue_t* queue, queue_node_t* node);
WEPOLL_INTERNAL void queue_append(queue_t* queue, queue_node_t* node);
WEPOLL_INTERNAL void queue_move_first(queue_t* queue, queue_node_t* node);
WEPOLL_INTERNAL void queue_move_last(queue_t* queue, queue_node_t* node);
WEPOLL_INTERNAL void queue_remove(queue_node_t* node);

WEPOLL_INTERNAL bool queue_empty(const queue_t* queue);
WEPOLL_INTERNAL bool queue_enqueued(const queue_node_t* node);
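
/* Added commentary: this is an intrusive, circular, doubly-linked list. A
 * queue_t is just a sentinel node, and an empty queue is one whose head
 * points at itself, which is why queue_empty() can be expressed in terms of
 * queue_enqueued(). Users embed a queue_node_t in their own struct and
 * recover the container with container_of(), e.g. (illustrative):
 *
 *   queue_t q;
 *   queue_init(&q);
 *   queue_append(&q, sock_state_to_queue_node(sock_state));
 *   sock_state = sock_state_from_queue_node(queue_first(&q));
 */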
typedef struct port_state port_state_t;
typedef struct poll_group poll_group_t;

WEPOLL_INTERNAL poll_group_t* poll_group_acquire(port_state_t* port);
WEPOLL_INTERNAL void poll_group_release(poll_group_t* poll_group);

WEPOLL_INTERNAL void poll_group_delete(poll_group_t* poll_group);

WEPOLL_INTERNAL poll_group_t* poll_group_from_queue_node(
    queue_node_t* queue_node);
WEPOLL_INTERNAL HANDLE
poll_group_get_afd_helper_handle(poll_group_t* poll_group);

/* N.b.: the tree functions do not set errno or LastError when they fail. Each
 * of the API functions has at most one failure mode. It is up to the caller to
 * set an appropriate error code when necessary. */

typedef struct tree tree_t;
typedef struct tree_node tree_node_t;

typedef struct tree {
  tree_node_t* root;
} tree_t;

typedef struct tree_node {
  tree_node_t* left;
  tree_node_t* right;
  tree_node_t* parent;
  uintptr_t key;
  bool red;
} tree_node_t;

WEPOLL_INTERNAL void tree_init(tree_t* tree);
WEPOLL_INTERNAL void tree_node_init(tree_node_t* node);

WEPOLL_INTERNAL int tree_add(tree_t* tree, tree_node_t* node, uintptr_t key);
WEPOLL_INTERNAL void tree_del(tree_t* tree, tree_node_t* node);

WEPOLL_INTERNAL tree_node_t* tree_find(const tree_t* tree, uintptr_t key);
WEPOLL_INTERNAL tree_node_t* tree_root(const tree_t* tree);

typedef struct port_state port_state_t;
typedef struct sock_state sock_state_t;

WEPOLL_INTERNAL sock_state_t* sock_new(port_state_t* port_state,
                                       SOCKET socket);
WEPOLL_INTERNAL void sock_delete(port_state_t* port_state,
                                 sock_state_t* sock_state);
WEPOLL_INTERNAL void sock_force_delete(port_state_t* port_state,
                                       sock_state_t* sock_state);

WEPOLL_INTERNAL int sock_set_event(port_state_t* port_state,
                                   sock_state_t* sock_state,
                                   const struct epoll_event* ev);

WEPOLL_INTERNAL int sock_update(port_state_t* port_state,
                                sock_state_t* sock_state);
WEPOLL_INTERNAL int sock_feed_event(port_state_t* port_state,
                                    OVERLAPPED* overlapped,
                                    struct epoll_event* ev);

WEPOLL_INTERNAL sock_state_t* sock_state_from_queue_node(
    queue_node_t* queue_node);
WEPOLL_INTERNAL queue_node_t* sock_state_to_queue_node(
    sock_state_t* sock_state);
WEPOLL_INTERNAL sock_state_t* sock_state_from_tree_node(
    tree_node_t* tree_node);
WEPOLL_INTERNAL tree_node_t* sock_state_to_tree_node(sock_state_t* sock_state);
/* The reflock is a special kind of lock that normally prevents a chunk of
 * memory from being freed, but does allow the chunk of memory to eventually be
 * released in a coordinated fashion.
 *
 * Under normal operation, threads increase and decrease the reference count,
 * which are wait-free operations.
 *
 * Exactly once during the reflock's lifecycle, a thread holding a reference to
 * the lock may "destroy" the lock; this operation blocks until all other
 * threads holding a reference to the lock have dereferenced it. After
 * "destroy" returns, the calling thread may assume that no other threads have
 * a reference to the lock.
 *
 * Attempting to lock or destroy a lock after reflock_unref_and_destroy() has
 * been called is invalid and results in undefined behavior. Therefore the user
 * should use another lock to guarantee that this can't happen.
 */

typedef struct reflock {
  volatile long state; /* 32-bit Interlocked APIs operate on `long` values. */
} reflock_t;

WEPOLL_INTERNAL int reflock_global_init(void);

WEPOLL_INTERNAL void reflock_init(reflock_t* reflock);
WEPOLL_INTERNAL void reflock_ref(reflock_t* reflock);
WEPOLL_INTERNAL void reflock_unref(reflock_t* reflock);
WEPOLL_INTERNAL void reflock_unref_and_destroy(reflock_t* reflock);
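
/* Added commentary: the lifecycle described above looks roughly like this
 * (illustrative sketch, not code used elsewhere in this file):
 *
 *   reflock_t rl;
 *   reflock_init(&rl);              // state = 0, no references held
 *
 *   // Any thread that wants to touch the protected memory:
 *   reflock_ref(&rl);
 *   // ... use the memory ...
 *   reflock_unref(&rl);
 *
 *   // Exactly one thread, exactly once, while holding its own reference:
 *   reflock_unref_and_destroy(&rl); // blocks until all other refs are gone
 *   // ... now safe to free the protected memory ...
 *
 * The ts_tree_node_t defined below embeds a reflock for exactly this
 * purpose. */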
typedef struct ts_tree {
  tree_t tree;
  SRWLOCK lock;
} ts_tree_t;

typedef struct ts_tree_node {
  tree_node_t tree_node;
  reflock_t reflock;
} ts_tree_node_t;

WEPOLL_INTERNAL void ts_tree_init(ts_tree_t* rtl);
WEPOLL_INTERNAL void ts_tree_node_init(ts_tree_node_t* node);

WEPOLL_INTERNAL int ts_tree_add(ts_tree_t* ts_tree,
                                ts_tree_node_t* node,
                                uintptr_t key);

WEPOLL_INTERNAL ts_tree_node_t* ts_tree_del_and_ref(ts_tree_t* ts_tree,
                                                    uintptr_t key);
WEPOLL_INTERNAL ts_tree_node_t* ts_tree_find_and_ref(ts_tree_t* ts_tree,
                                                     uintptr_t key);

WEPOLL_INTERNAL void ts_tree_node_unref(ts_tree_node_t* node);
WEPOLL_INTERNAL void ts_tree_node_unref_and_destroy(ts_tree_node_t* node);

typedef struct port_state port_state_t;
typedef struct sock_state sock_state_t;

typedef struct port_state {
  HANDLE iocp;
  tree_t sock_tree;
  queue_t sock_update_queue;
  queue_t sock_deleted_queue;
  queue_t poll_group_queue;
  ts_tree_node_t handle_tree_node;
  CRITICAL_SECTION lock;
  size_t active_poll_count;
} port_state_t;

WEPOLL_INTERNAL port_state_t* port_new(HANDLE* iocp_out);
WEPOLL_INTERNAL int port_close(port_state_t* port_state);
WEPOLL_INTERNAL int port_delete(port_state_t* port_state);

WEPOLL_INTERNAL int port_wait(port_state_t* port_state,
                              struct epoll_event* events,
                              int maxevents,
                              int timeout);

WEPOLL_INTERNAL int port_ctl(port_state_t* port_state,
                             int op,
                             SOCKET sock,
                             struct epoll_event* ev);

WEPOLL_INTERNAL int port_register_socket_handle(port_state_t* port_state,
                                                sock_state_t* sock_state,
                                                SOCKET socket);
WEPOLL_INTERNAL void port_unregister_socket_handle(port_state_t* port_state,
                                                   sock_state_t* sock_state);
WEPOLL_INTERNAL sock_state_t* port_find_socket(port_state_t* port_state,
                                               SOCKET socket);

WEPOLL_INTERNAL void port_request_socket_update(port_state_t* port_state,
                                                sock_state_t* sock_state);
WEPOLL_INTERNAL void port_cancel_socket_update(port_state_t* port_state,
                                               sock_state_t* sock_state);

WEPOLL_INTERNAL void port_add_deleted_socket(port_state_t* port_state,
                                             sock_state_t* sock_state);
WEPOLL_INTERNAL void port_remove_deleted_socket(port_state_t* port_state,
                                                sock_state_t* sock_state);

static ts_tree_t epoll__handle_tree;

static inline port_state_t* epoll__handle_tree_node_to_port(
    ts_tree_node_t* tree_node) {
  return container_of(tree_node, port_state_t, handle_tree_node);
}

int epoll_global_init(void) {
  ts_tree_init(&epoll__handle_tree);
  return 0;
}
static HANDLE epoll__create(void) {
  port_state_t* port_state;
  HANDLE ephnd;

  if (init() < 0)
    return NULL;

  port_state = port_new(&ephnd);
  if (port_state == NULL)
    return NULL;

  if (ts_tree_add(&epoll__handle_tree,
                  &port_state->handle_tree_node,
                  (uintptr_t) ephnd) < 0) {
    /* This should never happen. */
    port_delete(port_state);
    return_set_error(NULL, ERROR_ALREADY_EXISTS);
  }

  return ephnd;
}

HANDLE epoll_create(int size) {
  if (size <= 0)
    return_set_error(NULL, ERROR_INVALID_PARAMETER);

  return epoll__create();
}

HANDLE epoll_create1(int flags) {
  if (flags != 0)
    return_set_error(NULL, ERROR_INVALID_PARAMETER);

  return epoll__create();
}

int epoll_close(HANDLE ephnd) {
  ts_tree_node_t* tree_node;
  port_state_t* port_state;

  if (init() < 0)
    return -1;

  tree_node = ts_tree_del_and_ref(&epoll__handle_tree, (uintptr_t) ephnd);
  if (tree_node == NULL) {
    err_set_win_error(ERROR_INVALID_PARAMETER);
    goto err;
  }

  port_state = epoll__handle_tree_node_to_port(tree_node);
  port_close(port_state);

  ts_tree_node_unref_and_destroy(tree_node);

  return port_delete(port_state);

err:
  err_check_handle(ephnd);
  return -1;
}

int epoll_ctl(HANDLE ephnd, int op, SOCKET sock, struct epoll_event* ev) {
  ts_tree_node_t* tree_node;
  port_state_t* port_state;
  int r;

  if (init() < 0)
    return -1;

  tree_node = ts_tree_find_and_ref(&epoll__handle_tree, (uintptr_t) ephnd);
  if (tree_node == NULL) {
    err_set_win_error(ERROR_INVALID_PARAMETER);
    goto err;
  }

  port_state = epoll__handle_tree_node_to_port(tree_node);
  r = port_ctl(port_state, op, sock, ev);

  ts_tree_node_unref(tree_node);

  if (r < 0)
    goto err;

  return 0;

err:
  /* On Linux, in the case of epoll_ctl_mod(), EBADF takes priority over other
   * errors. Wepoll mimics this behavior. */
  err_check_handle(ephnd);
  err_check_handle((HANDLE) sock);
  return -1;
}

int epoll_wait(HANDLE ephnd,
               struct epoll_event* events,
               int maxevents,
               int timeout) {
  ts_tree_node_t* tree_node;
  port_state_t* port_state;
  int num_events;

  if (maxevents <= 0)
    return_set_error(-1, ERROR_INVALID_PARAMETER);

  if (init() < 0)
    return -1;

  tree_node = ts_tree_find_and_ref(&epoll__handle_tree, (uintptr_t) ephnd);
  if (tree_node == NULL) {
    err_set_win_error(ERROR_INVALID_PARAMETER);
    goto err;
  }

  port_state = epoll__handle_tree_node_to_port(tree_node);
  num_events = port_wait(port_state, events, maxevents, timeout);

  ts_tree_node_unref(tree_node);

  if (num_events < 0)
    goto err;

  return num_events;

err:
  err_check_handle(ephnd);
  return -1;
}
#include <errno.h>

#define ERR__ERRNO_MAPPINGS(X) \
  X(ERROR_ACCESS_DENIED, EACCES) \
  X(ERROR_ALREADY_EXISTS, EEXIST) \
  X(ERROR_BAD_COMMAND, EACCES) \
  X(ERROR_BAD_EXE_FORMAT, ENOEXEC) \
  X(ERROR_BAD_LENGTH, EACCES) \
  X(ERROR_BAD_NETPATH, ENOENT) \
  X(ERROR_BAD_NET_NAME, ENOENT) \
  X(ERROR_BAD_NET_RESP, ENETDOWN) \
  X(ERROR_BAD_PATHNAME, ENOENT) \
  X(ERROR_BROKEN_PIPE, EPIPE) \
  X(ERROR_CANNOT_MAKE, EACCES) \
  X(ERROR_COMMITMENT_LIMIT, ENOMEM) \
  X(ERROR_CONNECTION_ABORTED, ECONNABORTED) \
  X(ERROR_CONNECTION_ACTIVE, EISCONN) \
  X(ERROR_CONNECTION_REFUSED, ECONNREFUSED) \
  X(ERROR_CRC, EACCES) \
  X(ERROR_DIR_NOT_EMPTY, ENOTEMPTY) \
  X(ERROR_DISK_FULL, ENOSPC) \
  X(ERROR_DUP_NAME, EADDRINUSE) \
  X(ERROR_FILENAME_EXCED_RANGE, ENOENT) \
  X(ERROR_FILE_NOT_FOUND, ENOENT) \
  X(ERROR_GEN_FAILURE, EACCES) \
  X(ERROR_GRACEFUL_DISCONNECT, EPIPE) \
  X(ERROR_HOST_DOWN, EHOSTUNREACH) \
  X(ERROR_HOST_UNREACHABLE, EHOSTUNREACH) \
  X(ERROR_INSUFFICIENT_BUFFER, EFAULT) \
  X(ERROR_INVALID_ADDRESS, EADDRNOTAVAIL) \
  X(ERROR_INVALID_FUNCTION, EINVAL) \
  X(ERROR_INVALID_HANDLE, EBADF) \
  X(ERROR_INVALID_NETNAME, EADDRNOTAVAIL) \
  X(ERROR_INVALID_PARAMETER, EINVAL) \
  X(ERROR_INVALID_USER_BUFFER, EMSGSIZE) \
  X(ERROR_IO_PENDING, EINPROGRESS) \
  X(ERROR_LOCK_VIOLATION, EACCES) \
  X(ERROR_MORE_DATA, EMSGSIZE) \
  X(ERROR_NETNAME_DELETED, ECONNABORTED) \
  X(ERROR_NETWORK_ACCESS_DENIED, EACCES) \
  X(ERROR_NETWORK_BUSY, ENETDOWN) \
  X(ERROR_NETWORK_UNREACHABLE, ENETUNREACH) \
  X(ERROR_NOACCESS, EFAULT) \
  X(ERROR_NONPAGED_SYSTEM_RESOURCES, ENOMEM) \
  X(ERROR_NOT_ENOUGH_MEMORY, ENOMEM) \
  X(ERROR_NOT_ENOUGH_QUOTA, ENOMEM) \
  X(ERROR_NOT_FOUND, ENOENT) \
  X(ERROR_NOT_LOCKED, EACCES) \
  X(ERROR_NOT_READY, EACCES) \
  X(ERROR_NOT_SAME_DEVICE, EXDEV) \
  X(ERROR_NOT_SUPPORTED, ENOTSUP) \
  X(ERROR_NO_MORE_FILES, ENOENT) \
  X(ERROR_NO_SYSTEM_RESOURCES, ENOMEM) \
  X(ERROR_OPERATION_ABORTED, EINTR) \
  X(ERROR_OUT_OF_PAPER, EACCES) \
  X(ERROR_PAGED_SYSTEM_RESOURCES, ENOMEM) \
  X(ERROR_PAGEFILE_QUOTA, ENOMEM) \
  X(ERROR_PATH_NOT_FOUND, ENOENT) \
  X(ERROR_PIPE_NOT_CONNECTED, EPIPE) \
  X(ERROR_PORT_UNREACHABLE, ECONNRESET) \
  X(ERROR_PROTOCOL_UNREACHABLE, ENETUNREACH) \
  X(ERROR_REM_NOT_LIST, ECONNREFUSED) \
  X(ERROR_REQUEST_ABORTED, EINTR) \
  X(ERROR_REQ_NOT_ACCEP, EWOULDBLOCK) \
  X(ERROR_SECTOR_NOT_FOUND, EACCES) \
  X(ERROR_SEM_TIMEOUT, ETIMEDOUT) \
  X(ERROR_SHARING_VIOLATION, EACCES) \
  X(ERROR_TOO_MANY_NAMES, ENOMEM) \
  X(ERROR_TOO_MANY_OPEN_FILES, EMFILE) \
  X(ERROR_UNEXP_NET_ERR, ECONNABORTED) \
  X(ERROR_WAIT_NO_CHILDREN, ECHILD) \
  X(ERROR_WORKING_SET_QUOTA, ENOMEM) \
  X(ERROR_WRITE_PROTECT, EACCES) \
  X(ERROR_WRONG_DISK, EACCES) \
  X(WSAEACCES, EACCES) \
  X(WSAEADDRINUSE, EADDRINUSE) \
  X(WSAEADDRNOTAVAIL, EADDRNOTAVAIL) \
  X(WSAEAFNOSUPPORT, EAFNOSUPPORT) \
  X(WSAECONNABORTED, ECONNABORTED) \
  X(WSAECONNREFUSED, ECONNREFUSED) \
  X(WSAECONNRESET, ECONNRESET) \
  X(WSAEDISCON, EPIPE) \
  X(WSAEFAULT, EFAULT) \
  X(WSAEHOSTDOWN, EHOSTUNREACH) \
  X(WSAEHOSTUNREACH, EHOSTUNREACH) \
  X(WSAEINPROGRESS, EBUSY) \
  X(WSAEINTR, EINTR) \
  X(WSAEINVAL, EINVAL) \
  X(WSAEISCONN, EISCONN) \
  X(WSAEMSGSIZE, EMSGSIZE) \
  X(WSAENETDOWN, ENETDOWN) \
  X(WSAENETRESET, EHOSTUNREACH) \
  X(WSAENETUNREACH, ENETUNREACH) \
  X(WSAENOBUFS, ENOMEM) \
  X(WSAENOTCONN, ENOTCONN) \
  X(WSAENOTSOCK, ENOTSOCK) \
  X(WSAEOPNOTSUPP, EOPNOTSUPP) \
  X(WSAEPROCLIM, ENOMEM) \
  X(WSAESHUTDOWN, EPIPE) \
  X(WSAETIMEDOUT, ETIMEDOUT) \
  X(WSAEWOULDBLOCK, EWOULDBLOCK) \
  X(WSANOTINITIALISED, ENETDOWN) \
  X(WSASYSNOTREADY, ENETDOWN) \
  X(WSAVERNOTSUPPORTED, ENOSYS)

static errno_t err__map_win_error_to_errno(DWORD error) {
  switch (error) {
#define X(error_sym, errno_sym) \
  case error_sym:               \
    return errno_sym;
    ERR__ERRNO_MAPPINGS(X)
#undef X
  }
  return EINVAL;
}

void err_map_win_error(void) {
  errno = err__map_win_error_to_errno(GetLastError());
}

void err_set_win_error(DWORD error) {
  SetLastError(error);
  errno = err__map_win_error_to_errno(error);
}

int err_check_handle(HANDLE handle) {
  DWORD flags;

  /* GetHandleInformation() succeeds when passed INVALID_HANDLE_VALUE, so check
   * for this condition explicitly. */
  if (handle == INVALID_HANDLE_VALUE)
    return_set_error(-1, ERROR_INVALID_HANDLE);

  if (!GetHandleInformation(handle, &flags))
    return_map_error(-1);

  return 0;
}
static bool init__done = false;
static INIT_ONCE init__once = INIT_ONCE_STATIC_INIT;

static BOOL CALLBACK init__once_callback(INIT_ONCE* once,
                                         void* parameter,
                                         void** context) {
  unused_var(once);
  unused_var(parameter);
  unused_var(context);

  /* N.b. that initialization order matters here. */
  if (ws_global_init() < 0 || nt_global_init() < 0 ||
      reflock_global_init() < 0 || epoll_global_init() < 0)
    return FALSE;

  init__done = true;
  return TRUE;
}

int init(void) {
  if (!init__done &&
      !InitOnceExecuteOnce(&init__once, init__once_callback, NULL, NULL))
    return -1; /* LastError and errno aren't touched by InitOnceExecuteOnce. */

  return 0;
}
/* Set up a workaround for the following problem:
 *   FARPROC addr = GetProcAddress(...);
 *   MY_FUNC func = (MY_FUNC) addr;          <-- GCC 8 warning/error.
 *   MY_FUNC func = (MY_FUNC) (void*) addr;  <-- MSVC warning/error.
 * To compile cleanly with either compiler, do casts with this "bridge" type:
 *   MY_FUNC func = (MY_FUNC) (nt__fn_ptr_cast_t) addr; */
#ifdef __GNUC__
typedef void* nt__fn_ptr_cast_t;
#else
typedef FARPROC nt__fn_ptr_cast_t;
#endif

#define X(return_type, attributes, name, parameters) \
  WEPOLL_INTERNAL return_type(attributes* name) parameters = NULL;
NT_NTDLL_IMPORT_LIST(X)
#undef X

int nt_global_init(void) {
  HMODULE ntdll;
  FARPROC fn_ptr;

  ntdll = GetModuleHandleW(L"ntdll.dll");
  if (ntdll == NULL)
    return -1;

#define X(return_type, attributes, name, parameters) \
  fn_ptr = GetProcAddress(ntdll, #name);             \
  if (fn_ptr == NULL)                                \
    return -1;                                       \
  name = (return_type(attributes*) parameters)(nt__fn_ptr_cast_t) fn_ptr;
  NT_NTDLL_IMPORT_LIST(X)
#undef X

  return 0;
}
#include <string.h>

static const size_t POLL_GROUP__MAX_GROUP_SIZE = 32;

typedef struct poll_group {
  port_state_t* port_state;
  queue_node_t queue_node;
  HANDLE afd_helper_handle;
  size_t group_size;
} poll_group_t;

static poll_group_t* poll_group__new(port_state_t* port_state) {
  poll_group_t* poll_group = malloc(sizeof *poll_group);
  if (poll_group == NULL)
    return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY);

  memset(poll_group, 0, sizeof *poll_group);

  queue_node_init(&poll_group->queue_node);
  poll_group->port_state = port_state;

  if (afd_create_helper_handle(port_state->iocp,
                               &poll_group->afd_helper_handle) < 0) {
    free(poll_group);
    return NULL;
  }

  queue_append(&port_state->poll_group_queue, &poll_group->queue_node);

  return poll_group;
}

void poll_group_delete(poll_group_t* poll_group) {
  assert(poll_group->group_size == 0);
  CloseHandle(poll_group->afd_helper_handle);
  queue_remove(&poll_group->queue_node);
  free(poll_group);
}

poll_group_t* poll_group_from_queue_node(queue_node_t* queue_node) {
  return container_of(queue_node, poll_group_t, queue_node);
}

HANDLE poll_group_get_afd_helper_handle(poll_group_t* poll_group) {
  return poll_group->afd_helper_handle;
}

poll_group_t* poll_group_acquire(port_state_t* port_state) {
  queue_t* queue = &port_state->poll_group_queue;
  poll_group_t* poll_group =
      !queue_empty(queue)
          ? container_of(queue_last(queue), poll_group_t, queue_node)
          : NULL;

  if (poll_group == NULL ||
      poll_group->group_size >= POLL_GROUP__MAX_GROUP_SIZE)
    poll_group = poll_group__new(port_state);
  if (poll_group == NULL)
    return NULL;

  if (++poll_group->group_size == POLL_GROUP__MAX_GROUP_SIZE)
    queue_move_first(&port_state->poll_group_queue, &poll_group->queue_node);

  return poll_group;
}

void poll_group_release(poll_group_t* poll_group) {
  port_state_t* port_state = poll_group->port_state;

  poll_group->group_size--;
  assert(poll_group->group_size < POLL_GROUP__MAX_GROUP_SIZE);

  queue_move_last(&port_state->poll_group_queue, &poll_group->queue_node);

  /* Poll groups are currently only freed when the epoll port is closed. */
}
#define PORT__MAX_ON_STACK_COMPLETIONS 256

static port_state_t* port__alloc(void) {
  port_state_t* port_state = malloc(sizeof *port_state);
  if (port_state == NULL)
    return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY);

  return port_state;
}

static void port__free(port_state_t* port) {
  assert(port != NULL);
  free(port);
}

static HANDLE port__create_iocp(void) {
  HANDLE iocp = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
  if (iocp == NULL)
    return_map_error(NULL);

  return iocp;
}

port_state_t* port_new(HANDLE* iocp_out) {
  port_state_t* port_state;
  HANDLE iocp;

  port_state = port__alloc();
  if (port_state == NULL)
    goto err1;

  iocp = port__create_iocp();
  if (iocp == NULL)
    goto err2;

  memset(port_state, 0, sizeof *port_state);

  port_state->iocp = iocp;
  tree_init(&port_state->sock_tree);
  queue_init(&port_state->sock_update_queue);
  queue_init(&port_state->sock_deleted_queue);
  queue_init(&port_state->poll_group_queue);
  ts_tree_node_init(&port_state->handle_tree_node);
  InitializeCriticalSection(&port_state->lock);

  *iocp_out = iocp;
  return port_state;

err2:
  port__free(port_state);
err1:
  return NULL;
}

static int port__close_iocp(port_state_t* port_state) {
  HANDLE iocp = port_state->iocp;
  port_state->iocp = NULL;

  if (!CloseHandle(iocp))
    return_map_error(-1);

  return 0;
}

int port_close(port_state_t* port_state) {
  int result;

  EnterCriticalSection(&port_state->lock);
  result = port__close_iocp(port_state);
  LeaveCriticalSection(&port_state->lock);

  return result;
}

int port_delete(port_state_t* port_state) {
  tree_node_t* tree_node;
  queue_node_t* queue_node;

  /* At this point the IOCP port should have been closed. */
  assert(port_state->iocp == NULL);

  while ((tree_node = tree_root(&port_state->sock_tree)) != NULL) {
    sock_state_t* sock_state = sock_state_from_tree_node(tree_node);
    sock_force_delete(port_state, sock_state);
  }

  while ((queue_node = queue_first(&port_state->sock_deleted_queue)) != NULL) {
    sock_state_t* sock_state = sock_state_from_queue_node(queue_node);
    sock_force_delete(port_state, sock_state);
  }

  while ((queue_node = queue_first(&port_state->poll_group_queue)) != NULL) {
    poll_group_t* poll_group = poll_group_from_queue_node(queue_node);
    poll_group_delete(poll_group);
  }

  assert(queue_empty(&port_state->sock_update_queue));

  DeleteCriticalSection(&port_state->lock);

  port__free(port_state);

  return 0;
}
static int port__update_events(port_state_t* port_state) {
  queue_t* sock_update_queue = &port_state->sock_update_queue;

  /* Walk the queue, submitting new poll requests for every socket that needs
   * it. */
  while (!queue_empty(sock_update_queue)) {
    queue_node_t* queue_node = queue_first(sock_update_queue);
    sock_state_t* sock_state = sock_state_from_queue_node(queue_node);

    if (sock_update(port_state, sock_state) < 0)
      return -1;

    /* sock_update() removes the socket from the update queue. */
  }

  return 0;
}

static void port__update_events_if_polling(port_state_t* port_state) {
  if (port_state->active_poll_count > 0)
    port__update_events(port_state);
}

static int port__feed_events(port_state_t* port_state,
                             struct epoll_event* epoll_events,
                             OVERLAPPED_ENTRY* iocp_events,
                             DWORD iocp_event_count) {
  int epoll_event_count = 0;
  DWORD i;

  for (i = 0; i < iocp_event_count; i++) {
    OVERLAPPED* overlapped = iocp_events[i].lpOverlapped;
    struct epoll_event* ev = &epoll_events[epoll_event_count];

    epoll_event_count += sock_feed_event(port_state, overlapped, ev);
  }

  return epoll_event_count;
}

static int port__poll(port_state_t* port_state,
                      struct epoll_event* epoll_events,
                      OVERLAPPED_ENTRY* iocp_events,
                      DWORD maxevents,
                      DWORD timeout) {
  DWORD completion_count;

  if (port__update_events(port_state) < 0)
    return -1;

  port_state->active_poll_count++;

  LeaveCriticalSection(&port_state->lock);

  BOOL r = GetQueuedCompletionStatusEx(port_state->iocp,
                                       iocp_events,
                                       maxevents,
                                       &completion_count,
                                       timeout,
                                       FALSE);

  EnterCriticalSection(&port_state->lock);

  port_state->active_poll_count--;

  if (!r)
    return_map_error(-1);

  return port__feed_events(
      port_state, epoll_events, iocp_events, completion_count);
}

int port_wait(port_state_t* port_state,
              struct epoll_event* events,
              int maxevents,
              int timeout) {
  OVERLAPPED_ENTRY stack_iocp_events[PORT__MAX_ON_STACK_COMPLETIONS];
  OVERLAPPED_ENTRY* iocp_events;
  uint64_t due = 0;
  DWORD gqcs_timeout;
  int result;

  /* Check whether `maxevents` is in range. */
  if (maxevents <= 0)
    return_set_error(-1, ERROR_INVALID_PARAMETER);

  /* Decide whether the IOCP completion list can live on the stack, or allocate
   * memory for it on the heap. */
  if ((size_t) maxevents <= array_count(stack_iocp_events)) {
    iocp_events = stack_iocp_events;
  } else if ((iocp_events =
                  malloc((size_t) maxevents * sizeof *iocp_events)) == NULL) {
    iocp_events = stack_iocp_events;
    maxevents = array_count(stack_iocp_events);
  }

  /* Compute the timeout for GetQueuedCompletionStatus, and the wait end
   * time, if the user specified a timeout other than zero or infinite. */
  if (timeout > 0) {
    due = GetTickCount64() + (uint64_t) timeout;
    gqcs_timeout = (DWORD) timeout;
  } else if (timeout == 0) {
    gqcs_timeout = 0;
  } else {
    gqcs_timeout = INFINITE;
  }

  EnterCriticalSection(&port_state->lock);

  /* Dequeue completion packets until either at least one interesting event
   * has been discovered, or the timeout is reached. */
  for (;;) {
    uint64_t now;

    result = port__poll(
        port_state, events, iocp_events, (DWORD) maxevents, gqcs_timeout);
    if (result < 0 || result > 0)
      break; /* Result, error, or time-out. */

    if (timeout < 0)
      continue; /* When timeout is negative, never time out. */

    /* Update time. */
    now = GetTickCount64();

    /* Do not allow the due time to be in the past. */
    if (now >= due) {
      SetLastError(WAIT_TIMEOUT);
      break;
    }

    /* Recompute time-out argument for GetQueuedCompletionStatus. */
    gqcs_timeout = (DWORD)(due - now);
  }

  port__update_events_if_polling(port_state);

  LeaveCriticalSection(&port_state->lock);

  if (iocp_events != stack_iocp_events)
    free(iocp_events);

  if (result >= 0)
    return result;
  else if (GetLastError() == WAIT_TIMEOUT)
    return 0;
  else
    return -1;
}
static int port__ctl_add(port_state_t* port_state,
                         SOCKET sock,
                         struct epoll_event* ev) {
  sock_state_t* sock_state = sock_new(port_state, sock);
  if (sock_state == NULL)
    return -1;

  if (sock_set_event(port_state, sock_state, ev) < 0) {
    sock_delete(port_state, sock_state);
    return -1;
  }

  port__update_events_if_polling(port_state);

  return 0;
}

static int port__ctl_mod(port_state_t* port_state,
                         SOCKET sock,
                         struct epoll_event* ev) {
  sock_state_t* sock_state = port_find_socket(port_state, sock);
  if (sock_state == NULL)
    return -1;

  if (sock_set_event(port_state, sock_state, ev) < 0)
    return -1;

  port__update_events_if_polling(port_state);

  return 0;
}

static int port__ctl_del(port_state_t* port_state, SOCKET sock) {
  sock_state_t* sock_state = port_find_socket(port_state, sock);
  if (sock_state == NULL)
    return -1;

  sock_delete(port_state, sock_state);

  return 0;
}

static int port__ctl_op(port_state_t* port_state,
                        int op,
                        SOCKET sock,
                        struct epoll_event* ev) {
  switch (op) {
    case EPOLL_CTL_ADD:
      return port__ctl_add(port_state, sock, ev);
    case EPOLL_CTL_MOD:
      return port__ctl_mod(port_state, sock, ev);
    case EPOLL_CTL_DEL:
      return port__ctl_del(port_state, sock);
    default:
      return_set_error(-1, ERROR_INVALID_PARAMETER);
  }
}

int port_ctl(port_state_t* port_state,
             int op,
             SOCKET sock,
             struct epoll_event* ev) {
  int result;

  EnterCriticalSection(&port_state->lock);
  result = port__ctl_op(port_state, op, sock, ev);
  LeaveCriticalSection(&port_state->lock);

  return result;
}

int port_register_socket_handle(port_state_t* port_state,
                                sock_state_t* sock_state,
                                SOCKET socket) {
  if (tree_add(&port_state->sock_tree,
               sock_state_to_tree_node(sock_state),
               socket) < 0)
    return_set_error(-1, ERROR_ALREADY_EXISTS);
  return 0;
}

void port_unregister_socket_handle(port_state_t* port_state,
                                   sock_state_t* sock_state) {
  tree_del(&port_state->sock_tree, sock_state_to_tree_node(sock_state));
}

sock_state_t* port_find_socket(port_state_t* port_state, SOCKET socket) {
  tree_node_t* tree_node = tree_find(&port_state->sock_tree, socket);
  if (tree_node == NULL)
    return_set_error(NULL, ERROR_NOT_FOUND);
  return sock_state_from_tree_node(tree_node);
}

void port_request_socket_update(port_state_t* port_state,
                                sock_state_t* sock_state) {
  if (queue_enqueued(sock_state_to_queue_node(sock_state)))
    return;
  queue_append(&port_state->sock_update_queue,
               sock_state_to_queue_node(sock_state));
}

void port_cancel_socket_update(port_state_t* port_state,
                               sock_state_t* sock_state) {
  unused_var(port_state);
  if (!queue_enqueued(sock_state_to_queue_node(sock_state)))
    return;
  queue_remove(sock_state_to_queue_node(sock_state));
}

void port_add_deleted_socket(port_state_t* port_state,
                             sock_state_t* sock_state) {
  if (queue_enqueued(sock_state_to_queue_node(sock_state)))
    return;
  queue_append(&port_state->sock_deleted_queue,
               sock_state_to_queue_node(sock_state));
}

void port_remove_deleted_socket(port_state_t* port_state,
                                sock_state_t* sock_state) {
  unused_var(port_state);
  if (!queue_enqueued(sock_state_to_queue_node(sock_state)))
    return;
  queue_remove(sock_state_to_queue_node(sock_state));
}
void queue_init(queue_t* queue) {
  queue_node_init(&queue->head);
}

void queue_node_init(queue_node_t* node) {
  node->prev = node;
  node->next = node;
}

static inline void queue__detach_node(queue_node_t* node) {
  node->prev->next = node->next;
  node->next->prev = node->prev;
}

queue_node_t* queue_first(const queue_t* queue) {
  return !queue_empty(queue) ? queue->head.next : NULL;
}

queue_node_t* queue_last(const queue_t* queue) {
  return !queue_empty(queue) ? queue->head.prev : NULL;
}

void queue_prepend(queue_t* queue, queue_node_t* node) {
  node->next = queue->head.next;
  node->prev = &queue->head;
  node->next->prev = node;
  queue->head.next = node;
}

void queue_append(queue_t* queue, queue_node_t* node) {
  node->next = &queue->head;
  node->prev = queue->head.prev;
  node->prev->next = node;
  queue->head.prev = node;
}

void queue_move_first(queue_t* queue, queue_node_t* node) {
  queue__detach_node(node);
  queue_prepend(queue, node);
}

void queue_move_last(queue_t* queue, queue_node_t* node) {
  queue__detach_node(node);
  queue_append(queue, node);
}

void queue_remove(queue_node_t* node) {
  queue__detach_node(node);
  queue_node_init(node);
}

bool queue_empty(const queue_t* queue) {
  return !queue_enqueued(&queue->head);
}

bool queue_enqueued(const queue_node_t* node) {
  return node->prev != node;
}
/* clang-format off */
static const long REFLOCK__REF          = (long) 0x00000001;
static const long REFLOCK__REF_MASK     = (long) 0x0fffffff;
static const long REFLOCK__DESTROY      = (long) 0x10000000;
static const long REFLOCK__DESTROY_MASK = (long) 0xf0000000;
static const long REFLOCK__POISON       = (long) 0x300DEAD0;
/* clang-format on */

static HANDLE reflock__keyed_event = NULL;

int reflock_global_init(void) {
  NTSTATUS status =
      NtCreateKeyedEvent(&reflock__keyed_event, ~(ACCESS_MASK) 0, NULL, 0);
  if (status != STATUS_SUCCESS)
    return_set_error(-1, RtlNtStatusToDosError(status));
  return 0;
}

void reflock_init(reflock_t* reflock) {
  reflock->state = 0;
}

static void reflock__signal_event(void* address) {
  NTSTATUS status =
      NtReleaseKeyedEvent(reflock__keyed_event, address, FALSE, NULL);
  if (status != STATUS_SUCCESS)
    abort();
}

static void reflock__await_event(void* address) {
  NTSTATUS status =
      NtWaitForKeyedEvent(reflock__keyed_event, address, FALSE, NULL);
  if (status != STATUS_SUCCESS)
    abort();
}

void reflock_ref(reflock_t* reflock) {
  long state = InterlockedAdd(&reflock->state, REFLOCK__REF);
  unused_var(state);
  assert((state & REFLOCK__DESTROY_MASK) == 0); /* Overflow or destroyed. */
}

void reflock_unref(reflock_t* reflock) {
  long state = InterlockedAdd(&reflock->state, -REFLOCK__REF);
  long ref_count = state & REFLOCK__REF_MASK;
  long destroy = state & REFLOCK__DESTROY_MASK;

  unused_var(ref_count);
  unused_var(destroy);

  if (state == REFLOCK__DESTROY)
    reflock__signal_event(reflock);
  else
    assert(destroy == 0 || ref_count > 0);
}

void reflock_unref_and_destroy(reflock_t* reflock) {
  long state =
      InterlockedAdd(&reflock->state, REFLOCK__DESTROY - REFLOCK__REF);
  long ref_count = state & REFLOCK__REF_MASK;

  assert((state & REFLOCK__DESTROY_MASK) ==
         REFLOCK__DESTROY); /* Underflow or already destroyed. */

  if (ref_count != 0)
    reflock__await_event(reflock);

  state = InterlockedExchange(&reflock->state, REFLOCK__POISON);
  assert(state == REFLOCK__DESTROY);
}
static const uint32_t SOCK__KNOWN_EPOLL_EVENTS =
    EPOLLIN | EPOLLPRI | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLRDNORM |
    EPOLLRDBAND | EPOLLWRNORM | EPOLLWRBAND | EPOLLMSG | EPOLLRDHUP;

typedef enum sock__poll_status {
  SOCK__POLL_IDLE = 0,
  SOCK__POLL_PENDING,
  SOCK__POLL_CANCELLED
} sock__poll_status_t;

typedef struct sock_state {
  OVERLAPPED overlapped;
  AFD_POLL_INFO poll_info;
  queue_node_t queue_node;
  tree_node_t tree_node;
  poll_group_t* poll_group;
  SOCKET base_socket;
  epoll_data_t user_data;
  uint32_t user_events;
  uint32_t pending_events;
  sock__poll_status_t poll_status;
  bool delete_pending;
} sock_state_t;

static inline sock_state_t* sock__alloc(void) {
  sock_state_t* sock_state = malloc(sizeof *sock_state);
  if (sock_state == NULL)
    return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY);
  return sock_state;
}

static inline void sock__free(sock_state_t* sock_state) {
  free(sock_state);
}

static int sock__cancel_poll(sock_state_t* sock_state) {
  HANDLE afd_helper_handle =
      poll_group_get_afd_helper_handle(sock_state->poll_group);
  assert(sock_state->poll_status == SOCK__POLL_PENDING);

  /* CancelIoEx() may fail with ERROR_NOT_FOUND if the overlapped operation has
   * already completed. This is not a problem and we proceed normally. */
  if (!HasOverlappedIoCompleted(&sock_state->overlapped) &&
      !CancelIoEx(afd_helper_handle, &sock_state->overlapped) &&
      GetLastError() != ERROR_NOT_FOUND)
    return_map_error(-1);

  sock_state->poll_status = SOCK__POLL_CANCELLED;
  sock_state->pending_events = 0;
  return 0;
}
  1291. sock_state_t* sock_new(port_state_t* port_state, SOCKET socket) {
  1292. SOCKET base_socket;
  1293. poll_group_t* poll_group;
  1294. sock_state_t* sock_state;
  1295. if (socket == 0 || socket == INVALID_SOCKET)
  1296. return_set_error(NULL, ERROR_INVALID_HANDLE);
  1297. base_socket = ws_get_base_socket(socket);
  1298. if (base_socket == INVALID_SOCKET)
  1299. return NULL;
  1300. poll_group = poll_group_acquire(port_state);
  1301. if (poll_group == NULL)
  1302. return NULL;
  1303. sock_state = sock__alloc();
  1304. if (sock_state == NULL)
  1305. goto err1;
  1306. memset(sock_state, 0, sizeof *sock_state);
  1307. sock_state->base_socket = base_socket;
  1308. sock_state->poll_group = poll_group;
  1309. tree_node_init(&sock_state->tree_node);
  1310. queue_node_init(&sock_state->queue_node);
  1311. if (port_register_socket_handle(port_state, sock_state, socket) < 0)
  1312. goto err2;
  1313. return sock_state;
  1314. err2:
  1315. sock__free(sock_state);
  1316. err1:
  1317. poll_group_release(poll_group);
  1318. return NULL;
  1319. }
  1320. static int sock__delete(port_state_t* port_state,
  1321. sock_state_t* sock_state,
  1322. bool force) {
  1323. if (!sock_state->delete_pending) {
  1324. if (sock_state->poll_status == SOCK__POLL_PENDING)
  1325. sock__cancel_poll(sock_state);
  1326. port_cancel_socket_update(port_state, sock_state);
  1327. port_unregister_socket_handle(port_state, sock_state);
  1328. sock_state->delete_pending = true;
  1329. }
  1330. /* If the poll request still needs to complete, the sock_state object can't
  1331. * be free()d yet. `sock_feed_event()` or `port_close()` will take care
  1332. * of this later. */
  1333. if (force || sock_state->poll_status == SOCK__POLL_IDLE) {
  1334. /* Free the sock_state now. */
  1335. port_remove_deleted_socket(port_state, sock_state);
  1336. poll_group_release(sock_state->poll_group);
  1337. sock__free(sock_state);
  1338. } else {
  1339. /* Free the socket later. */
  1340. port_add_deleted_socket(port_state, sock_state);
  1341. }
  1342. return 0;
  1343. }
  1344. void sock_delete(port_state_t* port_state, sock_state_t* sock_state) {
  1345. sock__delete(port_state, sock_state, false);
  1346. }
  1347. void sock_force_delete(port_state_t* port_state, sock_state_t* sock_state) {
  1348. sock__delete(port_state, sock_state, true);
  1349. }
int sock_set_event(port_state_t* port_state,
                   sock_state_t* sock_state,
                   const struct epoll_event* ev) {
  /* EPOLLERR and EPOLLHUP are always reported, even when not requested by the
   * caller. However, they are disabled after an event has been reported for a
   * socket for which the EPOLLONESHOT flag was set. */
  uint32_t events = ev->events | EPOLLERR | EPOLLHUP;

  sock_state->user_events = events;
  sock_state->user_data = ev->data;

  if ((events & SOCK__KNOWN_EPOLL_EVENTS & ~sock_state->pending_events) != 0)
    port_request_socket_update(port_state, sock_state);

  return 0;
}
static inline DWORD sock__epoll_events_to_afd_events(uint32_t epoll_events) {
  /* Always monitor for AFD_POLL_LOCAL_CLOSE, which is triggered when the
   * socket is closed with closesocket() or CloseHandle(). */
  DWORD afd_events = AFD_POLL_LOCAL_CLOSE;

  if (epoll_events & (EPOLLIN | EPOLLRDNORM))
    afd_events |= AFD_POLL_RECEIVE | AFD_POLL_ACCEPT;
  if (epoll_events & (EPOLLPRI | EPOLLRDBAND))
    afd_events |= AFD_POLL_RECEIVE_EXPEDITED;
  if (epoll_events & (EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND))
    afd_events |= AFD_POLL_SEND;
  if (epoll_events & (EPOLLIN | EPOLLRDNORM | EPOLLRDHUP))
    afd_events |= AFD_POLL_DISCONNECT;
  if (epoll_events & EPOLLHUP)
    afd_events |= AFD_POLL_ABORT;
  if (epoll_events & EPOLLERR)
    afd_events |= AFD_POLL_CONNECT_FAIL;

  return afd_events;
}

static inline uint32_t sock__afd_events_to_epoll_events(DWORD afd_events) {
  uint32_t epoll_events = 0;

  if (afd_events & (AFD_POLL_RECEIVE | AFD_POLL_ACCEPT))
    epoll_events |= EPOLLIN | EPOLLRDNORM;
  if (afd_events & AFD_POLL_RECEIVE_EXPEDITED)
    epoll_events |= EPOLLPRI | EPOLLRDBAND;
  if (afd_events & AFD_POLL_SEND)
    epoll_events |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
  if (afd_events & AFD_POLL_DISCONNECT)
    epoll_events |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
  if (afd_events & AFD_POLL_ABORT)
    epoll_events |= EPOLLHUP;
  if (afd_events & AFD_POLL_CONNECT_FAIL)
    /* Linux reports all these events after connect() has failed. */
    epoll_events |=
        EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLRDNORM | EPOLLWRNORM | EPOLLRDHUP;

  return epoll_events;
}
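
/* sock_update() reconciles the caller's current interest set (`user_events`)
 * with whatever AFD poll request is already in flight. There are four cases,
 * handled in order below: the pending request already covers the interest set
 * (nothing to do), the pending request is too narrow (cancel it and wait for
 * the cancellation to complete), a cancellation is already in progress
 * (wait), or no request is pending (submit a fresh afd_poll() request). */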
int sock_update(port_state_t* port_state, sock_state_t* sock_state) {
  assert(!sock_state->delete_pending);

  if ((sock_state->poll_status == SOCK__POLL_PENDING) &&
      (sock_state->user_events & SOCK__KNOWN_EPOLL_EVENTS &
       ~sock_state->pending_events) == 0) {
    /* All the events the user is interested in are already being monitored by
     * the pending poll operation. It might spuriously complete because of an
     * event that we're no longer interested in; when that happens we'll submit
     * a new poll operation with the updated event mask. */

  } else if (sock_state->poll_status == SOCK__POLL_PENDING) {
    /* A poll operation is already pending, but it's not monitoring for all the
     * events that the user is interested in. Therefore, cancel the pending
     * poll operation; when we receive its completion packet, a new poll
     * operation will be submitted with the correct event mask. */
    if (sock__cancel_poll(sock_state) < 0)
      return -1;

  } else if (sock_state->poll_status == SOCK__POLL_CANCELLED) {
    /* The poll operation has already been cancelled, we're still waiting for
     * it to return. For now, there's nothing that needs to be done. */

  } else if (sock_state->poll_status == SOCK__POLL_IDLE) {
    /* No poll operation is pending; start one. */
    sock_state->poll_info.Exclusive = FALSE;
    sock_state->poll_info.NumberOfHandles = 1;
    sock_state->poll_info.Timeout.QuadPart = INT64_MAX;
    sock_state->poll_info.Handles[0].Handle = (HANDLE) sock_state->base_socket;
    sock_state->poll_info.Handles[0].Status = 0;
    sock_state->poll_info.Handles[0].Events =
        sock__epoll_events_to_afd_events(sock_state->user_events);

    memset(&sock_state->overlapped, 0, sizeof sock_state->overlapped);

    if (afd_poll(poll_group_get_afd_helper_handle(sock_state->poll_group),
                 &sock_state->poll_info,
                 &sock_state->overlapped) < 0) {
      switch (GetLastError()) {
        case ERROR_IO_PENDING:
          /* Overlapped poll operation in progress; this is expected. */
          break;
        case ERROR_INVALID_HANDLE:
          /* Socket closed; it'll be dropped from the epoll set. */
          return sock__delete(port_state, sock_state, false);
        default:
          /* Other errors are propagated to the caller. */
          return_map_error(-1);
      }
    }

    /* The poll request was successfully submitted. */
    sock_state->poll_status = SOCK__POLL_PENDING;
    sock_state->pending_events = sock_state->user_events;

  } else {
    /* Unreachable. */
    assert(false);
  }

  port_cancel_socket_update(port_state, sock_state);
  return 0;
}
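
/* sock_feed_event() is called when a completion for a pending AFD poll
 * request has been dequeued from the completion port. It translates the AFD
 * result into epoll events, filters them against the events the user asked
 * for, and returns 1 if *ev was filled in or 0 if there is nothing to
 * report. */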
int sock_feed_event(port_state_t* port_state,
                    OVERLAPPED* overlapped,
                    struct epoll_event* ev) {
  sock_state_t* sock_state =
      container_of(overlapped, sock_state_t, overlapped);
  AFD_POLL_INFO* poll_info = &sock_state->poll_info;
  uint32_t epoll_events = 0;

  sock_state->poll_status = SOCK__POLL_IDLE;
  sock_state->pending_events = 0;

  if (sock_state->delete_pending) {
    /* Socket has been deleted earlier and can now be freed. */
    return sock__delete(port_state, sock_state, false);

  } else if ((NTSTATUS) overlapped->Internal == STATUS_CANCELLED) {
    /* The poll request was cancelled by CancelIoEx. */

  } else if (!NT_SUCCESS(overlapped->Internal)) {
    /* The overlapped request itself failed in an unexpected way. */
    epoll_events = EPOLLERR;

  } else if (poll_info->NumberOfHandles < 1) {
    /* This poll operation succeeded but didn't report any socket events. */

  } else if (poll_info->Handles[0].Events & AFD_POLL_LOCAL_CLOSE) {
    /* The poll operation reported that the socket was closed. */
    return sock__delete(port_state, sock_state, false);

  } else {
    /* Events related to our socket were reported. */
    epoll_events =
        sock__afd_events_to_epoll_events(poll_info->Handles[0].Events);
  }

  /* Requeue the socket so a new poll request will be submitted. */
  port_request_socket_update(port_state, sock_state);

  /* Filter out events that the user didn't ask for. */
  epoll_events &= sock_state->user_events;

  /* Return if there are no epoll events to report. */
  if (epoll_events == 0)
    return 0;

  /* If the socket has the EPOLLONESHOT flag set, unmonitor all events,
   * even EPOLLERR and EPOLLHUP. But always keep looking for closed sockets. */
  if (sock_state->user_events & EPOLLONESHOT)
    sock_state->user_events = 0;

  ev->data = sock_state->user_data;
  ev->events = epoll_events;
  return 1;
}
queue_node_t* sock_state_to_queue_node(sock_state_t* sock_state) {
  return &sock_state->queue_node;
}

sock_state_t* sock_state_from_tree_node(tree_node_t* tree_node) {
  return container_of(tree_node, sock_state_t, tree_node);
}

tree_node_t* sock_state_to_tree_node(sock_state_t* sock_state) {
  return &sock_state->tree_node;
}

sock_state_t* sock_state_from_queue_node(queue_node_t* queue_node) {
  return container_of(queue_node, sock_state_t, queue_node);
}
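
/* ts_tree is a thread-safe wrapper around the red-black tree defined further
 * below: a slim reader/writer (SRW) lock serializes structural changes, and
 * every node carries a reflock so that a lookup can hand out a reference that
 * keeps the node alive after the lock has been released. Callers must pair
 * ts_tree_find_and_ref()/ts_tree_del_and_ref() with ts_tree_node_unref() or
 * ts_tree_node_unref_and_destroy(). */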
void ts_tree_init(ts_tree_t* ts_tree) {
  tree_init(&ts_tree->tree);
  InitializeSRWLock(&ts_tree->lock);
}

void ts_tree_node_init(ts_tree_node_t* node) {
  tree_node_init(&node->tree_node);
  reflock_init(&node->reflock);
}

int ts_tree_add(ts_tree_t* ts_tree, ts_tree_node_t* node, uintptr_t key) {
  int r;

  AcquireSRWLockExclusive(&ts_tree->lock);
  r = tree_add(&ts_tree->tree, &node->tree_node, key);
  ReleaseSRWLockExclusive(&ts_tree->lock);

  return r;
}

static inline ts_tree_node_t* ts_tree__find_node(ts_tree_t* ts_tree,
                                                 uintptr_t key) {
  tree_node_t* tree_node = tree_find(&ts_tree->tree, key);
  if (tree_node == NULL)
    return NULL;

  return container_of(tree_node, ts_tree_node_t, tree_node);
}

ts_tree_node_t* ts_tree_del_and_ref(ts_tree_t* ts_tree, uintptr_t key) {
  ts_tree_node_t* ts_tree_node;

  AcquireSRWLockExclusive(&ts_tree->lock);

  ts_tree_node = ts_tree__find_node(ts_tree, key);
  if (ts_tree_node != NULL) {
    tree_del(&ts_tree->tree, &ts_tree_node->tree_node);
    reflock_ref(&ts_tree_node->reflock);
  }

  ReleaseSRWLockExclusive(&ts_tree->lock);

  return ts_tree_node;
}

ts_tree_node_t* ts_tree_find_and_ref(ts_tree_t* ts_tree, uintptr_t key) {
  ts_tree_node_t* ts_tree_node;

  AcquireSRWLockShared(&ts_tree->lock);

  ts_tree_node = ts_tree__find_node(ts_tree, key);
  if (ts_tree_node != NULL)
    reflock_ref(&ts_tree_node->reflock);

  ReleaseSRWLockShared(&ts_tree->lock);

  return ts_tree_node;
}

void ts_tree_node_unref(ts_tree_node_t* node) {
  reflock_unref(&node->reflock);
}

void ts_tree_node_unref_and_destroy(ts_tree_node_t* node) {
  reflock_unref_and_destroy(&node->reflock);
}
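
/* Illustrative sketch (not part of wepoll): typical lookup pattern for the
 * ts_tree API above. The `global_tree` variable and the my_state_t type are
 * hypothetical and exist only for this example.
 *
 *   ts_tree_node_t* node = ts_tree_find_and_ref(&global_tree, key);
 *   if (node != NULL) {
 *     my_state_t* state = container_of(node, my_state_t, ts_tree_node);
 *     // ...use state...
 *     ts_tree_node_unref(node);   // release the reference taken by find
 *   }
 *
 * Deletion follows the same shape with ts_tree_del_and_ref() followed by
 * ts_tree_node_unref_and_destroy(), which blocks until concurrent users have
 * released their references.
 */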
void tree_init(tree_t* tree) {
  memset(tree, 0, sizeof *tree);
}

void tree_node_init(tree_node_t* node) {
  memset(node, 0, sizeof *node);
}
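
/* The tree below is an intrusive red-black tree keyed by a uintptr_t value.
 * The symmetric left/right cases of the rotations and fixups are generated
 * from the macros that follow: the `cis` parameter names the side being
 * handled and `trans` names its mirror image, so each macro expands to the
 * correct variant for both orientations. */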
#define TREE__ROTATE(cis, trans)    \
  tree_node_t* p = node;            \
  tree_node_t* q = node->trans;     \
  tree_node_t* parent = p->parent;  \
                                    \
  if (parent) {                     \
    if (parent->left == p)          \
      parent->left = q;             \
    else                            \
      parent->right = q;            \
  } else {                          \
    tree->root = q;                 \
  }                                 \
                                    \
  q->parent = parent;               \
  p->parent = q;                    \
  p->trans = q->cis;                \
  if (p->trans)                     \
    p->trans->parent = p;           \
  q->cis = p;

static inline void tree__rotate_left(tree_t* tree, tree_node_t* node) {
  TREE__ROTATE(left, right)
}

static inline void tree__rotate_right(tree_t* tree, tree_node_t* node) {
  TREE__ROTATE(right, left)
}

#define TREE__INSERT_OR_DESCEND(side) \
  if (parent->side) {                 \
    parent = parent->side;            \
  } else {                            \
    parent->side = node;              \
    break;                            \
  }

#define TREE__FIXUP_AFTER_INSERT(cis, trans) \
  tree_node_t* grandparent = parent->parent; \
  tree_node_t* uncle = grandparent->trans;   \
                                             \
  if (uncle && uncle->red) {                 \
    parent->red = uncle->red = false;        \
    grandparent->red = true;                 \
    node = grandparent;                      \
  } else {                                   \
    if (node == parent->trans) {             \
      tree__rotate_##cis(tree, parent);      \
      node = parent;                         \
      parent = node->parent;                 \
    }                                        \
    parent->red = false;                     \
    grandparent->red = true;                 \
    tree__rotate_##trans(tree, grandparent); \
  }

int tree_add(tree_t* tree, tree_node_t* node, uintptr_t key) {
  tree_node_t* parent;

  parent = tree->root;
  if (parent) {
    for (;;) {
      if (key < parent->key) {
        TREE__INSERT_OR_DESCEND(left)
      } else if (key > parent->key) {
        TREE__INSERT_OR_DESCEND(right)
      } else {
        return -1;
      }
    }
  } else {
    tree->root = node;
  }

  node->key = key;
  node->left = node->right = NULL;
  node->parent = parent;
  node->red = true;

  for (; parent && parent->red; parent = node->parent) {
    if (parent == parent->parent->left) {
      TREE__FIXUP_AFTER_INSERT(left, right)
    } else {
      TREE__FIXUP_AFTER_INSERT(right, left)
    }
  }
  tree->root->red = false;

  return 0;
}
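
/* tree_del() below is the standard red-black deletion: the node is spliced
 * out of the tree (or swapped with its in-order successor when it has two
 * children), and if a black node was removed, TREE__FIXUP_AFTER_REMOVE walks
 * back up the tree recoloring and rotating until the red-black invariants
 * hold again. */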
#define TREE__FIXUP_AFTER_REMOVE(cis, trans)       \
  tree_node_t* sibling = parent->trans;            \
                                                   \
  if (sibling->red) {                              \
    sibling->red = false;                          \
    parent->red = true;                            \
    tree__rotate_##cis(tree, parent);              \
    sibling = parent->trans;                       \
  }                                                \
  if ((sibling->left && sibling->left->red) ||     \
      (sibling->right && sibling->right->red)) {   \
    if (!sibling->trans || !sibling->trans->red) { \
      sibling->cis->red = false;                   \
      sibling->red = true;                         \
      tree__rotate_##trans(tree, sibling);         \
      sibling = parent->trans;                     \
    }                                              \
    sibling->red = parent->red;                    \
    parent->red = sibling->trans->red = false;     \
    tree__rotate_##cis(tree, parent);              \
    node = tree->root;                             \
    break;                                         \
  }                                                \
  sibling->red = true;

void tree_del(tree_t* tree, tree_node_t* node) {
  tree_node_t* parent = node->parent;
  tree_node_t* left = node->left;
  tree_node_t* right = node->right;
  tree_node_t* next;
  bool red;

  if (!left) {
    next = right;
  } else if (!right) {
    next = left;
  } else {
    next = right;
    while (next->left)
      next = next->left;
  }

  if (parent) {
    if (parent->left == node)
      parent->left = next;
    else
      parent->right = next;
  } else {
    tree->root = next;
  }

  if (left && right) {
    red = next->red;
    next->red = node->red;
    next->left = left;
    left->parent = next;
    if (next != right) {
      parent = next->parent;
      next->parent = node->parent;
      node = next->right;
      parent->left = node;
      next->right = right;
      right->parent = next;
    } else {
      next->parent = parent;
      parent = next;
      node = next->right;
    }
  } else {
    red = node->red;
    node = next;
  }

  if (node)
    node->parent = parent;
  if (red)
    return;
  if (node && node->red) {
    node->red = false;
    return;
  }

  do {
    if (node == tree->root)
      break;
    if (node == parent->left) {
      TREE__FIXUP_AFTER_REMOVE(left, right)
    } else {
      TREE__FIXUP_AFTER_REMOVE(right, left)
    }
    node = parent;
    parent = parent->parent;
  } while (!node->red);

  if (node)
    node->red = false;
}

tree_node_t* tree_find(const tree_t* tree, uintptr_t key) {
  tree_node_t* node = tree->root;
  while (node) {
    if (key < node->key)
      node = node->left;
    else if (key > node->key)
      node = node->right;
    else
      return node;
  }
  return NULL;
}

tree_node_t* tree_root(const tree_t* tree) {
  return tree->root;
}
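
/* SIO_BASE_HANDLE is a WSAIoctl control code that returns the socket handle
 * of the base (bottom-most) Winsock service provider underneath any layered
 * service providers. AFD polling must be done on that base handle, which is
 * why ws_get_base_socket() below uses it. The constant may be missing from
 * older SDK headers, hence the fallback definition. */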
#ifndef SIO_BASE_HANDLE
#define SIO_BASE_HANDLE 0x48000022
#endif

int ws_global_init(void) {
  int r;
  WSADATA wsa_data;

  r = WSAStartup(MAKEWORD(2, 2), &wsa_data);
  if (r != 0)
    return_set_error(-1, (DWORD) r);

  return 0;
}

SOCKET ws_get_base_socket(SOCKET socket) {
  SOCKET base_socket;
  DWORD bytes;

  if (WSAIoctl(socket,
               SIO_BASE_HANDLE,
               NULL,
               0,
               &base_socket,
               sizeof base_socket,
               &bytes,
               NULL,
               NULL) == SOCKET_ERROR)
    return_map_error(INVALID_SOCKET);

  return base_socket;
}