From 12f436718963f8343e38ad6d0e8f7251c95984cd Mon Sep 17 00:00:00 2001 From: Valentin Bartenev Date: Fri, 13 May 2016 17:19:23 +0300 Subject: [PATCH] Improved EPOLLRDHUP handling. When it's known that the kernel supports EPOLLRDHUP, there is no need in additional recv() call to get EOF or error when the flag is absent in the event generated by the kernel. A special runtime test is done at startup to detect if EPOLLRDHUP is actually supported by the kernel because epoll_ctl() silently ignores unknown flags. With this knowledge it's now possible to drop the "ready" flag for partial read. Previously, the "ready" flag was kept until the recv() returned EOF or error. In particular, this change allows the lingering close heuristics (which relies on the "ready" flag state) to actually work on Linux, and not wait for more data in most cases. The "available" flag is now used in the read event with the semantics similar to the corresponding counter in kqueue. --- src/event/modules/ngx_epoll_module.c | 72 +++++++++++++++++++++++++++- src/event/ngx_event.h | 7 +++ src/http/ngx_http_request.c | 6 ++- src/http/ngx_http_upstream.c | 6 ++- src/os/unix/ngx_readv_chain.c | 32 +++++++++++++ src/os/unix/ngx_recv.c | 33 +++++++++++++ 6 files changed, 153 insertions(+), 3 deletions(-) diff --git a/src/event/modules/ngx_epoll_module.c b/src/event/modules/ngx_epoll_module.c index 166c46166..0b23a952f 100644 --- a/src/event/modules/ngx_epoll_module.c +++ b/src/event/modules/ngx_epoll_module.c @@ -123,6 +123,7 @@ static ngx_int_t ngx_epoll_process_events(ngx_cycle_t *cycle, ngx_msec_t timer, static void ngx_epoll_eventfd_handler(ngx_event_t *ev); #endif +static ngx_int_t ngx_epoll_module_init(ngx_cycle_t *cycle); static void *ngx_epoll_create_conf(ngx_cycle_t *cycle); static char *ngx_epoll_init_conf(ngx_cycle_t *cycle, void *conf); @@ -146,6 +147,10 @@ static ngx_connection_t ngx_eventfd_conn; #endif +#if (NGX_HAVE_EPOLLRDHUP) +ngx_uint_t ngx_use_epoll_rdhup; +#endif + static ngx_str_t epoll_name = ngx_string("epoll"); static ngx_command_t ngx_epoll_commands[] = { @@ -197,7 +202,7 @@ ngx_module_t ngx_epoll_module = { ngx_epoll_commands, /* module directives */ NGX_EVENT_MODULE, /* module type */ NULL, /* init master */ - NULL, /* init module */ + ngx_epoll_module_init, /* init module */ NULL, /* init process */ NULL, /* init thread */ NULL, /* exit thread */ @@ -808,6 +813,8 @@ ngx_epoll_process_events(ngx_cycle_t *cycle, ngx_msec_t timer, ngx_uint_t flags) if (revents & EPOLLRDHUP) { rev->pending_eof = 1; } + + rev->available = 1; #endif rev->ready = 1; @@ -943,6 +950,69 @@ ngx_epoll_eventfd_handler(ngx_event_t *ev) #endif +static ngx_int_t +ngx_epoll_module_init(ngx_cycle_t *cycle) +{ +#if (NGX_HAVE_EPOLLRDHUP) + int epfd, s[2], events; + struct epoll_event ee; + + epfd = epoll_create(1); + + if (epfd == -1) { + ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno, + "epoll_create() failed"); + return NGX_ERROR; + } + + if (socketpair(AF_UNIX, SOCK_STREAM, 0, s) == -1) { + ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno, + "socketpair() failed"); + return NGX_ERROR; + } + + ee.events = EPOLLET|EPOLLIN|EPOLLRDHUP; + + if (epoll_ctl(epfd, EPOLL_CTL_ADD, s[0], &ee) == -1) { + ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno, + "epoll_ctl() failed"); + return NGX_ERROR; + } + + if (close(s[1]) == -1) { + ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno, + "close() failed"); + return NGX_ERROR; + } + + events = epoll_wait(epfd, &ee, 1, 5000); + + if (events == -1) { + ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno, + "epoll_wait() failed"); + return NGX_ERROR; + } + + (void) close(s[0]); + (void) close(epfd); + + if (events) { + ngx_use_epoll_rdhup = ee.events & EPOLLRDHUP; + + } else { + ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno, + "epoll_wait() timedout"); + } + + ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, + "testing the EPOLLRDHUP flag: %s", + ngx_use_epoll_rdhup ? "success" : "fail"); +#endif + + return NGX_OK; +} + + static void * ngx_epoll_create_conf(ngx_cycle_t *cycle) { diff --git a/src/event/ngx_event.h b/src/event/ngx_event.h index ed0682cf1..1c7c14873 100644 --- a/src/event/ngx_event.h +++ b/src/event/ngx_event.h @@ -96,6 +96,10 @@ struct ngx_event_s { * write: available space in buffer when event is ready * or lowat when event is set with NGX_LOWAT_EVENT flag * + * epoll with EPOLLRDHUP: + * accept: 1 if accept many, 0 otherwise + * read: 1 if there can be data to read, 0 otherwise + * * iocp: TODO * * otherwise: @@ -196,6 +200,9 @@ typedef struct { extern ngx_event_actions_t ngx_event_actions; +#if (NGX_HAVE_EPOLLRDHUP) +extern ngx_uint_t ngx_use_epoll_rdhup; +#endif /* diff --git a/src/http/ngx_http_request.c b/src/http/ngx_http_request.c index 7d6cada4d..aa3c398a3 100644 --- a/src/http/ngx_http_request.c +++ b/src/http/ngx_http_request.c @@ -2752,9 +2752,13 @@ ngx_http_test_reading(ngx_http_request_t *r) #if (NGX_HAVE_EPOLLRDHUP) - if ((ngx_event_flags & NGX_USE_EPOLL_EVENT) && rev->pending_eof) { + if ((ngx_event_flags & NGX_USE_EPOLL_EVENT) && ngx_use_epoll_rdhup) { socklen_t len; + if (!rev->pending_eof) { + return; + } + rev->eof = 1; c->error = 1; diff --git a/src/http/ngx_http_upstream.c b/src/http/ngx_http_upstream.c index 5f08e3909..89b216061 100644 --- a/src/http/ngx_http_upstream.c +++ b/src/http/ngx_http_upstream.c @@ -1222,9 +1222,13 @@ ngx_http_upstream_check_broken_connection(ngx_http_request_t *r, #if (NGX_HAVE_EPOLLRDHUP) - if ((ngx_event_flags & NGX_USE_EPOLL_EVENT) && ev->pending_eof) { + if ((ngx_event_flags & NGX_USE_EPOLL_EVENT) && ngx_use_epoll_rdhup) { socklen_t len; + if (!ev->pending_eof) { + return; + } + ev->eof = 1; c->error = 1; diff --git a/src/os/unix/ngx_readv_chain.c b/src/os/unix/ngx_readv_chain.c index d23508ea7..454cfdcad 100644 --- a/src/os/unix/ngx_readv_chain.c +++ b/src/os/unix/ngx_readv_chain.c @@ -51,6 +51,20 @@ ngx_readv_chain(ngx_connection_t *c, ngx_chain_t *chain, off_t limit) } } +#endif + +#if (NGX_HAVE_EPOLLRDHUP) + + if (ngx_event_flags & NGX_USE_EPOLL_EVENT) { + ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0, + "readv: eof:%d, avail:%d", + rev->pending_eof, rev->available); + + if (!rev->available && !rev->pending_eof) { + return NGX_AGAIN; + } + } + #endif prev = NULL; @@ -149,6 +163,24 @@ ngx_readv_chain(ngx_connection_t *c, ngx_chain_t *chain, off_t limit) return n; } +#endif + +#if (NGX_HAVE_EPOLLRDHUP) + + if ((ngx_event_flags & NGX_USE_EPOLL_EVENT) + && ngx_use_epoll_rdhup) + { + if (n < size) { + if (!rev->pending_eof) { + rev->ready = 0; + } + + rev->available = 0; + } + + return n; + } + #endif if (n < size && !(ngx_event_flags & NGX_USE_GREEDY_EVENT)) { diff --git a/src/os/unix/ngx_recv.c b/src/os/unix/ngx_recv.c index 5013ae34c..c85fd453c 100644 --- a/src/os/unix/ngx_recv.c +++ b/src/os/unix/ngx_recv.c @@ -48,6 +48,21 @@ ngx_unix_recv(ngx_connection_t *c, u_char *buf, size_t size) } } +#endif + +#if (NGX_HAVE_EPOLLRDHUP) + + if (ngx_event_flags & NGX_USE_EPOLL_EVENT) { + ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0, + "recv: eof:%d, avail:%d", + rev->pending_eof, rev->available); + + if (!rev->available && !rev->pending_eof) { + rev->ready = 0; + return NGX_AGAIN; + } + } + #endif do { @@ -99,6 +114,24 @@ ngx_unix_recv(ngx_connection_t *c, u_char *buf, size_t size) return n; } +#endif + +#if (NGX_HAVE_EPOLLRDHUP) + + if ((ngx_event_flags & NGX_USE_EPOLL_EVENT) + && ngx_use_epoll_rdhup) + { + if ((size_t) n < size) { + if (!rev->pending_eof) { + rev->ready = 0; + } + + rev->available = 0; + } + + return n; + } + #endif if ((size_t) n < size