fwknop: init script improvements
[feed/packages.git] / net / haproxy / patches / 023-BUG-MEDIUM-connection-add-a-mux-flag-to-indicate-splice-usability.patch
1 commit 7195d4b9396687e67da196cb92ef25b4bd6938d8
2 Author: Willy Tarreau <w@1wt.eu>
3 Date: Fri Jan 17 16:19:34 2020 +0100
4
5 BUG/MEDIUM: connection: add a mux flag to indicate splice usability
6
7 Commit c640ef1a7d ("BUG/MINOR: stream-int: avoid calling rcv_buf() when
8 splicing is still possible") fixed splicing in TCP and legacy mode but
9 broke it badly in HTX mode.
10
11 What happens in HTX mode is that the channel's to_forward value remains
12 set to CHN_INFINITE_FORWARD during the whole transfer, and as such it is
13 not a reliable signal anymore to indicate whether more data are expected
14 or not. Thus, when data are spliced out of the mux using rcv_pipe(), even
15 when the end is reached (that only the mux knows about), the call to
16 rcv_buf() to get the final HTX blocks completing the message was skipped
17 and there was often no new event to wake this up, resulting in transfer
18 timeouts at the end of large objects.
19
20 All this goes down to the fact that the channel has no more information
21 about whether it can splice or not despite being the one having to take
22 the decision to call rcv_pipe() or not. And we cannot afford to call
23 rcv_buf() unconditionally because, as the commit above showed, this
24 reduces the forwarding performance by a factor of 2 to 3 in TCP and legacy modes
25 due to data lying in the buffer preventing splicing from being used
26 later.
27
28 The approach taken by this patch consists in offering the muxes the ability
29 to report a bit more information to the upper layers via the conn_stream.
30 This information could simply be to indicate that more data are awaited
31 but the real need being to distinguish splicing and receiving, here
32 instead we clearly report the mux's willingness to be called for splicing
33 or not. Hence the flag's name, CS_FL_MAY_SPLICE.
34
35 The mux sets this flag when it knows that its buffer is empty and that
36 data waiting past what is currently known may be spliced, and clears it
37 when it knows there's no more data or that the caller must fall back to
38 rcv_buf() instead.
39
40 The stream-int code now uses this to determine if splicing may be used
41 or not instead of looking at the rcv_pipe() callbacks through the whole
42 chain. And after the rcv_pipe() call, it checks the flag again to decide
43 whether it may safely skip rcv_buf() or not.
44
45 All this bitfield dance remains a bit complex and it starts to appear
46 obvious that splicing vs reading should be a decision of the mux based
47 on permission granted by the data layer. This would however increase
48 the API's complexity but definitely needs to be thought about, and should
49 even significantly simplify the data processing layer.
50
51 The way it was integrated in mux-h1 will also result in no more calls
52 to rcv_pipe() on chunked encoded data, since these ones are currently
53 disabled at the mux level. However once the issue with chunks+splice
54 is fixed, it will be important to explicitly check for curr_len|CHNK
55 to set MAY_SPLICE, so that we don't call rcv_buf() after each chunk.
56
57 This fix must be backported to 2.1 and 2.0.
58
59 (cherry picked from commit 17ccd1a3560a634a17d276833ff41b8063b72206)
60 Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
61
62 diff --git a/include/types/connection.h b/include/types/connection.h
63 index 165a683ae..f2aa63c33 100644
64 --- a/include/types/connection.h
65 +++ b/include/types/connection.h
66 @@ -95,7 +95,7 @@ enum {
67 CS_FL_EOS = 0x00001000, /* End of stream delivered to data layer */
68 /* unused: 0x00002000 */
69 CS_FL_EOI = 0x00004000, /* end-of-input reached */
70 - /* unused: 0x00008000 */
71 + CS_FL_MAY_SPLICE = 0x00008000, /* caller may use rcv_pipe() only if this flag is set */
72 CS_FL_WAIT_FOR_HS = 0x00010000, /* This stream is waiting for handhskae */
73 CS_FL_KILL_CONN = 0x00020000, /* must kill the connection when the CS closes */
74
75 diff --git a/src/mux_h1.c b/src/mux_h1.c
76 index d93a7eab5..b76a58fe4 100644
77 --- a/src/mux_h1.c
78 +++ b/src/mux_h1.c
79 @@ -489,6 +489,9 @@ static struct conn_stream *h1s_new_cs(struct h1s *h1s)
80 if (h1s->flags & H1S_F_NOT_FIRST)
81 cs->flags |= CS_FL_NOT_FIRST;
82
83 + if (global.tune.options & GTUNE_USE_SPLICE)
84 + cs->flags |= CS_FL_MAY_SPLICE;
85 +
86 if (stream_create_from_cs(cs) < 0) {
87 TRACE_DEVEL("leaving on stream creation failure", H1_EV_STRM_NEW|H1_EV_STRM_END|H1_EV_STRM_ERR, h1s->h1c->conn, h1s);
88 goto err;
89 @@ -1275,6 +1278,11 @@ static size_t h1_process_data(struct h1s *h1s, struct h1m *h1m, struct htx **htx
90 goto end;
91 }
92
93 + if (h1m->state == H1_MSG_DATA && h1m->curr_len && h1s->cs)
94 + h1s->cs->flags |= CS_FL_MAY_SPLICE;
95 + else if (h1s->cs)
96 + h1s->cs->flags &= ~CS_FL_MAY_SPLICE;
97 +
98 *ofs += ret;
99
100 end:
101 @@ -2725,6 +2733,9 @@ static int h1_rcv_pipe(struct conn_stream *cs, struct pipe *pipe, unsigned int c
102 TRACE_STATE("read0 on connection", H1_EV_STRM_RECV, cs->conn, h1s);
103 }
104
105 + if (h1m->state != H1_MSG_DATA || !h1m->curr_len)
106 + cs->flags &= ~CS_FL_MAY_SPLICE;
107 +
108 TRACE_LEAVE(H1_EV_STRM_RECV, cs->conn, h1s);
109 return ret;
110 }
111 diff --git a/src/mux_pt.c b/src/mux_pt.c
112 index 6cbc689ce..2ac7d4715 100644
113 --- a/src/mux_pt.c
114 +++ b/src/mux_pt.c
115 @@ -111,6 +111,8 @@ static int mux_pt_init(struct connection *conn, struct proxy *prx, struct sessio
116 conn->ctx = ctx;
117 ctx->cs = cs;
118 cs->flags |= CS_FL_RCV_MORE;
119 + if (global.tune.options & GTUNE_USE_SPLICE)
120 + cs->flags |= CS_FL_MAY_SPLICE;
121 return 0;
122
123 fail_free:
124 diff --git a/src/stream_interface.c b/src/stream_interface.c
125 index 012ac71e0..a2ea7d779 100644
126 --- a/src/stream_interface.c
127 +++ b/src/stream_interface.c
128 @@ -1268,7 +1268,7 @@ int si_cs_recv(struct conn_stream *cs)
129 /* First, let's see if we may splice data across the channel without
130 * using a buffer.
131 */
132 - if (conn->xprt->rcv_pipe && conn->mux->rcv_pipe &&
133 + if (cs->flags & CS_FL_MAY_SPLICE &&
134 (ic->pipe || ic->to_forward >= MIN_SPLICE_FORWARD) &&
135 ic->flags & CF_KERN_SPLICING) {
136 if (c_data(ic)) {
137 @@ -1327,7 +1327,7 @@ int si_cs_recv(struct conn_stream *cs)
138 ic->pipe = NULL;
139 }
140
141 - if (ic->pipe && ic->to_forward && !(flags & CO_RFL_BUF_FLUSH)) {
142 + if (ic->pipe && ic->to_forward && !(flags & CO_RFL_BUF_FLUSH) && cs->flags & CS_FL_MAY_SPLICE) {
143 /* don't break splicing by reading, but still call rcv_buf()
144 * to pass the flag.
145 */