File tree Expand file tree Collapse file tree 6 files changed +56
-4
lines changed Expand file tree Collapse file tree 6 files changed +56
-4
lines changed Original file line number Diff line number Diff line change @@ -20,6 +20,12 @@ workflows:
2020 name : JRuby 9.2
2121 docker-image : jruby:9.2-jdk
2222 jruby : true
23+ skip-end-to-end-http-tests : " y" # webrick doesn't work reliably in JRuby
24+ - build-test-linux :
25+ name : JRuby 9.3
26+ docker-image : jruby:9.3-jdk
27+ jruby : true
28+ skip-end-to-end-http-tests : " y" # webrick doesn't work reliably in JRuby
2329
2430jobs :
2531 build-test-linux :
2935 jruby :
3036 type : boolean
3137 default : false
38+ skip-end-to-end-http-tests :
39+ type : string
40+ default : " "
3241
3342 docker :
3443 - image : <<parameters.docker-image>>
44+ environment :
45+ LD_SKIP_END_TO_END_HTTP_TESTS : <<parameters.skip-end-to-end-http-tests>>
3546
3647 steps :
3748 - checkout
Original file line number Diff line number Diff line change @@ -306,6 +306,9 @@ def read_stream(cxn)
306306 else
307307 begin
308308 data = cxn . readpartial
309+ # readpartial gives us a string, which may not be a valid UTF-8 string because a
310+ # multi-byte character might not yet have been fully read, but BufferedLineReader
311+ # will handle that.
309312 rescue HTTP ::TimeoutError
310313 # For historical reasons, we rethrow this as our own type
311314 raise Errors ::ReadTimeoutError . new ( @read_timeout )
Original file line number Diff line number Diff line change @@ -9,17 +9,21 @@ class BufferedLineReader
99 # input data runs out, the output enumerator ends and does not include any partially
1010 # completed line.
1111 #
12- # @param [Enumerator] chunks an enumerator that will yield strings from a stream
13- # @return [Enumerator] an enumerator that will yield one line at a time
12+ # @param [Enumerator] chunks an enumerator that will yield strings from a stream -
13+ # these are treated as raw UTF-8 bytes, regardless of the string's declared encoding
14+ # (so it is OK if a multi-byte character is split across chunks); if the declared
15+ # encoding of the chunk is not ASCII-8BIT, it will be changed to ASCII-8BIT in place
16+ # @return [Enumerator] an enumerator that will yield one line at a time in UTF-8
1417 #
1518 def self . lines_from ( chunks )
16- buffer = ""
19+ buffer = "" . b
1720 position = 0
1821 line_start = 0
1922 last_char_was_cr = false
2023
2124 Enumerator . new do |gen |
2225 chunks . each do |chunk |
26+ chunk . force_encoding ( "ASCII-8BIT" )
2327 buffer << chunk
2428
2529 loop do
@@ -47,7 +51,12 @@ def self.lines_from(chunks)
4751 next
4852 end
4953
50- line = buffer [ line_start , i - line_start ]
54+ line = buffer [ line_start , i - line_start ] . force_encoding ( "UTF-8" )
55+ # Calling force_encoding just declares that we believe the encoding of this string to be
56+ # UTF-8 (which is the only encoding allowed in the SSE protocol); it doesn't cause any
57+ # re-decoding of the string. The previous line-parsing steps were done on raw 8-bit
58+ # strings so that Ruby won't try to do any UTF-8 decoding on intermediate slices.
59+
5160 last_char_was_cr = false
5261 i += 1
5362 if ch == "\r "
Original file line number Diff line number Diff line change @@ -74,4 +74,24 @@ def tests_for_terminator(term, desc)
7474 "fourth line" , "" , "last" ]
7575 expect ( subject . lines_from ( chunks ) . to_a ) . to eq ( expected )
7676 end
77+
78+ it "decodes from UTF-8" do
79+ text = "abc€豆腐xyz"
80+ chunks = [ ( text + "\n " ) . encode ( "UTF-8" ) . b ]
81+ expected = [ text ]
82+ expect ( subject . lines_from ( chunks ) . to_a ) . to eq ( expected )
83+ end
84+
85+ it "decodes from UTF-8 when multi-byte characters are split across chunks" do
86+ text = "abc€豆腐xyz"
87+ raw = ( text + "\n " ) . encode ( "UTF-8" ) . b
88+ chunks = raw . bytes . to_a . map { |byte | byte . chr . force_encoding ( "UTF-8" ) }
89+ # Calling force_encoding("UTF-8") here simulates the behavior of the http gem's
90+ # readpartial method. It actually returns undecoded bytes that might include an
91+ # incomplete multi-byte character, but the string's encoding could still be
92+ # declared as UTF-8. So we are making sure that BufferedLineReader correctly
93+ # handles such a case.
94+ expected = [ text ]
95+ expect ( subject . lines_from ( chunks ) . to_a ) . to eq ( expected )
96+ end
7797end
Original file line number Diff line number Diff line change 55# End-to-end tests of the SSE client against a real server
66#
77describe SSE ::Client do
8+ before ( :each ) do
9+ skip ( "end-to-end HTTP tests are disabled because they're unreliable on this platform" ) if !stub_http_server_available?
10+ end
11+
812 subject { SSE ::Client }
913
1014 let ( :simple_event_1 ) { SSE ::StreamEvent . new ( :go , "foo" ) }
Original file line number Diff line number Diff line change 22require "webrick/httpproxy"
33require "webrick/https"
44
# Reports whether the end-to-end HTTP tests may use the stub HTTP server.
#
# The tests are switched off by setting the LD_SKIP_END_TO_END_HTTP_TESTS
# environment variable to any non-blank value. An unset, empty, or
# whitespace-only value leaves them enabled, so a blank default passed through
# from CI configuration does not disable the tests by accident.
#
# @return [Boolean] true if the skip flag is unset or blank
def stub_http_server_available?
  ENV.fetch("LD_SKIP_END_TO_END_HTTP_TESTS", "").strip.empty?
end
9+
510class StubHTTPServer
611 attr_reader :port
712
You can’t perform that action at this time.
0 commit comments