Merge pull request #27 from launchdarkly/eb/sc-136139/jruby-9.2-tests

eli-darkly · web-flow · commit 030d2e7bf608 · 2021-12-29T17:30:04.000-08:00
add JRuby 9.3 CI job and disable flaky tests in JRuby
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -20,6 +20,12 @@ workflows:
           name: JRuby 9.2
           docker-image: jruby:9.2-jdk
           jruby: true
+          skip-end-to-end-http-tests: "y"  # webrick doesn't work reliably in JRuby
+      - build-test-linux:
+          name: JRuby 9.3
+          docker-image: jruby:9.3-jdk
+          jruby: true
+          skip-end-to-end-http-tests: "y"  # webrick doesn't work reliably in JRuby
 
 jobs:
   build-test-linux:
@@ -29,9 +35,14 @@ jobs:
       jruby:
         type: boolean
         default: false
+      skip-end-to-end-http-tests:
+        type: string
+        default: ""
 
     docker:
       - image: <<parameters.docker-image>>
+        environment:
+          LD_SKIP_END_TO_END_HTTP_TESTS: <<parameters.skip-end-to-end-http-tests>>
 
     steps:
       - checkout
diff --git a/lib/ld-eventsource/client.rb b/lib/ld-eventsource/client.rb
@@ -306,6 +306,9 @@ def read_stream(cxn)
           else
             begin
               data = cxn.readpartial
+              # readpartial gives us a string, which may not be a valid UTF-8 string because a
+              # multi-byte character might not yet have been fully read, but BufferedLineReader
+              # will handle that.
             rescue HTTP::TimeoutError 
               # For historical reasons, we rethrow this as our own type
               raise Errors::ReadTimeoutError.new(@read_timeout)
diff --git a/lib/ld-eventsource/impl/buffered_line_reader.rb b/lib/ld-eventsource/impl/buffered_line_reader.rb
@@ -9,17 +9,21 @@ class BufferedLineReader
       # input data runs out, the output enumerator ends and does not include any partially
       # completed line.
       #
-      # @param [Enumerator] chunks  an enumerator that will yield strings from a stream
-      # @return [Enumerator]  an enumerator that will yield one line at a time
+      # @param [Enumerator] chunks  an enumerator that will yield strings from a stream -
+      #  these are treated as raw UTF-8 bytes, regardless of the string's declared encoding
+      #  (so it is OK if a multi-byte character is split across chunks); if the declared
+      #  encoding of the chunk is not ASCII-8BIT, it will be changed to ASCII-8BIT in place
+      # @return [Enumerator]  an enumerator that will yield one line at a time in UTF-8
       #
       def self.lines_from(chunks)
-        buffer = ""
+        buffer = "".b
         position = 0
         line_start = 0
         last_char_was_cr = false
 
         Enumerator.new do |gen|
           chunks.each do |chunk|
+            chunk.force_encoding("ASCII-8BIT")
             buffer << chunk
 
             loop do
@@ -47,7 +51,12 @@ def self.lines_from(chunks)
                 next
               end
 
-              line = buffer[line_start, i - line_start]
+              line = buffer[line_start, i - line_start].force_encoding("UTF-8")
+              # Calling force_encoding just declares that we believe the encoding of this string to be
+              # UTF-8 (which is the only encoding allowed in the SSE protocol); it doesn't cause any
+              # re-decoding of the string. The previous line-parsing steps were done on raw 8-bit
+              # strings so that Ruby won't try to do any UTF-8 decoding on intermediate slices.
+
               last_char_was_cr = false
               i += 1
               if ch == "\r"
diff --git a/spec/buffered_line_reader_spec.rb b/spec/buffered_line_reader_spec.rb
@@ -74,4 +74,24 @@ def tests_for_terminator(term, desc)
       "fourth line", "", "last"]
     expect(subject.lines_from(chunks).to_a).to eq(expected)
   end
+
+  it "decodes from UTF-8" do
+    text = "abc€豆腐xyz"
+    chunks = [(text + "\n").encode("UTF-8").b]
+    expected = [text]
+    expect(subject.lines_from(chunks).to_a).to eq(expected)
+  end
+
+  it "decodes from UTF-8 when multi-byte characters are split across chunks" do
+    text = "abc€豆腐xyz"
+    raw = (text + "\n").encode("UTF-8").b
+    chunks = raw.bytes.to_a.map{ |byte| byte.chr.force_encoding("UTF-8") }
+    # Calling force_encoding("UTF-8") here simulates the behavior of the http gem's
+    # readpartial method. It actually returns undecoded bytes that might include an
+    # incomplete multi-byte character, but the string's encoding could still be
+    # declared as UTF-8. So we are making sure that BufferedLineReader correctly
+    # handles such a case.
+    expected = [text]
+    expect(subject.lines_from(chunks).to_a).to eq(expected)
+  end
 end
diff --git a/spec/client_spec.rb b/spec/client_spec.rb
@@ -5,6 +5,10 @@
 # End-to-end tests of the SSE client against a real server
 #
 describe SSE::Client do
+  before(:each) do
+    skip("end-to-end HTTP tests are disabled because they're unreliable on this platform") if !stub_http_server_available?
+  end
+
   subject { SSE::Client }
 
   let(:simple_event_1) { SSE::StreamEvent.new(:go, "foo")}
diff --git a/spec/http_stub.rb b/spec/http_stub.rb
@@ -2,6 +2,11 @@
 require "webrick/httpproxy"
 require "webrick/https"
 
+def stub_http_server_available?
+  flag = ENV["LD_SKIP_END_TO_END_HTTP_TESTS"]
+  flag.nil? || flag == ""
+end
+
 class StubHTTPServer
   attr_reader :port