diff --git a/.travis.yml b/.travis.yml index 4bb33266..4d4dc089 100644 --- a/.travis.yml +++ b/.travis.yml @@ -32,6 +32,7 @@ addons: - g++-6 - libprotobuf-dev - protobuf-compiler + - libicu-dev rvm: - 2.3.4 diff --git a/Aptfile b/Aptfile index 0456343e..3af0956e 100644 --- a/Aptfile +++ b/Aptfile @@ -3,3 +3,4 @@ libprotobuf-dev ffmpeg libxdamage1 libxfixes3 +libicu-dev diff --git a/Dockerfile b/Dockerfile index 7033cddd..97a69139 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,6 +25,7 @@ RUN echo "@edge https://nl.alpinelinux.org/alpine/edge/main" >> /etc/apk/reposit ffmpeg \ file \ git \ + icu-dev \ imagemagick@edge \ libpq \ libxml2 \ diff --git a/Gemfile b/Gemfile index 95c74eef..b52685cb 100644 --- a/Gemfile +++ b/Gemfile @@ -22,6 +22,7 @@ gem 'active_model_serializers', '~> 0.10' gem 'addressable', '~> 2.5' gem 'bootsnap' gem 'browser' +gem 'charlock_holmes', '~> 0.7.3' gem 'cld3', '~> 3.1' gem 'devise', '~> 4.2' gem 'devise-two-factor', '~> 3.0' diff --git a/Gemfile.lock b/Gemfile.lock index 71f83f73..de0d6a10 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -106,6 +106,7 @@ GEM rack (>= 1.0.0) rack-test (>= 0.5.4) xpath (~> 2.0) + charlock_holmes (0.7.3) case_transform (0.2) activesupport chunky_png (1.3.8) @@ -501,6 +502,7 @@ DEPENDENCIES capistrano-rbenv (~> 2.1) capistrano-yarn (~> 2.0) capybara (~> 2.14) + charlock_holmes (~> 0.7.3) cld3 (~> 3.1) climate_control (~> 0.2) devise (~> 4.2) diff --git a/Vagrantfile b/Vagrantfile index 1f56fcfb..cbe6623b 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -37,6 +37,7 @@ sudo apt-get install \ yarn \ libprotobuf-dev \ libreadline-dev \ + libicu-dev \ -y # Install rvm diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb index 8ddaa2bf..6ef3abb6 100644 --- a/app/services/fetch_link_card_service.rb +++ b/app/services/fetch_link_card_service.rb @@ -1,5 +1,4 @@ # frozen_string_literal: true -require 'nkf' class FetchLinkCardService < BaseService include HttpHelper @@ -86,7 +85,12 @@ class FetchLinkCardService < BaseService return if response.code != 200 || response.mime_type != 'text/html' html = response.to_s - page = Nokogiri::HTML(html, nil, NKF.guess(html).to_s) + + detector = CharlockHolmes::EncodingDetector.new + detector.strip_tags = true + + guess = detector.detect(html, response.charset) + page = Nokogiri::HTML(html, nil, guess&.fetch(:encoding)) card.type = :link card.title = meta_property(page, 'og:title') || page.at_xpath('//title')&.content diff --git a/spec/fixtures/requests/koi8-r.txt b/spec/fixtures/requests/koi8-r.txt new file mode 100644 index 00000000..d4242af0 --- /dev/null +++ b/spec/fixtures/requests/koi8-r.txt @@ -0,0 +1,20 @@ +HTTP/1.1 200 OK +Server: nginx/1.11.10 +Date: Tue, 04 Jul 2017 16:43:39 GMT +Content-Type: text/html +Content-Length: 273 +Connection: keep-alive +Last-Modified: Tue, 04 Jul 2017 16:41:34 GMT +Accept-Ranges: bytes + + +
+ + +