Language improvements, replace whatlanguage with CLD (#2753)

* add failing en specs

* add cld2 gem

* Replace WhatLanguage with CLD
This commit is contained in:
Matt Jankowski 2017-05-03 10:59:31 -04:00 committed by Eugen Rochko
parent 53384b0ffe
commit 8c5ad23b24
4 changed files with 32 additions and 12 deletions

View file

@ -20,6 +20,7 @@ gem 'paperclip', '~> 5.1'
gem 'paperclip-av-transcoder' gem 'paperclip-av-transcoder'
gem 'addressable' gem 'addressable'
gem 'cld2', require: 'cld'
gem 'devise' gem 'devise'
gem 'devise-two-factor' gem 'devise-two-factor'
gem 'doorkeeper' gem 'doorkeeper'
@ -56,7 +57,6 @@ gem 'statsd-instrument'
gem 'twitter-text' gem 'twitter-text'
gem 'tzinfo-data' gem 'tzinfo-data'
gem 'webpacker', '~>1.2' gem 'webpacker', '~>1.2'
gem 'whatlanguage'
# For some reason the view specs start failing without this # For some reason the view specs start failing without this
gem 'react-rails' gem 'react-rails'

View file

@ -102,6 +102,8 @@ GEM
rack-test (>= 0.5.4) rack-test (>= 0.5.4)
xpath (~> 2.0) xpath (~> 2.0)
chunky_png (1.3.8) chunky_png (1.3.8)
cld2 (1.0.3)
ffi (~> 1.9.3)
climate_control (0.1.0) climate_control (0.1.0)
cocaine (0.5.8) cocaine (0.5.8)
climate_control (>= 0.0.3, < 1.0) climate_control (>= 0.0.3, < 1.0)
@ -153,6 +155,7 @@ GEM
faker (1.7.3) faker (1.7.3)
i18n (~> 0.5) i18n (~> 0.5)
fast_blank (1.0.0) fast_blank (1.0.0)
ffi (1.9.18)
fuubar (2.2.0) fuubar (2.2.0)
rspec-core (~> 3.0) rspec-core (~> 3.0)
ruby-progressbar (~> 1.4) ruby-progressbar (~> 1.4)
@ -463,7 +466,6 @@ GEM
websocket-driver (0.6.5) websocket-driver (0.6.5)
websocket-extensions (>= 0.1.0) websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.2) websocket-extensions (0.1.2)
whatlanguage (1.0.6)
xpath (2.0.0) xpath (2.0.0)
nokogiri (~> 1.3) nokogiri (~> 1.3)
@ -484,6 +486,7 @@ DEPENDENCIES
capistrano-rbenv capistrano-rbenv
capistrano-yarn capistrano-yarn
capybara capybara
cld2
devise devise
devise-two-factor devise-two-factor
doorkeeper doorkeeper
@ -549,7 +552,6 @@ DEPENDENCIES
uglifier (>= 1.3.0) uglifier (>= 1.3.0)
webmock webmock
webpacker (~> 1.2) webpacker (~> 1.2)
whatlanguage
RUBY VERSION RUBY VERSION
ruby 2.4.1p111 ruby 2.4.1p111

View file

@ -9,11 +9,23 @@ class LanguageDetector
end end
def to_iso_s def to_iso_s
WhatLanguage.new(:all).language_iso(text_without_urls) || default_locale.to_sym detected_language_code || default_locale.to_sym
end end
private private
# Returns the detected language code as a Symbol, or nil when the
# detection is not flagged as reliable (callers fall back to a default).
def detected_language_code
  return unless detected_language_reliable?

  code = detected_language[:code]
  code.to_sym
end
# Memoized result of CLD.detect_language for the value returned by
# text_without_urls; the detector is only invoked again while the cached
# value is nil or false.
def detected_language
  return @_detected_language if @_detected_language

  @_detected_language = CLD.detect_language(text_without_urls)
end
# Reports the :reliable entry of the detection result as-is.
def detected_language_reliable?
  detection = detected_language
  detection[:reliable]
end
def text_without_urls def text_without_urls
text.dup.tap do |new_text| text.dup.tap do |new_text|
URI.extract(new_text).each do |url| URI.extract(new_text).each do |url|

View file

@ -3,11 +3,17 @@ require 'rails_helper'
describe LanguageDetector do describe LanguageDetector do
describe 'to_iso_s' do describe 'to_iso_s' do
it 'detects english language' do it 'detects english language for basic strings' do
string = 'Hello and welcome to mastodon' strings = [
"Hello and welcome to mastodon",
"I'd rather not!",
"a lot of people just want to feel righteous all the time and that's all that matters",
]
strings.each do |string|
result = described_class.new(string).to_iso_s result = described_class.new(string).to_iso_s
expect(result).to eq :en expect(result).to eq(:en), string
end
end end
it 'detects spanish language' do it 'detects spanish language' do
@ -19,15 +25,15 @@ describe LanguageDetector do
describe 'when language can\'t be detected' do describe 'when language can\'t be detected' do
it 'confirm language engine cant detect' do it 'confirm language engine cant detect' do
result = WhatLanguage.new(:all).language_iso('') result = CLD.detect_language('')
expect(result).to be_nil expect(result[:reliable]).to be false
end end
describe 'because of a URL' do describe 'because of a URL' do
it 'uses default locale when sent just a URL' do it 'uses default locale when sent just a URL' do
string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4' string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4'
wl_result = WhatLanguage.new(:all).language_iso(string) cld_result = CLD.detect_language(string)[:code]
expect(wl_result).not_to eq :en expect(cld_result).not_to eq :en
result = described_class.new(string).to_iso_s result = described_class.new(string).to_iso_s