Language improvements, replace whatlanguage with CLD (#2753)
* add failing en specs * add cld2 gem * Replace WhatLanguage with CLD
This commit is contained in:
parent
53384b0ffe
commit
8c5ad23b24
2
Gemfile
2
Gemfile
|
@ -20,6 +20,7 @@ gem 'paperclip', '~> 5.1'
|
||||||
gem 'paperclip-av-transcoder'
|
gem 'paperclip-av-transcoder'
|
||||||
|
|
||||||
gem 'addressable'
|
gem 'addressable'
|
||||||
|
gem 'cld2', require: 'cld'
|
||||||
gem 'devise'
|
gem 'devise'
|
||||||
gem 'devise-two-factor'
|
gem 'devise-two-factor'
|
||||||
gem 'doorkeeper'
|
gem 'doorkeeper'
|
||||||
|
@ -56,7 +57,6 @@ gem 'statsd-instrument'
|
||||||
gem 'twitter-text'
|
gem 'twitter-text'
|
||||||
gem 'tzinfo-data'
|
gem 'tzinfo-data'
|
||||||
gem 'webpacker', '~>1.2'
|
gem 'webpacker', '~>1.2'
|
||||||
gem 'whatlanguage'
|
|
||||||
|
|
||||||
# For some reason the view specs start failing without this
|
# For some reason the view specs start failing without this
|
||||||
gem 'react-rails'
|
gem 'react-rails'
|
||||||
|
|
|
@ -102,6 +102,8 @@ GEM
|
||||||
rack-test (>= 0.5.4)
|
rack-test (>= 0.5.4)
|
||||||
xpath (~> 2.0)
|
xpath (~> 2.0)
|
||||||
chunky_png (1.3.8)
|
chunky_png (1.3.8)
|
||||||
|
cld2 (1.0.3)
|
||||||
|
ffi (~> 1.9.3)
|
||||||
climate_control (0.1.0)
|
climate_control (0.1.0)
|
||||||
cocaine (0.5.8)
|
cocaine (0.5.8)
|
||||||
climate_control (>= 0.0.3, < 1.0)
|
climate_control (>= 0.0.3, < 1.0)
|
||||||
|
@ -153,6 +155,7 @@ GEM
|
||||||
faker (1.7.3)
|
faker (1.7.3)
|
||||||
i18n (~> 0.5)
|
i18n (~> 0.5)
|
||||||
fast_blank (1.0.0)
|
fast_blank (1.0.0)
|
||||||
|
ffi (1.9.18)
|
||||||
fuubar (2.2.0)
|
fuubar (2.2.0)
|
||||||
rspec-core (~> 3.0)
|
rspec-core (~> 3.0)
|
||||||
ruby-progressbar (~> 1.4)
|
ruby-progressbar (~> 1.4)
|
||||||
|
@ -463,7 +466,6 @@ GEM
|
||||||
websocket-driver (0.6.5)
|
websocket-driver (0.6.5)
|
||||||
websocket-extensions (>= 0.1.0)
|
websocket-extensions (>= 0.1.0)
|
||||||
websocket-extensions (0.1.2)
|
websocket-extensions (0.1.2)
|
||||||
whatlanguage (1.0.6)
|
|
||||||
xpath (2.0.0)
|
xpath (2.0.0)
|
||||||
nokogiri (~> 1.3)
|
nokogiri (~> 1.3)
|
||||||
|
|
||||||
|
@ -484,6 +486,7 @@ DEPENDENCIES
|
||||||
capistrano-rbenv
|
capistrano-rbenv
|
||||||
capistrano-yarn
|
capistrano-yarn
|
||||||
capybara
|
capybara
|
||||||
|
cld2
|
||||||
devise
|
devise
|
||||||
devise-two-factor
|
devise-two-factor
|
||||||
doorkeeper
|
doorkeeper
|
||||||
|
@ -549,7 +552,6 @@ DEPENDENCIES
|
||||||
uglifier (>= 1.3.0)
|
uglifier (>= 1.3.0)
|
||||||
webmock
|
webmock
|
||||||
webpacker (~> 1.2)
|
webpacker (~> 1.2)
|
||||||
whatlanguage
|
|
||||||
|
|
||||||
RUBY VERSION
|
RUBY VERSION
|
||||||
ruby 2.4.1p111
|
ruby 2.4.1p111
|
||||||
|
|
|
@ -9,11 +9,23 @@ class LanguageDetector
|
||||||
end
|
end
|
||||||
|
|
||||||
def to_iso_s
|
def to_iso_s
|
||||||
WhatLanguage.new(:all).language_iso(text_without_urls) || default_locale.to_sym
|
detected_language_code || default_locale.to_sym
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
|
def detected_language_code
|
||||||
|
detected_language[:code].to_sym if detected_language_reliable?
|
||||||
|
end
|
||||||
|
|
||||||
|
def detected_language
|
||||||
|
@_detected_language ||= CLD.detect_language(text_without_urls)
|
||||||
|
end
|
||||||
|
|
||||||
|
def detected_language_reliable?
|
||||||
|
detected_language[:reliable]
|
||||||
|
end
|
||||||
|
|
||||||
def text_without_urls
|
def text_without_urls
|
||||||
text.dup.tap do |new_text|
|
text.dup.tap do |new_text|
|
||||||
URI.extract(new_text).each do |url|
|
URI.extract(new_text).each do |url|
|
||||||
|
|
|
@ -3,11 +3,17 @@ require 'rails_helper'
|
||||||
|
|
||||||
describe LanguageDetector do
|
describe LanguageDetector do
|
||||||
describe 'to_iso_s' do
|
describe 'to_iso_s' do
|
||||||
it 'detects english language' do
|
it 'detects english language for basic strings' do
|
||||||
string = 'Hello and welcome to mastodon'
|
strings = [
|
||||||
|
"Hello and welcome to mastodon",
|
||||||
|
"I'd rather not!",
|
||||||
|
"a lot of people just want to feel righteous all the time and that's all that matters",
|
||||||
|
]
|
||||||
|
strings.each do |string|
|
||||||
result = described_class.new(string).to_iso_s
|
result = described_class.new(string).to_iso_s
|
||||||
|
|
||||||
expect(result).to eq :en
|
expect(result).to eq(:en), string
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'detects spanish language' do
|
it 'detects spanish language' do
|
||||||
|
@ -19,15 +25,15 @@ describe LanguageDetector do
|
||||||
|
|
||||||
describe 'when language can\'t be detected' do
|
describe 'when language can\'t be detected' do
|
||||||
it 'confirm language engine cant detect' do
|
it 'confirm language engine cant detect' do
|
||||||
result = WhatLanguage.new(:all).language_iso('')
|
result = CLD.detect_language('')
|
||||||
expect(result).to be_nil
|
expect(result[:reliable]).to be false
|
||||||
end
|
end
|
||||||
|
|
||||||
describe 'because of a URL' do
|
describe 'because of a URL' do
|
||||||
it 'uses default locale when sent just a URL' do
|
it 'uses default locale when sent just a URL' do
|
||||||
string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4'
|
string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4'
|
||||||
wl_result = WhatLanguage.new(:all).language_iso(string)
|
cld_result = CLD.detect_language(string)[:code]
|
||||||
expect(wl_result).not_to eq :en
|
expect(cld_result).not_to eq :en
|
||||||
|
|
||||||
result = described_class.new(string).to_iso_s
|
result = described_class.new(string).to_iso_s
|
||||||
|
|
||||||
|
|
Reference in a new issue