[fox9] fix extraction

This commit is contained in:
Remita Amine 2019-10-29 09:43:17 +01:00
parent c4bd9cb7bb
commit 7455832f31
2 changed files with 25 additions and 23 deletions

View file

@@ -367,7 +367,10 @@ from .fourtube import (
FuxIE, FuxIE,
) )
from .fox import FOXIE from .fox import FOXIE
from .fox9 import FOX9IE from .fox9 import (
FOX9IE,
FOX9NewsIE,
)
from .foxgay import FoxgayIE from .foxgay import FoxgayIE
from .foxnews import ( from .foxnews import (
FoxNewsIE, FoxNewsIE,

View file

@@ -1,13 +1,23 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from .anvato import AnvatoIE from .common import InfoExtractor
class FOX9IE(AnvatoIE): class FOX9IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?fox9\.com/(?:[^/]+/)+(?P<id>\d+)-story' _VALID_URL = r'https?://(?:www\.)?fox9\.com/video/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.fox9.com/news/215123287-story', def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result(
'anvato:anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b:' + video_id,
'Anvato', video_id)
class FOX9NewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?fox9\.com/news/(?P<id>[^/?&#]+)'
_TEST = {
'url': 'https://www.fox9.com/news/black-bear-in-tree-draws-crowd-in-downtown-duluth-minnesota',
'md5': 'd6e1b2572c3bab8a849c9103615dd243', 'md5': 'd6e1b2572c3bab8a849c9103615dd243',
'info_dict': { 'info_dict': {
'id': '314473', 'id': '314473',
@@ -21,22 +31,11 @@ class FOX9IE(AnvatoIE):
'categories': ['News', 'Sports'], 'categories': ['News', 'Sports'],
'tags': ['news', 'video'], 'tags': ['news', 'video'],
}, },
}, { }
'url': 'http://www.fox9.com/news/investigators/214070684-story',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
webpage = self._download_webpage(url, video_id) anvato_id = self._search_regex(
r'anvatoId\s*:\s*[\'"](\d+)', webpage, 'anvato id')
video_id = self._parse_json( return self.url_result('https://www.fox9.com/video/' + anvato_id, 'FOX9')
self._search_regex(
r"this\.videosJson\s*=\s*'(\[.+?\])';",
webpage, 'anvato playlist'),
video_id)[0]['video']
return self._get_anvato_videos(
'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b',
video_id)