[youporn] Fix upload date extraction and make comment count optional (closes #26986)

This commit is contained in:
Sergey M․ 2020-11-19 05:16:25 +07:00
parent 91e954587f
commit b1347a5881
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@@ -29,7 +29,6 @@ class YouPornIE(InfoExtractor):
'upload_date': '20101217', 'upload_date': '20101217',
'average_rating': int, 'average_rating': int,
'view_count': int, 'view_count': int,
'comment_count': int,
'categories': list, 'categories': list,
'tags': list, 'tags': list,
'age_limit': 18, 'age_limit': 18,
@@ -48,7 +47,6 @@ class YouPornIE(InfoExtractor):
'upload_date': '20110418', 'upload_date': '20110418',
'average_rating': int, 'average_rating': int,
'view_count': int, 'view_count': int,
'comment_count': int,
'categories': list, 'categories': list,
'tags': list, 'tags': list,
'age_limit': 18, 'age_limit': 18,
@@ -156,7 +154,8 @@ class YouPornIE(InfoExtractor):
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>', r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
webpage, 'uploader', fatal=False) webpage, 'uploader', fatal=False)
upload_date = unified_strdate(self._html_search_regex( upload_date = unified_strdate(self._html_search_regex(
[r'Date\s+[Aa]dded:\s*<span>([^<]+)', [r'UPLOADED:\s*<span>([^<]+)',
r'Date\s+[Aa]dded:\s*<span>([^<]+)',
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'], r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
webpage, 'upload date', fatal=False)) webpage, 'upload date', fatal=False))
@@ -171,7 +170,7 @@ class YouPornIE(InfoExtractor):
webpage, 'view count', fatal=False, group='count')) webpage, 'view count', fatal=False, group='count'))
comment_count = str_to_int(self._search_regex( comment_count = str_to_int(self._search_regex(
r'>All [Cc]omments? \(([\d,.]+)\)', r'>All [Cc]omments? \(([\d,.]+)\)',
webpage, 'comment count', fatal=False)) webpage, 'comment count', default=None))
def extract_tag_box(regex, title): def extract_tag_box(regex, title):
tag_box = self._search_regex(regex, webpage, title, default=None) tag_box = self._search_regex(regex, webpage, title, default=None)