Add CSpanIE (closes #312)

This commit is contained in:
Jaime Marquínez Ferrándiz 2013-06-26 17:55:54 +02:00
parent 2e32528012
commit aa0c87391c
3 changed files with 56 additions and 0 deletions

View file

@ -695,5 +695,15 @@
"info_dict": {
"title": "卡马乔国足开大脚长传冲吊集锦"
}
},
{
"name": "CSpan",
"url": "http://www.c-spanvideo.org/program/HolderonV",
"file": "315139.flv",
"md5": "74a623266956f69e4df0068ab6c80fe4",
"info_dict": {
"title": "Attorney General Eric Holder on Voting Rights Act Decision"
},
"skip": "Requires rtmpdump"
}
]

View file

@ -6,6 +6,7 @@ from .bliptv import BlipTVIE, BlipTVUserIE
from .breakcom import BreakIE from .breakcom import BreakIE
from .collegehumor import CollegeHumorIE from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE from .comedycentral import ComedyCentralIE
from .cspan import CSpanIE
from .dailymotion import DailymotionIE from .dailymotion import DailymotionIE
from .depositfiles import DepositFilesIE from .depositfiles import DepositFilesIE
from .eighttracks import EightTracksIE from .eighttracks import EightTracksIE
@ -132,6 +133,7 @@ def gen_extractors():
VevoIE(), VevoIE(),
JukeboxIE(), JukeboxIE(),
TudouIE(), TudouIE(),
CSpanIE(),
GenericIE() GenericIE()
] ]

View file

@ -0,0 +1,44 @@
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
)
class CSpanIE(InfoExtractor):
    """Information extractor for c-spanvideo.org program pages.

    The program page is downloaded to find the numeric program id, which is
    then used to query the site's ``flashXml.php`` service.  That response
    supplies the title together with the RTMP URL and play path of the
    stream; the description and thumbnail come from the ``<meta>`` tags of
    the program page itself.
    """

    # The dots of the host name are escaped so that they only match a
    # literal '.', not an arbitrary character.
    _VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'

    def _real_extract(self, url):
        """Return the info dictionary for the program page at *url*.

        The result carries the keys ``id``, ``title``, ``ext``, ``url``,
        ``play_path``, ``description`` and ``thumbnail``.  ``url`` is an
        RTMP URL and ``play_path`` is the path to request on that
        connection.
        """
        mobj = re.match(self._VALID_URL, url)
        # The program name from the URL is only used as the display id of
        # the first download; the real video id is found inside the page.
        prog_name = mobj.group(1)

        webpage = self._download_webpage(url, prog_name)
        video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id')

        query = compat_urllib_parse.urlencode({
            'programid': video_id,
            'dynamic': '1',
        })
        info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + query
        video_info = self._download_webpage(info_url, video_id, u'Downloading video info')

        self.report_extraction(video_id)

        title = self._html_search_regex(
            r'<string name="title">(.*?)</string>',
            video_info, 'title')
        description = self._html_search_regex(
            r'<meta (?:property="og:|name=")description" content="(.*?)"',
            webpage, 'description',
            flags=re.MULTILINE|re.DOTALL)
        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content="(.*?)"',
            webpage, 'thumbnail')

        # Kept in its own name so the page URL passed in as ``url`` is not
        # overwritten by the stream URL.
        video_url = self._search_regex(
            r'<string name="URL">(.*?)</string>',
            video_info, 'video url')
        # The service returns a template; fill in its protocol and port
        # placeholders.
        video_url = video_url.replace('$(protocol)', 'rtmp').replace('$(port)', '443')
        play_path = self._search_regex(
            r'<string name="path">(.*?)</string>',
            video_info, 'rtmp play path')

        return {
            'id': video_id,
            'title': title,
            'ext': 'flv',
            'url': video_url,
            'play_path': play_path,
            'description': description,
            'thumbnail': thumbnail,
        }