1
0
Fork 0
mirror of https://gitlab.com/dstftw/youtube-dl.git synced 2020-11-16 09:42:26 +00:00

[utils] add extract_attributes for extracting HTML tag attributes

This commit is contained in:
remitamine 2015-09-11 04:44:17 +01:00
parent 1721fef28b
commit 689fb748ee

View file

@ -248,6 +248,14 @@ def get_element_by_attribute(attribute, value, html):
return unescapeHTML(res)
def extract_attributes(
        attributes_str,
        attributes_regex=(
            r'(?s)\s*([^\s=]+)\s*=\s*["\']'
            r'((?<=")[^"]*(?=")|(?<=\')[^\']*(?=\'))'
            r'["\']')):
    """Parse quoted ``name="value"`` pairs out of a string into a dict.

    attributes_str   -- text containing the attributes, e.g. the inside of
                        an HTML opening tag.
    attributes_regex -- pattern applied with re.findall; it must expose
                        exactly two capturing groups: the attribute name
                        and the attribute value.

    Returns a dict mapping each attribute name to its value (an empty dict
    when nothing matches). When a name occurs more than once, the last
    occurrence wins.

    With the default pattern a value is delimited by a matching pair of
    quotes, so it may contain the other quote character (title="it's") and
    may be empty (alt=""). Unquoted values and value-less (boolean)
    attributes are not recognised, and HTML entities are left undecoded.
    """
    # The look-behind/look-ahead pairs in the default pattern tie the closing
    # quote to the opening one while keeping the value in a single capturing
    # group, so findall still yields plain (name, value) tuples.
    # findall returns [] when nothing matches, hence no emptiness check.
    return dict(re.findall(attributes_regex, attributes_str))
def clean_html(html):
"""Clean an HTML snippet into a readable string"""