| Here is a Python script I wrote that you can run after your website has been
downloaded; it removes the srcset attribute from every img tag.
# Given a PATH, go through all the HTML files beneath it and delete the srcset
# attribute from every img tag.
# Files are overwritten in place.
from bs4 import BeautifulSoup
import os
from glob import glob
import codecs
# Root directory of the downloaded website; replace the placeholder before running.
PATH = "<downloaded website path>"


def find_html_files(root):
    """Return the paths of all ``*.html`` files found under *root*.

    Every directory reported by ``os.walk(root)`` is searched with the glob
    pattern ``*.html``. If *root* does not exist, ``os.walk`` yields nothing
    and an empty list is returned.
    """
    return [
        html_path
        for dirpath, _dirnames, _filenames in os.walk(root)
        for html_path in glob(os.path.join(dirpath, '*.html'))
    ]


def strip_srcset(filename):
    """Remove the ``srcset`` attribute from every ``img`` tag in *filename*.

    The file is read as UTF-8, parsed with BeautifulSoup's ``html.parser``,
    and then overwritten with the prettified document. The file is rewritten
    even when no ``srcset`` attribute was present.
    """
    # Read and close the file before reopening it for writing.
    with codecs.open(filename, 'r', 'utf-8') as source:
        soup = BeautifulSoup(source.read(), 'html.parser')

    for img in soup.find_all('img'):
        # pop() with a default is a no-op for tags that have no srcset.
        img.attrs.pop('srcset', None)

    with codecs.open(filename, 'w', 'utf-8') as target:
        target.write(soup.prettify())


result = find_html_files(PATH)
for filename in result:
    # Parenthesized form prints identically on Python 2 and Python 3.
    print(filename)
    strip_srcset(filename)
| |