Re: picture tag with source subtags - HTTrack Website Copier Forum

Subject: Re: picture tag with source subtags

Author: _pA89

Date: 11/03/2025 21:16

I generated this with the help of the script by the author Mitch B (quoted below) and ChatGPT. It is WORKING.

#Subject: Re: picture tag with source subtags
#Author: Mitch B
#Date: 06/22/2017 23:41
# 	
#Here is a python script that I wrote that removes all the srcset tags that you
#can use after your website is downloaded.

#Given a PATH, go through all the html files and delete the srcset
#attribute from img and source tags.
#Files will be overwritten.

from bs4 import BeautifulSoup
import os
from glob import glob
import codecs
import sys

# Python 2/3 compatible tkinter import
try:
    import tkinter as tk
    from tkinter import messagebox
except ImportError:
    import Tkinter as tk
    import tkMessageBox as messagebox


#PATH = "<downloaded website path>"

# Keep this file in the project folder (not inside the website folder) and run it from there.
# >>> Auto detect HTTrack website folder (first folder next to this script)
#script_dir = os.path.dirname(os.path.abspath(__file__))
#subdirs = [d for d in os.listdir(script_dir) if os.path.isdir(os.path.join(script_dir, d))]
# Remove the line above if this script is put in the website folder.

# Locate the directory that holds this script. __file__ is used when the
# interpreter defines it; otherwise fall back to the path the script was
# launched with (sys.argv[0]).
try:
    _this_file = __file__
except NameError:
    _this_file = sys.argv[0]

script_dir = os.path.dirname(os.path.abspath(_this_file))

# >>> Auto detect HTTrack website folder (first folder next to this script)
# Remove this block entirely if script is placed *inside* website folder
#
# Candidates are the visible sub-folders of script_dir. Two kinds of folder
# are never a mirrored site and are skipped:
#   - "hts-cache": HTTrack's own cache inside a project folder
#   - "__pycache__": byte-code cache Python may create next to the script
# os.listdir() returns names in arbitrary order, so the candidates are sorted
# to make "first folder" mean the same thing on every run and platform.
_NOT_A_SITE = ('hts-cache', '__pycache__')

subdirs = sorted(
    d for d in os.listdir(script_dir)
    if os.path.isdir(os.path.join(script_dir, d))
    and not d.startswith('.')
    and d.lower() not in _NOT_A_SITE
)

if not subdirs:
    print("No website folder found next to script.")
    sys.exit(1)

# The folder whose HTML files will be rewritten in place.
PATH = os.path.join(script_dir, subdirs[0])
# <<<


# GUI confirmation box -------------------------------------------------
# Show the detected folder and let the user back out before any file is
# overwritten. Cancelling exits with status 0.
root = tk.Tk()
root.withdraw()  # Hide the empty main window; only the dialog should show

msg = "Script will modify HTML files in:\n\n{}\n\nProceed?".format(PATH)
try:
    answer = messagebox.askokcancel("Confirm Path", msg)
finally:
    # The hidden root is only needed to host the dialog; tear it down so no
    # Tk window lingers for the rest of the run.
    root.destroy()

if not answer:
    print("Operation cancelled by user.")
    sys.exit(0)
# End of GUI confirmation box ------------------------------------------



#result = [y for x in os.walk(PATH) for y in glob(os.path.join(x[0],'*.html'))]

print("\nScanning for HTML files...\n")

# Collect every *.html file under PATH.
# The match is done on the file names that os.walk() reports rather than by
# building a glob pattern from the directory path: a glob pattern would
# treat characters such as "[" "]" "*" "?" in a folder name as wildcards and
# silently miss files. Using the "files" list also guarantees that only
# regular directory entries (not sub-folders named "something.html") are
# returned, and lower() makes the extension check case-insensitive.
result = []
for folder, _subfolders, names in os.walk(PATH):
    for name in names:
        if name.lower().endswith('.html'):
            result.append(os.path.join(folder, name))
total_files = len(result)

print("Total HTML files found: {}\n".format(total_files))
print("Beginning srcset removal...\n")



#for filename in result:
#    print(filename)
#    file = codecs.open(filename,'r','utf-8')
#    data = file.read()
#    soup = BeautifulSoup(data, 'html.parser')
#
#    for p in soup.find_all('img'):
#        if 'srcset' in p.attrs:
#            del p.attrs['srcset']
#
#    file.close()
#    file1 = codecs.open(filename,'w','utf-8')
#    file1.write(soup.prettify())
#    file1.close()



# Counters reported in the final summary.
processed = 0  # files visited
modified = 0   # files actually rewritten

# For every HTML file: parse it, drop the srcset attribute from <img> and
# <source> tags, and write the file back only if something was removed.
for filename in result:
    print("[PROCESSING] {}".format(filename))
    processed += 1

    # Read the whole file as UTF-8. A file that cannot be read or decoded is
    # reported and skipped so that one bad page does not abort the whole run.
    try:
        with codecs.open(filename, 'r', 'utf-8') as src:
            data = src.read()
    except (IOError, OSError, UnicodeDecodeError) as err:
        print("    [SKIPPED] {}".format(err))
        continue

    soup = BeautifulSoup(data, 'html.parser')

    changed = False
    # Remove srcset from <img> and <source>
    for tag in soup.find_all(['img', 'source']):
        if 'srcset' in tag.attrs:
            del tag.attrs['srcset']
            changed = True

    if changed:
        # Serialise with decode() rather than prettify(): prettify() inserts
        # newlines and indentation, which changes how whitespace-sensitive
        # HTML renders. decode() returns the document as a text string
        # without that extra whitespace.
        with codecs.open(filename, 'w', 'utf-8') as dst:
            dst.write(soup.decode())
        modified += 1
        print("    [UPDATED]")
    else:
        print("    [NO CHANGE]")

# Final report: one banner with the number of files visited and rewritten.
_summary_lines = [
    "\n====================================",
    "Completed srcset cleanup",
    "Total files scanned : {}".format(processed),
    "Files modified      : {}".format(modified),
    "====================================\n",
]
print("\n".join(_summary_lines))

# Pause so terminal does not close immediately
print("\nPress any key to exit...")

try:
    # Windows: msvcrt lets us return on a single keypress
    import msvcrt
except ImportError:
    msvcrt = None

if msvcrt is not None:
    msvcrt.getch()
else:
    # Elsewhere, wait for Enter. On Python 2 the built-in input() would
    # *evaluate* whatever the user types, so prefer raw_input when it exists.
    try:
        _read_line = raw_input
    except NameError:
        _read_line = input

    try:
        _read_line()
    except (EOFError, KeyboardInterrupt, EnvironmentError, RuntimeError):
        # The pause is best-effort: if stdin is closed, missing or
        # interrupted there is nothing to wait for, so just exit.
        pass

Create subthread

All articles

Subject	Author	Date
picture tag with source subtags		03/24/2015 21:21
Re: picture tag with source subtags		06/01/2015 11:52
Re: picture tag with source subtags		01/05/2016 09:09
Re: picture tag with source subtags		06/22/2017 23:41
Re: picture tag with source subtags		09/20/2017 13:05
Re: picture tag with source subtags		06/27/2018 09:33
Re: picture tag with source subtags		03/23/2019 21:41
Re: picture tag with source subtags		11/03/2025 21:16
Re: picture tag with source subtags		11/03/2025 22:52