# from: https://www.reddit.com/r/DataHoarder/comments/1d90f3c/nsfw_best_way_to_download_hundreds_of_separate/
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta

import requests
from bs4 import BeautifulSoup


def fetch_webpage(url):
    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return None


def parse_galleries_from_root(html_content):
    # Collect all gallery links from a listing page.
    soup = BeautifulSoup(html_content, 'html.parser')
    main_content = soup.find('div', class_='flex-container')
    urls = []
    if main_content:
        for link in main_content.find_all('a'):
            href = link.get('href')
            if href and 'https://bitch-show.com/gallery/' in href:
                urls.append(href)
    return urls


def parse_imagehost_urls_from_gallery(html_content):
    # Collect the imagebam viewer links from a single gallery page.
    soup = BeautifulSoup(html_content, 'html.parser')
    flex_container = soup.find('div', class_='flex-container')
    td_content = flex_container.find('td', class_='content') if flex_container else None
    if td_content:
        urls = []
        for link in td_content.find_all('a'):
            href = link.get('href')
            if href and 'https://www.imagebam.com/view/' in href:
                # Strip anything preceding the imagebam URL (e.g. a redirect wrapper).
                start_index = href.find('https://www.imagebam.com/view/')
                if start_index != -1:
                    urls.append(href[start_index:])
        return urls
    else:
        print("No `td` element with class `content` found.")
        return []


def get_cookie_expiration():
    # Expiration timestamp six hours from now, formatted as a cookie date string.
    current_datetime = datetime.now()
    new_datetime = current_datetime + timedelta(hours=6)
    return new_datetime.strftime("%a, %d-%b-%Y %H:%M:%S GMT")


def fetch_webpage_with_cookie(url):
    # The `nsfw_inter` cookie marks the NSFW interstitial as already accepted.
    expiration = get_cookie_expiration()
    cookies = {
        'nsfw_inter': '1',
        'expires': expiration,
        'path': '/'
    }
    with requests.Session() as session:
        response = session.get(url, cookies=cookies)
        if response.status_code == 200:
            return response.text
        else:
            print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
            return None


def parse_imagebam_page(html_content):
    # Pull the full-size image URL out of an imagebam viewer page.
    soup = BeautifulSoup(html_content, 'html.parser')
    image_tag = soup.find('img', class_='main-image')
    if image_tag:
        return image_tag.get('src')
    else:
        print("No image found on the page.")
        return None


def download_image(url, save_dir):
    try:
        os.makedirs(save_dir, exist_ok=True)
        filename = url.split('/')[-1]
        save_path = os.path.join(save_dir, filename)
        # Skip files that were already downloaded on a previous run.
        if os.path.exists(save_path):
            print(f"{save_path} already exists.")
            return
        response = requests.get(url)
        response.raise_for_status()
        with open(save_path, 'wb') as file:
            file.write(response.content)
        print(f"Image successfully downloaded and saved to {save_path}")
    except requests.exceptions.RequestException as e:
        print(f"Failed to download the image: {e}")


def download_gallery_images(gallery_url, save_root):
    print(f'Trying to download gallery at {gallery_url}')
    html_content = fetch_webpage(gallery_url)
    if html_content:
        urls = parse_imagehost_urls_from_gallery(html_content)
        gallery_name = gallery_url.split('/')[-1]
        save_dir = os.path.join(save_root, gallery_name)
        # Fetch and download the gallery's images in parallel.
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [executor.submit(fetch_and_download_image, url, save_dir) for url in urls]
            for future in as_completed(futures):
                future.result()


def fetch_and_download_image(url, save_dir):
    try:
        imagebam_content = fetch_webpage_with_cookie(url)
        if imagebam_content:
            image_src = parse_imagebam_page(imagebam_content)
            if image_src:
                download_image(image_src, save_dir)
    except Exception as e:
        print(f"Failed to download the image: {e}")


# Main function
def main():
    save_root = './'
    scrape_root = 'https://bitch-show.com/page/'
    for i in range(1, 501):
        page_url = f'{scrape_root}{i}'
        print(f'Working on Page {i} at {page_url}')
        page_html_content = fetch_webpage(page_url)
        if not page_html_content:
            continue
        gallery_urls = parse_galleries_from_root(page_html_content)
        for gallery_url in gallery_urls:
            download_gallery_images(gallery_url, save_root)


if __name__ == "__main__":
    main()