import requests
import json
from bs4 import BeautifulSoup as soup


def getProductsByLink(url):
    # Fetch one listing page and collect the absolute link of every product on it.
    httpRequest = requests.get(url)
    parsedHTML = soup(httpRequest.text, "html.parser")
    products = parsedHTML.find("div", class_="products")
    if products is None:
        print("Error: " + url)
        return []
    productLinks = []
    for element in products.find_all("div", class_="prd"):
        a = element.find("a", href=True)
        productLinks.append("https://www.penti.com" + a['href'])
    return productLinks


url = 'https://www.penti.com/tr/c/boxer'
httpRequest = requests.get(url)
parsedHTML = soup(httpRequest.text, "html.parser")

# The "plp-info" text begins with the total product count; the listing
# shows 42 products per page, so the page count follows from it.
productCount = int(parsedHTML.find("div", class_="plp-info").text.split()[0])
pageCount = productCount // 42

productLinks = []
for page in range(pageCount + 1):
    productLinks.extend(getProductsByLink(f'{url}?page={page}'))


def getProductHTML(url):
    # Fetch a product page and return its parsed DOM.
    response = requests.get(url)
    return soup(response.text, "html.parser")


def getProductCodesAndSizes(parsedHTML, url):
    # The product code is the last URL segment with dashes stripped; each
    # size option's data-code is that code plus a size suffix, so removing
    # the code leaves just the size.
    productCode = url.split('/')[-1].replace("-", "")
    sizeDropdown = parsedHTML.find("div", class_="dropdown size-dropdown")
    sizeDropdownItems = sizeDropdown.find_all("a", class_="dropdown-item")
    sizes = [str(item['data-code']).replace(productCode, "") for item in sizeDropdownItems]
    return productCode, sizes


def getStoresAndSizes(productCode, sizes):
    # Query the store finder for every size and keep only stores located in
    # the whitelisted Istanbul districts; sizes with no matching store are dropped.
    whitelist = [
        "beylikdüzü",
        "avcılar",
        "esenyurt",
        "büyükçekmece",
        "bahçelievler"
    ]
    data = {}
    for size in sizes:
        payload = {
            'productCode': productCode + size,
            'cityCode': '34',   # Istanbul
            'townCode': '0',    # all districts
            'pageType': 'PRODUCT'
        }
        response = requests.post("https://www.penti.com/tr/store-finder/find-store", data=payload)
        if response.status_code == 200:
            parsedHTML = soup(response.text, "html.parser")
            data[size] = {"status": "success", "stores": []}
            for store in parsedHTML.find_all("div", class_="sli"):
                info = store.find("div", class_="sli-info-left")
                if info is not None:
                    name = info.find("div", class_="sli-title").get_text(strip=True)
                    location = info.find("div", class_="sli-address").get_text(strip=True)
                    if any(district in location.lower() for district in whitelist):
                        data[size]["stores"].append({
                            "name": name,
                            "location": location
                        })
            if not data[size]["stores"]:
                data[size]["status"] = "failed"
        else:
            data[size] = {"status": "failed"}
    # Drop sizes that are out of stock everywhere (or whose lookup failed).
    for size in [s for s, info in data.items() if info.get("status") == "failed"]:
        del data[size]
    return data, list(data.keys())


def getImageLinks(parsedHTML):
    # Collect image URLs from the product gallery, preferring src and
    # falling back to (data-)srcset for lazily loaded images.
    gallery = parsedHTML.find('div', class_='pdp-gallery')
    image_links = []
    for img in gallery.find_all('img'):
        if img.get('src'):
            image_links.append(img['src'])
        elif img.get('srcset'):
            image_links.append(img['srcset'])
        elif img.get('data-srcset'):
            image_links.append(img['data-srcset'])
    return image_links


def getTitle(parsedHTML):
    title = parsedHTML.find('h1', class_='pdp-title')
    return title.text.strip()


def getProductInfo(url):
    # Scrape a single product page into one record.
    parsedHTML = getProductHTML(url)
    productCode, sizes = getProductCodesAndSizes(parsedHTML, url)
    stores, sizes = getStoresAndSizes(productCode, sizes)
    return {
        "title": getTitle(parsedHTML),
        "url": url,
        "imageLinks": getImageLinks(parsedHTML),
        "stores": stores,
    }


# Write the records as a JSON array incrementally, one product per request,
# so partial results survive if the scrape is interrupted.
with open('data.json', 'w') as file:
    file.write("[\n")
    for index, url in enumerate(productLinks):
        json.dump(getProductInfo(url), file, indent=4)
        if index < len(productLinks) - 1:
            file.write(",")
        file.write("\n")
        print(f"{(index + 1) * 100 // len(productLinks)}% -> {url}")
    file.write("]\n")
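

# A minimal sketch of consuming the output, assuming the scrape above ran to
# completion so data.json holds the finished array. The field names match the
# records written by getProductInfo(); each record maps sizes to the
# whitelisted stores that stock them.
#
#     import json
#
#     with open('data.json') as f:
#         products = json.load(f)
#
#     for product in products:
#         in_stock_sizes = list(product["stores"].keys())
#         print(product["title"], "->", in_stock_sizes)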