#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Scrapy spider that pages through Temu's category listing JSON API."""
import io
import itertools as IT
import json
import re
from time import gmtime, strftime

import scrapy
from scrapy import Request
from scrapy.spiders import CrawlSpider
from scrapy.spiders import SitemapSpider
from scrapy.spiders import XMLFeedSpider
from scrapy_selenium import SeleniumRequest
from unicode_tr import unicode_tr
from unicode_tr.extras import slugify

from bot.items import BotItem


class Temu(CrawlSpider):
    """Crawl the Temu listing endpoint page by page and emit one BotItem per product.

    The spider POSTs ``body`` (as JSON) to ``url``, reads
    ``result.data.goods_list`` from the JSON response, yields an item for
    every entry and then requests the next page by advancing
    ``body["offset"]`` by ``body["pageSize"]``. Crawling stops at the first
    page that carries no goods.
    """

    name = "temu"
    allowed_domains = ["temu.com"]
    ELASTICSEARCH_INDEX = "temu"

    # NOTE(security): the "Anti-Content" header and the cookies below
    # (AccessToken, user_uin, __cf_bm, ...) look like values captured from a
    # logged-in browser session. They are kept verbatim so behaviour is
    # unchanged, but they should be loaded from settings/environment rather
    # than committed, and they will presumably expire - TODO confirm.
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
        "Anti-Content": "0aqWtqlUXiQ8j9uZ7jzB_ldQN44XKQUf2EmjiA8F5fXFo9S4BtztpV24zFJND0xxAWR-S8XsxeqfvmW94eF26TzKwR2MgHa9BzCKgcsbVPw7dbACWWNvxzB0m-b003G0CiSdrMiMkhodc_wqUd0m2ZBvWVpbZ0Lwnd6VsKe5MqOZYXMIqGzTVhNkvxGvR_a9cYgtuFF2yiHsIJMtI2k3ZdHBDj1DayuyDdFUJxft6_RTK9sCg-QakORR3X8pJFK4UNPIoaRDx-HArQEnLyBmBKj6juuxNs8fAZWxZzuu8O7fDk-FDRdcAL0KbudQMmU0QTC13BK5jT6QdvuJvd-XFO4vWdKpaAZvYAEysoZ85cK3l1XItLMPXHwJ8PpusZBtqbIrrwLrAjT5Xnfy2enfH6sMMsMeU4a3WHMl65TCIukW3jnqHyy1UYALTCnKl1Y911yCTObmVABfRAemZ36jpdKzXQg5YWmjJTl1Ps9EzK0J6CHucVfK8RS807Z3ijRRGzWaOJ3yLTqgQCqFQ6sIke50",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/json;charset=UTF-8",
        "Origin": "https://www.temu.com",
        "Pragma": "no-cache",
        "Referer": "https://www.temu.com/category.html?opt_id=-13&opt_level=1&title=%C3%96ne%20%C3%A7%C4%B1kan&_x_enter_scene_type=cate_tab&show_search_type=3&child_opt_list=26%2C29%2C30%2C31%2C32%2C33%2C34%2C68%2C72%2C180%2C181%2C205%2C219%2C260%2C261%2C262%2C263%2C322%2C324%2C354%2C355%2C356%2C365%2C426%2C436%2C499%2C500%2C517&leaf_type=son&refer_page_el_sn=200053&refer_page_name=home&refer_page_id=10005_1719691494137_jy4eeen0tx&refer_page_sn=10005&_x_sessn_id=eerom9rgn9",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
        "sec-ch-ua": "\"Not/A)Brand\";v=\"8\", \"Chromium\";v=\"126\", \"Google Chrome\";v=\"126\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Windows\"",
    }

    cookies = {
        "region": "203",
        "language": "tr",
        "currency": "TRY",
        "api_uid": "CnBYeWaAZUiDtgBhkRzIAg==",
        "timezone": "Europe%2FIstanbul",
        "_nano_fp": "XpmalpEbX0d8X5danT_4jmMdwBsl1tFZk8ndT8rY",
        "_bee": "k42JGozj8KWavMxqQKjyl1Y6BsV0Eiyw",
        "njrpl": "k42JGozj8KWavMxqQKjyl1Y6BsV0Eiyw",
        "dilx": "3eJzo5QlI9s5HOFYcmlH4",
        "hfsc": "L3yLcY856Tv/0ZPIeA==",
        "privacy_setting": "111",
        "_device_tag": "CgI2WRIIWG9MU3RkbnkaMHA3cYyj/hozl5o0glC4GYr0GLndX4kboA3OiF02Iw5rk64ZYogVSw6JgFF0lRzKnjAC",
        "AccessToken": "24S76M4D2QR3QLT23TMXWIXZC3KHHDSLT57DWTPIZCVZS75CJYRA0110cb22a769",
        "user_uin": "BAAWK5CQW3CD5NVQE7YW5YVX2QMHY775HMK7VV3L",
        "isLogin": "1719690587475",
        "_ttc": "3.NkgikHfskSiv.1751226588",
        "webp": "1",
        "goods": "goods_620tw9",
        "__cf_bm": "6n1ypHcDaAwkwT7o.B3E8nm4CD0sxNWR.BqxLJwSssw-1719693666-1.0.1.1-ChHZOj8WaX3neV6U8AldP5RDa8hwnqT8uTnV2NEsQvombpZGL4tSwGd5WM0pnIDBaSwnW7GsHdEvAMHpJcK8lw",
    }

    # Template of the JSON payload sent with every listing request. Only
    # "offset" changes between pages; each spider instance works on its own
    # copy (see __init__).
    body = {
        "scene": "opt",
        "pageSn": 10012,
        "offset": 0,
        "pageSize": 80,
        "pagelistId": "5af6e75d24b4416eadc37c7c7fd22c4a",
        "optId": -13,
        "listId": "category_list_aa96cd982ce44bbdb1603d763394eea8",
        "filterItems": "",
        # Same value as the former hand-written literal "1,2,3,...,517".
        "childOptList": ",".join(map(str, range(1, 518))),
    }

    url = 'https://www.temu.com/api/poppy/v1/opt?scene=opt'

    def __init__(self, *args, **kwargs):
        """Initialise the spider with a private copy of the request payload."""
        super().__init__(*args, **kwargs)
        # Pagination mutates body["offset"]; copying keeps that mutation out
        # of the class-level dict shared by every instance.
        self.body = dict(self.body)

    def start_requests(self):
        """Return the request for the first listing page."""
        return [self.next_request()]

    def parse(self, response):
        """Yield one item per product on this page, then the next-page request.

        Nothing is yielded (which ends the crawl) when the response is not
        JSON or when it contains no goods.
        """
        try:
            data = json.loads(response.body)
        except ValueError:
            self.logger.error(
                "Non-JSON response from %s (status %s)",
                response.url, response.status,
            )
            return

        result = data.get("result") if isinstance(data, dict) else None
        goods_list = ((result or {}).get("data") or {}).get("goods_list")
        if not goods_list:
            self.logger.info(
                "No goods at offset %s; stopping pagination",
                self.body["offset"],
            )
            return

        for goods in goods_list:
            # parse_parca is a generator: it must be iterated, otherwise its
            # body never runs and no item is produced.
            yield from self.parse_parca(goods)

        self.body["offset"] += self.body["pageSize"]
        yield self.next_request()

    def next_request(self):
        """Build the POST request for the page at the current ``body["offset"]``."""
        return Request(
            url=self.url,
            method='POST',
            dont_filter=True,
            cookies=self.cookies,
            headers=self.headers,
            body=json.dumps(self.body),
            callback=self.parse,
        )

    @staticmethod
    def _clean_price(value):
        """Normalise a price string: trim, ',' -> '.', drop the ' TL' suffix."""
        # NOTE(review): a value with a thousands separator such as
        # "1.234,56 TL" would become "1.234.56"; confirm the API's format.
        return value.strip().replace(",", ".").replace(" TL", "")

    def parse_parca(self, x):
        """Convert one ``goods_list`` entry into a BotItem.

        Args:
            x: a single element of ``result.data.goods_list`` (a dict).

        Yields:
            At most one populated ``BotItem``. Entries without a usable
            ``goods_id`` or lacking a required field are logged and skipped.
        """
        goods_id = x.get("goods_id")
        # assumes goods_id is a scalar (int or str) in this API - TODO confirm
        code = "" if goods_id is None else str(goods_id).strip()
        if not code:
            self.logger.warning("Skipping goods entry without goods_id")
            # A plain return ends the generator; raising StopIteration here
            # would surface as RuntimeError (PEP 479).
            return

        try:
            price_info = x["price_info"]
            price = self._clean_price(price_info["price_str"])
            title = x['title']
            thumb_url = x["thumb_url"]
            link = x["seo_link_url"]
            opt_id = x["p_rec"]["opt_id"]
        except (KeyError, TypeError, AttributeError) as exc:
            # Skip only this entry so the rest of the page and the
            # pagination request are still processed.
            self.logger.warning(
                "Skipping goods %s: malformed entry (%r)", code, exc
            )
            return

        raw_old_price = price_info.get("market_price_str")
        old_price = self._clean_price(raw_old_price) if raw_old_price else price

        item = BotItem()
        item['urun_kodu'] = "temu-" + code
        item['urun_isim'] = title
        item['urun_marka'] = 'TEMU'
        item['urun_aciklama'] = ' '
        item['urun_fiyat'] = price
        # 0 marks "no separate old price" when it matches the current price.
        item['urun_eski_fiyat'] = 0 if old_price == price else old_price
        item['urun_ilk_resim'] = thumb_url
        item['urun_link'] = link
        item['urun_satici'] = 'TEMU'
        item['urun_tarih'] = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())
        item['urun_cinsiyet'] = opt_id
        self.logger.debug("Scraped item: %s", item)
        yield item