import re

data = [
    "http://https://example.com/test_page",
    "http://https://mywebsite.org/homepage",
]

# Matches a URL that carries a redundant "http://" in front of an "https://"
# scheme. Group 1 captures only the part after the doubled prefix, so the
# scheme can be rebuilt cleanly instead of being filtered character by
# character (which would also drop the ':' separators).
_DOUBLED_SCHEME_URL = re.compile(r'http://https://([a-zA-Z0-9./_-]+)')


def extract_clean_urls(data):
    """Return repaired, lower-cased URLs extracted from *data*.

    Each string in *data* is searched for the first occurrence of the
    literal prefix ``http://https://`` followed by one or more characters
    from ``a-z``, ``A-Z``, ``0-9``, ``.``, ``/``, ``_`` and ``-``.  For every
    string that contains such a match, the redundant ``http://`` is dropped
    and the remainder is lower-cased, giving ``https://<rest>``.

    Args:
        data: Iterable of strings to scan.

    Returns:
        A list with one cleaned URL per matching input string, in input
        order.  Strings without the doubled prefix contribute nothing, so
        an empty iterable yields an empty list.
    """
    found = (_DOUBLED_SCHEME_URL.search(text) for text in data)
    # Only the captured tail is lower-cased; the scheme is a fixed literal so
    # its "://" separator is always preserved.
    return ["https://" + hit.group(1).lower() for hit in found if hit]


clean_urls = extract_clean_urls(data)

for url in clean_urls:
    print(url)