"""Scan a homepage for links that look like an Impressum / legal-notice page.

Fetches https://www.igepa.de/, then prints every anchor whose visible text
or href contains one of the German/English legal-notice keywords.
"""
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

URL = "https://www.igepa.de/"

# Lower-case keywords that typically label a German site's legal-notice page.
KEYWORDS = [
    "impressum", "imprint", "legal notice", "anbieterkennzeichnung",
    "rechtliches", "legal", "disclaimer",
]


def _matching_links(soup, keywords):
    """Yield (text, href) pairs for anchors matching any keyword.

    A match occurs when a keyword appears in the anchor's lower-cased
    visible text OR in its lower-cased href attribute.
    """
    for anchor in soup.find_all('a', href=True):
        text = anchor.get_text().strip().lower()
        href = anchor['href'].lower()
        if any(kw in text for kw in keywords) or any(kw in href for kw in keywords):
            yield text, href


def main():
    """Download the page and print each Impressum-candidate link found."""
    print(f"Fetching {URL}...")
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        )
    }
    try:
        # SECURITY: verify=False disables TLS certificate verification and
        # allows man-in-the-middle interception. Kept to preserve existing
        # behavior (the site may have a broken chain) — remove if the
        # certificate validates.
        response = requests.get(URL, headers=headers, verify=False, timeout=15)
        print(f"Status: {response.status_code}")
        soup = BeautifulSoup(response.content, 'html.parser')
        print("\n--- Searching for Impressum Candidates ---")
        found = False
        for text, href in _matching_links(soup, KEYWORDS):
            print(f"MATCH: Text='{text}' | Href='{href}'")
            found = True
        if not found:
            print("No matches found.")
    except requests.RequestException as e:
        # Narrowed from a bare `except Exception` so that programming errors
        # (AttributeError, NameError, ...) surface instead of being printed
        # as a generic "Error".
        print(f"Error: {e}")


if __name__ == "__main__":
    main()