# Brancheneinstufung2/debug_igepa.py


import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

url = "https://www.igepa.de/"

# Lower-case substrings that mark a link as a possible Impressum / legal page.
# Matching is done against the lower-cased link text and href.
IMPRESSUM_KEYWORDS = (
    "impressum",
    "imprint",
    "legal notice",
    "anbieterkennzeichnung",
    "rechtliches",
    "legal",
    "disclaimer",
)

# Browser-like User-Agent sent with the request.
REQUEST_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}


def is_impressum_candidate(text, href, keywords=IMPRESSUM_KEYWORDS):
    """Return True if *text* or *href* contains any keyword, ignoring case.

    Args:
        text: Visible link text.
        href: Raw ``href`` attribute value.
        keywords: Lower-case substrings to look for.

    Returns:
        True when at least one keyword occurs in the lower-cased text or
        the lower-cased href, otherwise False.
    """
    haystacks = (text.lower(), href.lower())
    return any(kw in haystack for haystack in haystacks for kw in keywords)


def find_impressum_candidates(soup, base_url):
    """Yield ``(text, href, absolute_url)`` for every matching anchor in *soup*.

    Args:
        soup: Parsed document offering ``find_all('a', href=True)``.
        base_url: URL used to resolve relative hrefs.

    Yields:
        Tuples of the lower-cased, stripped link text, the href exactly as
        written in the document, and the href resolved against *base_url*.
    """
    for anchor in soup.find_all('a', href=True):
        text = anchor.get_text().strip().lower()
        # Keep the href as written: URL paths are case-sensitive, so only the
        # comparison (inside is_impressum_candidate) is case-insensitive.
        href = anchor['href']
        if is_impressum_candidate(text, href):
            yield text, href, urljoin(base_url, href)


def main(target_url=url):
    """Fetch *target_url* and print every link that looks like an Impressum.

    Prints the HTTP status, one ``MATCH`` line per candidate link, or
    ``No matches found.`` when there is none. Network failures are reported
    as ``Error: ...``; any other exception propagates with its traceback.
    """
    print(f"Fetching {target_url}...")

    try:
        # NOTE(review): verify=False disables TLS certificate verification;
        # presumably deliberate for this debug tool - confirm before reuse.
        response = requests.get(
            target_url, headers=REQUEST_HEADERS, verify=False, timeout=15
        )
    except requests.RequestException as e:
        print(f"Error: {e}")
        return

    print(f"Status: {response.status_code}")

    soup = BeautifulSoup(response.content, 'html.parser')

    print("\n--- Searching for Impressum Candidates ---")

    found = False
    # Resolve against response.url so redirects are taken into account.
    for text, href, absolute in find_impressum_candidates(soup, response.url):
        print(f"MATCH: Text='{text}' | Href='{href}' | URL='{absolute}'")
        found = True

    if not found:
        print("No matches found.")


if __name__ == "__main__":
    main()