[30388f42] Infrastructure Hardening: Repaired CE/Connector DB schema, fixed frontend styling build, implemented robust echo shield in worker v2.1.1, and integrated Lead Engine into gateway.
This commit is contained in:
34
ARCHIVE_legacy_scripts/debug_igepa.py
Normal file
34
ARCHIVE_legacy_scripts/debug_igepa.py
Normal file
@@ -0,0 +1,34 @@
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urljoin
|
||||
|
||||
# Debug probe: fetch a single landing page and list every anchor whose
# visible text or href contains one of the imprint-related keywords.
url = "https://www.igepa.de/"
print(f"Fetching {url}...")

try:
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    # NOTE(review): verify=False disables TLS certificate verification.
    # Kept as in the original script; confirm it is still required before
    # reusing this snippet anywhere beyond local debugging.
    response = requests.get(url, headers=headers, verify=False, timeout=15)
    print(f"Status: {response.status_code}")

    soup = BeautifulSoup(response.content, 'html.parser')

    print("\n--- Searching for Impressum Candidates ---")
    keywords = ["impressum", "imprint", "legal notice", "anbieterkennzeichnung", "rechtliches", "legal", "disclaimer"]

    found = False
    for a in soup.find_all('a', href=True):
        text = a.get_text().strip().lower()
        # Keep the href exactly as it appears in the page so the printed
        # value is a usable link (URL paths/queries are case-sensitive);
        # only the copy used for keyword matching is lower-cased.
        href = a['href']
        href_lower = href.lower()

        # A link is a candidate if any keyword occurs in its text or href.
        if any(kw in text or kw in href_lower for kw in keywords):
            print(f"MATCH: Text='{text}' | Href='{href}'")
            found = True

    if not found:
        print("No matches found.")

except Exception as e:
    # Top-level boundary of a one-off debug script: report the failure
    # (network error, timeout, parse problem) instead of a traceback.
    print(f"Error: {e}")
|
||||
Reference in New Issue
Block a user