diff --git a/update_laws.py b/update_laws.py
new file mode 100644
index 00000000..edab0f39
--- /dev/null
+++ b/update_laws.py
@@ -0,0 +1,98 @@
+import difflib
+import hashlib
+import io
+import os
+import sys
+import xml.etree.ElementTree as ET
+import zipfile
+from datetime import datetime, timedelta
+from urllib.parse import urlparse
+
+import feedparser
+import requests
+
+RSS_URL = 'https://www.gesetze-im-internet.de/aktuDienst-rss-feed.xml'
+TOC_URL = 'https://www.gesetze-im-internet.de/gii-toc.xml'
+OUTPUT_DIR = 'laws'
+LAST_UPDATE_FILE = 'last_update.txt'
+
+# Load the timestamp of the last global update
+if os.path.exists(LAST_UPDATE_FILE):
+    with open(LAST_UPDATE_FILE, 'r') as f:
+        last_update = datetime.fromisoformat(f.read().strip())
+else:
+    last_update = datetime.now() - timedelta(days=1)
+
+feed = feedparser.parse(RSS_URL)
+new_changes = []
+
+# Collect feed entries published since the last run
+for entry in feed.entries:
+    pub_date = datetime(*entry.published_parsed[:6])
+    if pub_date > last_update:
+        title = entry.title.strip()
+        new_changes.append((title, pub_date.strftime('%Y-%m-%d')))
+
+if not new_changes:
+    print("No new changes.")
+    sys.exit(0)
+
+# Load the table of contents (law title -> download link)
+toc_response = requests.get(TOC_URL, timeout=30)
+toc_response.raise_for_status()
+toc_root = ET.fromstring(toc_response.content)
+toc_laws = {item.find('title').text.strip(): item.find('link').text.strip() for item in toc_root.findall('item')}
+
+updated = False
+for change_title, change_date in new_changes:
+    # Fuzzy-match the RSS title against the TOC titles
+    match = max(toc_laws.keys(), key=lambda t: difflib.SequenceMatcher(None, t.lower(), change_title.lower()).ratio(), default=None)
+    if match and difflib.SequenceMatcher(None, match.lower(), change_title.lower()).ratio() > 0.8:
+        link = toc_laws[match]
+        # Link looks like .../<abbrev>/xml.zip -> use the directory name as abbreviation
+        abbrev = os.path.basename(os.path.dirname(urlparse(link).path))
+
+        zip_response = requests.get(link, timeout=60)
+        if zip_response.status_code == 200:
+            with zipfile.ZipFile(io.BytesIO(zip_response.content)) as z:
+                xml_files = [name for name in z.namelist() if name.endswith('.xml')]
+                if xml_files:
+                    xml_content = z.read(xml_files[0])
+                    xml_root = ET.fromstring(xml_content)
+
+                    # Extract the "Stand" comment from the metadata block
+                    meta = xml_root.find('.//metadaten')
+                    stand_comment = ''
+                    if meta is not None:
+                        stand_elem = meta.find('standangabe')
+                        if stand_elem is not None:
+                            comment_elem = stand_elem.find('standkommentar')
+                            if comment_elem is not None:
+                                stand_comment = ET.tostring(comment_elem, encoding='unicode', method='text').strip()
+                    new_stand_hash = hashlib.md5(stand_comment.encode()).hexdigest()
+
+                    # Compare against the locally stored hash (last_stand.txt in the law directory)
+                    law_dir = os.path.join(OUTPUT_DIR, abbrev)
+                    os.makedirs(law_dir, exist_ok=True)
+                    stand_file = os.path.join(law_dir, 'last_stand.txt')
+                    old_stand_hash = ''
+                    if os.path.exists(stand_file):
+                        with open(stand_file, 'r') as f:
+                            old_stand_hash = f.read().strip()
+
+                    if new_stand_hash != old_stand_hash:
+                        # Store the new version and remember its hash
+                        file_path = os.path.join(law_dir, f"{abbrev}_{change_date}_{new_stand_hash[:8]}.xml")
+                        with open(file_path, 'wb') as f:
+                            f.write(xml_content)
+                        with open(stand_file, 'w') as f:
+                            f.write(new_stand_hash)
+                        print(f"Updated: {match} as {file_path}")
+                        updated = True
+
+# Persist the global update timestamp
+with open(LAST_UPDATE_FILE, 'w') as f:
+    f.write(datetime.now().isoformat())
+
+if updated:
+    print("Changes committed.")