Compare commits
2 Commits
6de2d2020b
...
73c1f5273d
| Author | SHA1 | Date | |
|---|---|---|---|
| 73c1f5273d | |||
| 34e44d80fb |
76
.gitea/workflows/update-laws.yaml
Normal file
76
.gitea/workflows/update-laws.yaml
Normal file
@@ -0,0 +1,76 @@
|
||||
# Scheduled job that pulls law updates announced in the RSS feed, converts the
# downloaded XML to Markdown and commits/pushes the result back to `main`.
name: Update Laws from RSS

on:
  schedule:
    # Daily at 02:00 UTC (03:00 CET / 04:00 CEST)
    - cron: '0 2 * * *'
  # Manual trigger for testing
  workflow_dispatch:

jobs:
  update:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Full history for Git operations
          persist-credentials: true # Keep credentials so the later push works

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run update script
        run: |
          python3 update_laws.py
        continue-on-error: true # Best effort: keep going even if the update fails

      - name: Convert XML to Markdown
        if: success() || failure() # Always run, even if the update step failed
        run: |
          python3 xml_to_markdown.py --prod || echo "Konvertierung fehlgeschlagen oder keine Änderungen"
        continue-on-error: true

      - name: Configure Git
        run: |
          git config --global user.name 'LawGit Bot'
          git config --global user.email 'bot@git.coded.law'
          git config --global init.defaultBranch main

      - name: Check for changes
        id: check_changes
        run: |
          git add -A
          if git diff --staged --quiet; then
            echo "changed=false" >> "$GITHUB_OUTPUT"
            echo "Keine Änderungen gefunden"
          else
            echo "changed=true" >> "$GITHUB_OUTPUT"
            echo "Änderungen gefunden"
            git status
          fi

      - name: Commit and push changes
        id: push_changes
        if: steps.check_changes.outputs.changed == 'true'
        run: |
          # `date -u` so the timestamp really is UTC, as the message claims.
          if ! git commit -m "Daily law updates from RSS - $(date -u +'%Y-%m-%d %H:%M:%S UTC')"; then
            echo "pushed=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          # The push stays best-effort (the step does not fail), but its result
          # is recorded so the summary below reports what actually happened.
          if git push origin main; then
            echo "pushed=true" >> "$GITHUB_OUTPUT"
          else
            echo "pushed=false" >> "$GITHUB_OUTPUT"
            echo "Push fehlgeschlagen"
          fi

      - name: Summary
        if: always()
        run: |
          echo "## Update-Zusammenfassung"
          echo "- Workflow ausgeführt: $(date)"
          if [ "${{ steps.push_changes.outputs.pushed }}" = "true" ]; then
            echo "- ✅ Änderungen wurden committed und gepusht"
          elif [ "${{ steps.check_changes.outputs.changed }}" = "true" ]; then
            echo "- ⚠️ Änderungen gefunden, aber Commit/Push fehlgeschlagen"
          else
            echo "- ℹ️ Keine Änderungen gefunden"
          fi
|
||||
|
||||
94
update_laws.py
Normal file
94
update_laws.py
Normal file
@@ -0,0 +1,94 @@
|
||||
import difflib
import hashlib
import io
import os
import xml.etree.ElementTree as ET
import zipfile
from datetime import datetime, timedelta, timezone
from urllib.parse import urlparse

import feedparser
import requests
|
||||
|
||||
RSS_URL = 'https://www.gesetze-im-internet.de/aktuDienst-rss-feed.xml'
TOC_URL = 'https://www.gesetze-im-internet.de/gii-toc.xml'
OUTPUT_DIR = 'laws'
LAST_UPDATE_FILE = 'last_update.txt'
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled server hangs the job
MATCH_THRESHOLD = 0.8  # minimum similarity between RSS title and TOC title


def utc_now():
    """Return the current time as a naive datetime expressed in UTC.

    Feed timestamps (``published_parsed``) are UTC, so every timestamp this
    script compares or stores is kept as naive UTC. The ISO format written to
    ``LAST_UPDATE_FILE`` therefore stays the same as before.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


def load_last_update(path=LAST_UPDATE_FILE):
    """Read the timestamp of the last successful run from *path*.

    Falls back to "24 hours ago" when the file is missing or does not contain
    a valid ISO timestamp.
    """
    fallback = utc_now() - timedelta(days=1)
    try:
        with open(path, 'r', encoding='utf-8') as f:
            stored = datetime.fromisoformat(f.read().strip())
    except (FileNotFoundError, ValueError):
        return fallback
    if stored.tzinfo is not None:
        # Normalise an aware timestamp to the naive-UTC convention.
        stored = stored.astimezone(timezone.utc).replace(tzinfo=None)
    return stored


def collect_new_changes(entries, last_update):
    """Return ``(title, 'YYYY-MM-DD')`` for every entry newer than *last_update*.

    Entries without a parsed publication date or without a title are skipped
    instead of aborting the whole run.
    """
    changes = []
    for entry in entries:
        published = entry.get('published_parsed')
        title = (entry.get('title') or '').strip()
        if not published or not title:
            continue
        pub_date = datetime(*published[:6])
        if pub_date > last_update:
            changes.append((title, pub_date.strftime('%Y-%m-%d')))
    return changes


def parse_toc(xml_bytes):
    """Map law title -> download link from the table-of-contents XML."""
    toc_laws = {}
    for item in ET.fromstring(xml_bytes).findall('item'):
        title = (item.findtext('title') or '').strip()
        link = (item.findtext('link') or '').strip()
        if title and link:
            toc_laws[title] = link
    return toc_laws


def find_best_match(change_title, titles, threshold=MATCH_THRESHOLD):
    """Return the title most similar to *change_title*, or ``None``.

    A title only counts as a match when its similarity ratio is strictly
    greater than *threshold*. On ties the first candidate wins.
    """
    matcher = difflib.SequenceMatcher(None)
    # SequenceMatcher caches its analysis of the second sequence, so the fixed
    # string goes there and only the first sequence changes per candidate.
    matcher.set_seq2(change_title.lower())
    best_title = None
    best_ratio = threshold
    for title in titles:
        matcher.set_seq1(title.lower())
        ratio = matcher.ratio()
        if ratio > best_ratio:
            best_title, best_ratio = title, ratio
    return best_title


def law_abbrev_from_link(link):
    """Derive the law's directory name from its download link.

    Links have the form ``.../<abbrev>/xml.zip``; the abbreviation is the path
    segment *before* ``xml.zip``. (Taking the basename of the path would yield
    ``'xml.zip'`` for every law.) Returns ``''`` when no usable segment exists.
    """
    segments = [part for part in urlparse(link).path.split('/') if part]
    if segments and segments[-1] == 'xml.zip':
        segments.pop()
    if not segments or segments[-1] in ('.', '..'):
        return ''
    return segments[-1]


def extract_stand_comment(xml_root):
    """Return the text of the first ``standangabe/standkommentar``, or ``''``.

    Missing ``metadaten``, ``standangabe`` or ``standkommentar`` elements all
    yield an empty string rather than raising.
    """
    meta = xml_root.find('.//metadaten')
    if meta is None:
        return ''
    stand_elem = meta.find('standangabe')
    if stand_elem is None:
        return ''
    comment_elem = stand_elem.find('standkommentar')
    if comment_elem is None:
        return ''
    return ET.tostring(comment_elem, encoding='unicode', method='text').strip()


def update_law(link, change_date):
    """Download one law and store it if its "Stand" comment changed.

    Returns the path of the newly written XML file, or ``None`` when the
    stored hash already matches. Raises ``RuntimeError`` for an unusable link,
    a non-200 response or an archive without XML; network, ZIP and XML parse
    errors propagate from the underlying libraries.
    """
    abbrev = law_abbrev_from_link(link)
    if not abbrev:
        raise RuntimeError(f"cannot derive law abbreviation from {link!r}")

    response = requests.get(link, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        raise RuntimeError(f"HTTP {response.status_code} for {link}")

    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        xml_names = [name for name in archive.namelist() if name.endswith('.xml')]
        if not xml_names:
            raise RuntimeError(f"no XML file in {link}")
        xml_content = archive.read(xml_names[0])

    stand_comment = extract_stand_comment(ET.fromstring(xml_content))
    # MD5 is used purely as a change fingerprint, not for security.
    new_stand_hash = hashlib.md5(stand_comment.encode()).hexdigest()

    # The last seen hash lives in last_stand.txt inside the law's directory.
    law_dir = os.path.join(OUTPUT_DIR, abbrev)
    os.makedirs(law_dir, exist_ok=True)
    stand_file = os.path.join(law_dir, 'last_stand.txt')
    old_stand_hash = ''
    if os.path.exists(stand_file):
        with open(stand_file, 'r', encoding='utf-8') as f:
            old_stand_hash = f.read().strip()

    if new_stand_hash == old_stand_hash:
        return None

    file_path = os.path.join(law_dir, f"{abbrev}_{change_date}_{new_stand_hash[:8]}.xml")
    with open(file_path, 'wb') as f:
        f.write(xml_content)
    with open(stand_file, 'w', encoding='utf-8') as f:
        f.write(new_stand_hash)
    return file_path


def main():
    """Fetch the RSS feed and download every law announced since the last run."""
    # Taken before the feed is fetched so entries published while this run is
    # in progress are picked up next time.
    run_started = utc_now()
    last_update = load_last_update()

    feed = feedparser.parse(RSS_URL)
    new_changes = collect_new_changes(feed.entries, last_update)
    if not new_changes:
        print("No new changes.")
        return

    toc_response = requests.get(TOC_URL, timeout=REQUEST_TIMEOUT)
    toc_response.raise_for_status()
    toc_laws = parse_toc(toc_response.content)

    updated = False
    had_failures = False
    for change_title, change_date in new_changes:
        match = find_best_match(change_title, toc_laws)
        if match is None:
            continue
        try:
            file_path = update_law(toc_laws[match], change_date)
        except (requests.RequestException, zipfile.BadZipFile,
                ET.ParseError, RuntimeError, OSError) as err:
            had_failures = True
            print(f"Failed to update {match}: {err}")
            continue
        if file_path is not None:
            print(f"Updated: {match} as {file_path}")
            updated = True

    # Only advance the global timestamp when everything succeeded; otherwise
    # the failed laws would never be retried. Re-processing is harmless
    # because unchanged laws are detected through their stored hash.
    if not had_failures:
        with open(LAST_UPDATE_FILE, 'w', encoding='utf-8') as f:
            f.write(run_started.isoformat())

    if updated:
        print("Changes committed.")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user