Compare commits

...

2 Commits

2 changed files with 170 additions and 0 deletions

View File

@@ -0,0 +1,76 @@
name: Update Laws from RSS

on:
  schedule:
    # Daily at 02:00 UTC (03:00 CET / 04:00 CEST)
    - cron: '0 2 * * *'
  # Manual trigger for testing
  workflow_dispatch:

# The default GITHUB_TOKEN may be read-only depending on repository settings;
# the commit-and-push step below requires explicit write access.
permissions:
  contents: write

# Prevent a manual dispatch from racing the scheduled run — both push to main.
concurrency:
  group: update-laws
  cancel-in-progress: false

jobs:
  update:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # full history for git operations
          persist-credentials: true  # keep credentials for the push step

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run update script
        run: |
          python3 update_laws.py
        continue-on-error: true  # deliberate best-effort: keep going on errors

      - name: Convert XML to Markdown
        # Run even when the update step failed (but not when cancelled).
        if: success() || failure()
        run: |
          python3 xml_to_markdown.py --prod || echo "Konvertierung fehlgeschlagen oder keine Änderungen"
        continue-on-error: true

      - name: Configure Git
        run: |
          git config --global user.name 'LawGit Bot'
          git config --global user.email 'bot@git.coded.law'
          git config --global init.defaultBranch main

      - name: Check for changes
        id: check_changes
        run: |
          git add -A
          if git diff --staged --quiet; then
            echo "changed=false" >> $GITHUB_OUTPUT
            echo "Keine Änderungen gefunden"
          else
            echo "changed=true" >> $GITHUB_OUTPUT
            echo "Änderungen gefunden"
            git status
          fi

      - name: Commit and push changes
        if: steps.check_changes.outputs.changed == 'true'
        run: |
          git commit -m "Daily law updates from RSS - $(date +'%Y-%m-%d %H:%M:%S UTC')" || exit 0
          # Fail the job visibly if the push does not go through; a silently
          # swallowed push failure would show a green run while losing data.
          git push origin main || { echo "Push fehlgeschlagen"; exit 1; }

      - name: Summary
        if: always()
        run: |
          echo "## Update-Zusammenfassung"
          echo "- Workflow ausgeführt: $(date)"
          if [ "${{ steps.check_changes.outputs.changed }}" == "true" ]; then
            echo "- ✅ Änderungen wurden committed und gepusht"
          else
            echo "- Keine Änderungen gefunden"
          fi

94
update_laws.py Normal file
View File

@@ -0,0 +1,94 @@
import difflib
import hashlib
import io
import os
import xml.etree.ElementTree as ET
import zipfile
from datetime import datetime, timedelta, timezone
from urllib.parse import urlparse

import feedparser
import requests
# RSS feed announcing daily legislative changes, and the full table of
# contents of gesetze-im-internet.de (title -> zip download link).
RSS_URL = 'https://www.gesetze-im-internet.de/aktuDienst-rss-feed.xml'
TOC_URL = 'https://www.gesetze-im-internet.de/gii-toc.xml'
OUTPUT_DIR = 'laws'
LAST_UPDATE_FILE = 'last_update.txt'
HTTP_TIMEOUT = 60  # seconds; without a timeout a stuck request hangs the CI job forever
MATCH_THRESHOLD = 0.8  # minimum fuzzy-match ratio between RSS title and TOC title

# Load the timestamp of the last successful global update (stored as naive UTC).
if os.path.exists(LAST_UPDATE_FILE):
    with open(LAST_UPDATE_FILE, 'r') as f:
        last_update = datetime.fromisoformat(f.read().strip())
else:
    # First run: look one day back. Use naive UTC so the comparison with the
    # feed's UTC publication dates is consistent (datetime.now() is local time
    # and would be skewed by the UTC offset).
    last_update = datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(days=1)

feed = feedparser.parse(RSS_URL)

# Collect (title, date) pairs for feed entries newer than the last update.
new_changes = []
for entry in feed.entries:
    # published_parsed is a UTC time.struct_time, but feedparser may omit it
    # or leave it None for malformed entries — skip those instead of crashing.
    parsed = getattr(entry, 'published_parsed', None)
    if parsed is None:
        continue
    pub_date = datetime(*parsed[:6])
    if pub_date > last_update:
        new_changes.append((entry.title.strip(), pub_date.strftime('%Y-%m-%d')))

if not new_changes:
    print("No new changes.")
    raise SystemExit(0)

# Load the table of contents and build a title -> zip-link lookup.
# Guard against malformed <item> entries missing <title> or <link>.
toc_response = requests.get(TOC_URL, timeout=HTTP_TIMEOUT)
toc_response.raise_for_status()
toc_root = ET.fromstring(toc_response.content)
toc_laws = {}
for item in toc_root.findall('item'):
    title_elem = item.find('title')
    link_elem = item.find('link')
    if title_elem is not None and title_elem.text and link_elem is not None and link_elem.text:
        toc_laws[title_elem.text.strip()] = link_elem.text.strip()

updated = False
for change_title, change_date in new_changes:
    # Fuzzy-match the RSS title against the TOC titles: the RSS wording often
    # differs slightly from the official TOC title.
    match = max(
        toc_laws,
        key=lambda t: difflib.SequenceMatcher(None, t.lower(), change_title.lower()).ratio(),
        default=None,
    )
    if match is None:
        continue
    if difflib.SequenceMatcher(None, match.lower(), change_title.lower()).ratio() <= MATCH_THRESHOLD:
        continue

    link = toc_laws[match]
    # TOC links look like https://www.gesetze-im-internet.de/<abbrev>/xml.zip.
    # BUGFIX: os.path.basename(path) is always 'xml.zip', so the previous
    # .replace('/xml.zip', '') removed nothing and every law was filed under
    # laws/xml.zip/. The abbreviation is the parent directory name.
    abbrev = os.path.basename(os.path.dirname(urlparse(link).path))

    zip_response = requests.get(link, timeout=HTTP_TIMEOUT)
    if zip_response.status_code != 200:
        continue
    with zipfile.ZipFile(io.BytesIO(zip_response.content)) as z:
        xml_files = [name for name in z.namelist() if name.endswith('.xml')]
        if not xml_files:
            continue
        xml_content = z.read(xml_files[0])
    xml_root = ET.fromstring(xml_content)

    # Extract the "Stand" (version status) comment; its hash is the change
    # fingerprint used to decide whether this law differs from the stored copy.
    meta = xml_root.find('.//metadaten')
    stand_comment = ''
    if meta is not None:  # guard: malformed documents would raise AttributeError
        stand_elem = meta.find('standangabe')
        if stand_elem is not None:
            comment_elem = stand_elem.find('standkommentar')
            if comment_elem is not None:
                stand_comment = ET.tostring(comment_elem, encoding='unicode', method='text').strip()
    # md5 is used purely as a change-detection fingerprint, not for security.
    new_stand_hash = hashlib.md5(stand_comment.encode()).hexdigest()

    # Compare against the locally recorded fingerprint in <law_dir>/last_stand.txt.
    law_dir = os.path.join(OUTPUT_DIR, abbrev)
    os.makedirs(law_dir, exist_ok=True)
    stand_file = os.path.join(law_dir, 'last_stand.txt')
    old_stand_hash = ''
    if os.path.exists(stand_file):
        with open(stand_file, 'r') as f:
            old_stand_hash = f.read().strip()

    if new_stand_hash != old_stand_hash:
        file_path = os.path.join(law_dir, f"{abbrev}_{change_date}_{new_stand_hash[:8]}.xml")
        with open(file_path, 'wb') as f:
            f.write(xml_content)
        with open(stand_file, 'w') as f:
            f.write(new_stand_hash)
        print(f"Updated: {match} as {file_path}")
        updated = True

# Persist the new global update timestamp (naive UTC, matching the feed dates).
with open(LAST_UPDATE_FILE, 'w') as f:
    f.write(datetime.now(timezone.utc).replace(tzinfo=None).isoformat())
if updated:
    print("Changes committed.")