Feature: Add path filter for Forgejo repository imports
- Added path_filter parameter to fetch_forgejo_repo_markdown_files()
- Filter supports wildcard patterns (e.g., 'posts/*/index.md')
- Fixed hardcoded base URL - now detects git.rpi-virtuell.de and other instances
- Added --filter command line option for repo mode
- Updated help text with filter examples
- Enables selective import of specific markdown files from repository

Usage: python workflow.py --repo <URL> [branch] --filter 'Website/content/posts/*/index.md'

Example: Imports only index.md files from posts subdirectories (59 files found)
This commit is contained in:
parent
98d7244e9d
commit
86717185c4
1 changed file with 40 additions and 10 deletions
46
workflow.py
46
workflow.py
|
|
@ -279,13 +279,15 @@ def process_post(wp_api: WordPressAPI, post_config: Dict[str, Any],
|
||||||
return post_id
|
return post_id
|
||||||
|
|
||||||
|
|
||||||
def fetch_forgejo_repo_markdown_files(repo_url: str, branch: str = 'main') -> List[str]:
|
def fetch_forgejo_repo_markdown_files(repo_url: str, branch: str = 'main',
|
||||||
|
path_filter: str = None) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Holt alle Markdown-URLs aus einem Forgejo-Repository
|
Holt alle Markdown-URLs aus einem Forgejo-Repository
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
repo_url: URL zum Repository (z.B. https://codeberg.org/user/repo)
|
repo_url: URL zum Repository (z.B. https://codeberg.org/user/repo)
|
||||||
branch: Branch-Name (Standard: main)
|
branch: Branch-Name (Standard: main)
|
||||||
|
path_filter: Optionaler Filter für Pfade (z.B. 'posts/*/index.md')
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Liste von URLs zu Markdown-Dateien
|
Liste von URLs zu Markdown-Dateien
|
||||||
|
|
@ -302,17 +304,23 @@ def fetch_forgejo_repo_markdown_files(repo_url: str, branch: str = 'main') -> Li
|
||||||
owner = parts[-2]
|
owner = parts[-2]
|
||||||
repo = parts[-1]
|
repo = parts[-1]
|
||||||
|
|
||||||
# API-URL ermitteln
|
# Basis-URL ermitteln (für Raw-URLs)
|
||||||
if 'codeberg.org' in repo_url:
|
if 'codeberg.org' in repo_url:
|
||||||
|
base_url = 'https://codeberg.org'
|
||||||
api_base = 'https://codeberg.org/api/v1'
|
api_base = 'https://codeberg.org/api/v1'
|
||||||
|
elif 'git.rpi-virtuell.de' in repo_url:
|
||||||
|
base_url = 'https://git.rpi-virtuell.de'
|
||||||
|
api_base = 'https://git.rpi-virtuell.de/api/v1'
|
||||||
elif 'gitea' in repo_url or 'forgejo' in repo_url:
|
elif 'gitea' in repo_url or 'forgejo' in repo_url:
|
||||||
# Generischer Ansatz für selbst-gehostete Instanzen
|
# Generischer Ansatz für selbst-gehostete Instanzen
|
||||||
base_parts = repo_url.split('/')[:3]
|
base_parts = repo_url.split('/')[:3]
|
||||||
api_base = '/'.join(base_parts) + '/api/v1'
|
base_url = '/'.join(base_parts)
|
||||||
|
api_base = base_url + '/api/v1'
|
||||||
else:
|
else:
|
||||||
print(f"Warnung: Unbekannte Forgejo-Instanz, versuche generischen API-Pfad")
|
print(f"Warnung: Unbekannte Forgejo-Instanz, versuche generischen API-Pfad")
|
||||||
base_parts = repo_url.split('/')[:3]
|
base_parts = repo_url.split('/')[:3]
|
||||||
api_base = '/'.join(base_parts) + '/api/v1'
|
base_url = '/'.join(base_parts)
|
||||||
|
api_base = base_url + '/api/v1'
|
||||||
|
|
||||||
api_url = f"{api_base}/repos/{owner}/{repo}/git/trees/{branch}?recursive=true"
|
api_url = f"{api_base}/repos/{owner}/{repo}/git/trees/{branch}?recursive=true"
|
||||||
|
|
||||||
|
|
@ -324,8 +332,17 @@ def fetch_forgejo_repo_markdown_files(repo_url: str, branch: str = 'main') -> Li
|
||||||
markdown_files = []
|
markdown_files = []
|
||||||
for item in data.get('tree', []):
|
for item in data.get('tree', []):
|
||||||
if item['type'] == 'blob' and item['path'].endswith('.md'):
|
if item['type'] == 'blob' and item['path'].endswith('.md'):
|
||||||
|
# Filter anwenden falls vorhanden
|
||||||
|
if path_filter:
|
||||||
|
# Konvertiere Wildcard-Filter zu regex-Pattern
|
||||||
|
# z.B. 'posts/*/index.md' -> '^posts/[^/]+/index\.md$'
|
||||||
|
import re
|
||||||
|
pattern = path_filter.replace('*', '[^/]+').replace('.', r'\.')
|
||||||
|
if not re.match(f'^{pattern}$', item['path']):
|
||||||
|
continue
|
||||||
|
|
||||||
# Raw-URL konstruieren
|
# Raw-URL konstruieren
|
||||||
raw_url = f"https://codeberg.org/{owner}/{repo}/raw/branch/{branch}/{item['path']}"
|
raw_url = f"{base_url}/{owner}/{repo}/raw/branch/{branch}/{item['path']}"
|
||||||
markdown_files.append(raw_url)
|
markdown_files.append(raw_url)
|
||||||
|
|
||||||
return markdown_files
|
return markdown_files
|
||||||
|
|
@ -385,12 +402,21 @@ def main():
|
||||||
repo_index = sys.argv.index('--forgejo-repo') if '--forgejo-repo' in sys.argv else sys.argv.index('--repo')
|
repo_index = sys.argv.index('--forgejo-repo') if '--forgejo-repo' in sys.argv else sys.argv.index('--repo')
|
||||||
if len(sys.argv) > repo_index + 1:
|
if len(sys.argv) > repo_index + 1:
|
||||||
repo_url = sys.argv[repo_index + 1]
|
repo_url = sys.argv[repo_index + 1]
|
||||||
branch = sys.argv[repo_index + 2] if len(sys.argv) > repo_index + 2 else 'main'
|
branch = sys.argv[repo_index + 2] if len(sys.argv) > repo_index + 2 and not sys.argv[repo_index + 2].startswith('--') else 'main'
|
||||||
|
|
||||||
|
# Pfad-Filter für spezifische Dateien (z.B. nur index.md in posts/)
|
||||||
|
path_filter = None
|
||||||
|
if '--filter' in sys.argv:
|
||||||
|
filter_index = sys.argv.index('--filter')
|
||||||
|
if len(sys.argv) > filter_index + 1:
|
||||||
|
path_filter = sys.argv[filter_index + 1]
|
||||||
|
|
||||||
print(f"Forgejo-Modus: Verarbeite Repository: {repo_url}")
|
print(f"Forgejo-Modus: Verarbeite Repository: {repo_url}")
|
||||||
print(f"Branch: {branch}")
|
print(f"Branch: {branch}")
|
||||||
|
if path_filter:
|
||||||
|
print(f"Filter: {path_filter}")
|
||||||
|
|
||||||
markdown_urls = fetch_forgejo_repo_markdown_files(repo_url, branch)
|
markdown_urls = fetch_forgejo_repo_markdown_files(repo_url, branch, path_filter)
|
||||||
|
|
||||||
if not markdown_urls:
|
if not markdown_urls:
|
||||||
print("Keine Markdown-Dateien im Repository gefunden")
|
print("Keine Markdown-Dateien im Repository gefunden")
|
||||||
|
|
@ -452,7 +478,11 @@ def main():
|
||||||
print("\nVerwendung:")
|
print("\nVerwendung:")
|
||||||
print(" python workflow.py [config.yaml] # YAML-Konfiguration")
|
print(" python workflow.py [config.yaml] # YAML-Konfiguration")
|
||||||
print(" python workflow.py <URL> # Einzelne Markdown-URL")
|
print(" python workflow.py <URL> # Einzelne Markdown-URL")
|
||||||
print(" python workflow.py --repo <REPO_URL> [branch] # Forgejo-Repository")
|
print(" python workflow.py --repo <REPO_URL> [branch] # Forgejo-Repository (alle .md)")
|
||||||
|
print(" python workflow.py --repo <REPO_URL> [branch] --filter <pattern> # Mit Filter")
|
||||||
|
print("\nFilter-Beispiele:")
|
||||||
|
print(" --filter 'posts/*/index.md' # Nur index.md in posts/-Unterverzeichnissen")
|
||||||
|
print(" --filter 'Website/content/posts/*/index.md' # Mit vollständigem Pfad")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
print(f"Lade Konfiguration aus: {config_file}")
|
print(f"Lade Konfiguration aus: {config_file}")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue