Feature: Add path filter for Forgejo repository imports
- Added path_filter parameter to fetch_forgejo_repo_markdown_files()
- Filter supports wildcard patterns (e.g., 'posts/*/index.md')
- Fixed hardcoded base URL - now detects git.rpi-virtuell.de and other instances
- Added --filter command line option for repo mode
- Updated help text with filter examples
- Enables selective import of specific markdown files from repository

Usage:
  python workflow.py --repo <URL> [branch] --filter 'Website/content/posts/*/index.md'

Example: Imports only index.md files from posts subdirectories (59 files found)
This commit is contained in:
parent
98d7244e9d
commit
86717185c4
1 changed file with 40 additions and 10 deletions
50
workflow.py
50
workflow.py
|
|
@ -279,13 +279,15 @@ def process_post(wp_api: WordPressAPI, post_config: Dict[str, Any],
|
|||
return post_id
|
||||
|
||||
|
||||
def fetch_forgejo_repo_markdown_files(repo_url: str, branch: str = 'main') -> List[str]:
|
||||
def fetch_forgejo_repo_markdown_files(repo_url: str, branch: str = 'main',
|
||||
path_filter: str = None) -> List[str]:
|
||||
"""
|
||||
Holt alle Markdown-URLs aus einem Forgejo-Repository
|
||||
|
||||
Args:
|
||||
repo_url: URL zum Repository (z.B. https://codeberg.org/user/repo)
|
||||
branch: Branch-Name (Standard: main)
|
||||
path_filter: Optionaler Filter für Pfade (z.B. 'posts/*/index.md')
|
||||
|
||||
Returns:
|
||||
Liste von URLs zu Markdown-Dateien
|
||||
|
|
@ -302,17 +304,23 @@ def fetch_forgejo_repo_markdown_files(repo_url: str, branch: str = 'main') -> Li
|
|||
owner = parts[-2]
|
||||
repo = parts[-1]
|
||||
|
||||
# API-URL ermitteln
|
||||
# Basis-URL ermitteln (für Raw-URLs)
|
||||
if 'codeberg.org' in repo_url:
|
||||
base_url = 'https://codeberg.org'
|
||||
api_base = 'https://codeberg.org/api/v1'
|
||||
elif 'git.rpi-virtuell.de' in repo_url:
|
||||
base_url = 'https://git.rpi-virtuell.de'
|
||||
api_base = 'https://git.rpi-virtuell.de/api/v1'
|
||||
elif 'gitea' in repo_url or 'forgejo' in repo_url:
|
||||
# Generischer Ansatz für selbst-gehostete Instanzen
|
||||
base_parts = repo_url.split('/')[:3]
|
||||
api_base = '/'.join(base_parts) + '/api/v1'
|
||||
base_url = '/'.join(base_parts)
|
||||
api_base = base_url + '/api/v1'
|
||||
else:
|
||||
print(f"Warnung: Unbekannte Forgejo-Instanz, versuche generischen API-Pfad")
|
||||
base_parts = repo_url.split('/')[:3]
|
||||
api_base = '/'.join(base_parts) + '/api/v1'
|
||||
base_url = '/'.join(base_parts)
|
||||
api_base = base_url + '/api/v1'
|
||||
|
||||
api_url = f"{api_base}/repos/{owner}/{repo}/git/trees/{branch}?recursive=true"
|
||||
|
||||
|
|
@ -324,8 +332,17 @@ def fetch_forgejo_repo_markdown_files(repo_url: str, branch: str = 'main') -> Li
|
|||
markdown_files = []
|
||||
for item in data.get('tree', []):
|
||||
if item['type'] == 'blob' and item['path'].endswith('.md'):
|
||||
# Filter anwenden falls vorhanden
|
||||
if path_filter:
|
||||
# Konvertiere Wildcard-Filter zu regex-Pattern
|
||||
# z.B. 'posts/*/index.md' -> '^posts/[^/]+/index\.md$'
|
||||
import re
|
||||
pattern = path_filter.replace('*', '[^/]+').replace('.', r'\.')
|
||||
if not re.match(f'^{pattern}$', item['path']):
|
||||
continue
|
||||
|
||||
# Raw-URL konstruieren
|
||||
raw_url = f"https://codeberg.org/{owner}/{repo}/raw/branch/{branch}/{item['path']}"
|
||||
raw_url = f"{base_url}/{owner}/{repo}/raw/branch/{branch}/{item['path']}"
|
||||
markdown_files.append(raw_url)
|
||||
|
||||
return markdown_files
|
||||
|
|
@ -385,12 +402,21 @@ def main():
|
|||
repo_index = sys.argv.index('--forgejo-repo') if '--forgejo-repo' in sys.argv else sys.argv.index('--repo')
|
||||
if len(sys.argv) > repo_index + 1:
|
||||
repo_url = sys.argv[repo_index + 1]
|
||||
branch = sys.argv[repo_index + 2] if len(sys.argv) > repo_index + 2 else 'main'
|
||||
branch = sys.argv[repo_index + 2] if len(sys.argv) > repo_index + 2 and not sys.argv[repo_index + 2].startswith('--') else 'main'
|
||||
|
||||
# Pfad-Filter für spezifische Dateien (z.B. nur index.md in posts/)
|
||||
path_filter = None
|
||||
if '--filter' in sys.argv:
|
||||
filter_index = sys.argv.index('--filter')
|
||||
if len(sys.argv) > filter_index + 1:
|
||||
path_filter = sys.argv[filter_index + 1]
|
||||
|
||||
print(f"Forgejo-Modus: Verarbeite Repository: {repo_url}")
|
||||
print(f"Branch: {branch}")
|
||||
if path_filter:
|
||||
print(f"Filter: {path_filter}")
|
||||
|
||||
markdown_urls = fetch_forgejo_repo_markdown_files(repo_url, branch)
|
||||
markdown_urls = fetch_forgejo_repo_markdown_files(repo_url, branch, path_filter)
|
||||
|
||||
if not markdown_urls:
|
||||
print("Keine Markdown-Dateien im Repository gefunden")
|
||||
|
|
@ -450,9 +476,13 @@ def main():
|
|||
if not os.path.exists(config_file):
|
||||
print(f"Fehler: Konfigurationsdatei '{config_file}' nicht gefunden")
|
||||
print("\nVerwendung:")
|
||||
print(" python workflow.py [config.yaml] # YAML-Konfiguration")
|
||||
print(" python workflow.py <URL> # Einzelne Markdown-URL")
|
||||
print(" python workflow.py --repo <REPO_URL> [branch] # Forgejo-Repository")
|
||||
print(" python workflow.py [config.yaml] # YAML-Konfiguration")
|
||||
print(" python workflow.py <URL> # Einzelne Markdown-URL")
|
||||
print(" python workflow.py --repo <REPO_URL> [branch] # Forgejo-Repository (alle .md)")
|
||||
print(" python workflow.py --repo <REPO_URL> [branch] --filter <pattern> # Mit Filter")
|
||||
print("\nFilter-Beispiele:")
|
||||
print(" --filter 'posts/*/index.md' # Nur index.md in posts/-Unterverzeichnissen")
|
||||
print(" --filter 'Website/content/posts/*/index.md' # Mit vollständigem Pfad")
|
||||
sys.exit(1)
|
||||
|
||||
print(f"Lade Konfiguration aus: {config_file}")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue