feat: implement core parser and Forgejo API client

Implementiert:
- YAML Front Matter Extractor mit parse/remove/has Funktionen
- AMB Metadata Extractor für Schema.org-konforme Metadaten
- Core Parser mit unified/remark Pipeline
  - parseMarkdownFile für lokale Dateien
  - parseMarkdownString für Strings
  - extractHeadings, extractLinks, extractImages Utilities
- Forgejo API Client
  - getFileContent, listDirectory, listPosts
  - getPostContent, getAllPosts
  - Volle API-Integration mit Token-Auth
- Public API in src/index.js
- 3 Beispiele (parse-forgejo, list-all-posts, parse-local)
- 11 Unit Tests (alle passing)
- Test-Fixtures mit AMB-konformen Beispieldaten

Tests: 11 passing 
Beispiel erfolgreich mit echtem Forgejo-Repo getestet 
This commit is contained in:
Jörg Lohrer 2025-10-01 15:37:55 +02:00
parent fbd6630f6d
commit c31423d811
10 changed files with 1376 additions and 0 deletions

View file

@ -0,0 +1,67 @@
/**
* Beispiel: Alle Posts von Forgejo abrufen und analysieren
*/
import { createForgejoClient } from '../src/forgejo-client.js'
import { parseMarkdownString } from '../src/parser.js'
/**
 * Lists all post directories through the Forgejo client, parses the first
 * few posts and prints a short metadata summary for each of them.
 *
 * An error while fetching or parsing a single post is printed and the loop
 * moves on to the next post. An error outside the loop (creating the client,
 * listing the posts) is printed and terminates the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🚀 Alle Posts von Forgejo abrufen\n')
  try {
    const client = createForgejoClient()
    console.log('📡 Liste alle Posts...')
    const postDirs = await client.listPosts()
    console.log(`${postDirs.length} Posts gefunden\n`)

    // Parse at most `limit` posts. `analyzed` is the number that is actually
    // available, so the progress counter never shows "[1/5]" when the
    // repository only holds three posts.
    const limit = 5
    const analyzed = Math.min(limit, postDirs.length)
    console.log(`🔍 Parse die ersten ${analyzed} Posts...\n`)

    for (let i = 0; i < analyzed; i++) {
      const dir = postDirs[i]
      console.log(`\n📄 [${i + 1}/${analyzed}] ${dir.name}`)
      console.log('─'.repeat(60))
      try {
        const markdown = await client.getPostContent(dir.name)
        const result = await parseMarkdownString(markdown)
        if (result.metadata) {
          console.log(` Titel: ${result.metadata.name || 'Unbekannt'}`)
          console.log(` Typ: ${result.metadata.type}`)
          console.log(` Datum: ${result.metadata.datePublished || 'N/A'}`)
          console.log(` Lizenz: ${result.metadata.license || 'N/A'}`)
          if (result.metadata.creator) {
            // Accept a single creator object as well as a list of creators.
            const creators = Array.isArray(result.metadata.creator)
              ? result.metadata.creator
              : [result.metadata.creator]
            // Build the display name from whatever parts exist instead of
            // printing "undefined undefined" for incomplete entries.
            const authors = creators
              .map(c => c.name || [c.givenName, c.familyName].filter(Boolean).join(' ') || 'Unbekannt')
              .join(', ')
            console.log(` Autoren: ${authors}`)
          }
          console.log(` Content: ${result.content.length} Zeichen`)
        } else {
          console.log(' ⚠️ Keine Metadaten gefunden')
        }
      } catch (error) {
        // Best effort per post: report the problem and continue with the next one.
        console.log(` ❌ Fehler: ${error.message}`)
      }
    }

    console.log('\n\n📊 Zusammenfassung:')
    console.log(` Gesamt: ${postDirs.length} Posts im Repository`)
    console.log(` Analysiert: ${analyzed} Posts`)
    console.log('\n✅ Fertig!')
  } catch (error) {
    console.error('❌ Fehler:', error.message)
    process.exit(1)
  }
}
// Run the example when this file is executed.
main()

101
examples/parse-forgejo.js Normal file
View file

@ -0,0 +1,101 @@
/**
* Beispiel: Markdown-Datei von Forgejo API abrufen und parsen
*/
import { createForgejoClient } from '../src/forgejo-client.js'
import { parseMarkdownString } from '../src/parser.js'
/**
 * Fetches one post from the Forgejo API, parses its Markdown and prints the
 * repository info, the extracted metadata, content statistics and the first
 * headings.
 *
 * The post directory defaults to '2025-04-20-OER-und-Symbole'; a different
 * one may be passed as the first command-line argument.
 *
 * Any error is printed and terminates the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🚀 Forgejo API Beispiel\n')
  try {
    // Create the Forgejo client (original note: it uses the .env configuration)
    const client = createForgejoClient()
    console.log('📡 Verbinde mit Forgejo API...')
    console.log(` Repository: ${client.owner}/${client.repo}`)
    console.log(` Branch: ${client.branch}\n`)

    // Fetch repository info
    const repo = await client.getRepository()
    console.log('✅ Repository gefunden:')
    console.log(` Name: ${repo.name}`)
    console.log(` Beschreibung: ${repo.description}`)
    console.log(` Sprache: ${repo.language}\n`)

    // Fetch the example post; an optional CLI argument overrides the default
    const postPath = process.argv[2] ?? '2025-04-20-OER-und-Symbole'
    console.log(`📄 Rufe Post ab: ${postPath}`)
    const markdown = await client.getPostContent(postPath)
    console.log(`✅ Markdown geladen (${markdown.length} Zeichen)\n`)

    // Parse the Markdown
    console.log('🔍 Parse Markdown...')
    const result = await parseMarkdownString(markdown)

    // Show the results
    console.log('\n📊 Parse-Ergebnisse:\n')
    if (result.metadata) {
      console.log('🏷️ Metadaten:')
      console.log(` Titel: ${result.metadata.name}`)
      console.log(` Typ: ${result.metadata.type}`)
      console.log(` Lizenz: ${result.metadata.license}`)
      console.log(` Datum: ${result.metadata.datePublished}`)
      if (result.metadata.creator) {
        // Accept a single creator object as well as a list of creators, so a
        // non-array value does not abort the whole example.
        const creators = Array.isArray(result.metadata.creator)
          ? result.metadata.creator
          : [result.metadata.creator]
        console.log(' Autoren:')
        for (const creator of creators) {
          // Build the display name from whatever parts exist instead of
          // printing "undefined undefined" for incomplete entries.
          const name =
            creator.name ||
            [creator.givenName, creator.familyName].filter(Boolean).join(' ') ||
            'Unbekannt'
          console.log(` - ${name}`)
          if (creator.id) console.log(` ORCID: ${creator.id}`)
        }
      }
      if (result.metadata._warnings && result.metadata._warnings.length > 0) {
        console.log('\n⚠ Warnings:')
        result.metadata._warnings.forEach(w => console.log(` - ${w}`))
      }
    }

    console.log('\n📝 Content:')
    console.log(` Länge: ${result.content.length} Zeichen`)
    console.log(` AST Nodes: ${countNodes(result.ast)}`)

    // Extract the headings and show at most the first five
    const { extractHeadings } = await import('../src/parser.js')
    const headings = extractHeadings(result.ast)
    if (headings.length > 0) {
      console.log('\n📑 Überschriften:')
      headings.slice(0, 5).forEach(h => {
        const indent = ' '.repeat(h.level - 1)
        console.log(` ${indent}H${h.level}: ${h.text}`)
      })
      if (headings.length > 5) {
        console.log(` ... und ${headings.length - 5} weitere`)
      }
    }

    console.log('\n✅ Erfolgreich!')
  } catch (error) {
    console.error('❌ Fehler:', error.message)
    process.exit(1)
  }
}
/**
 * Helper: counts the nodes of a tree, including the given node itself.
 *
 * @param {object} node - Tree node; its `children`, when present, are visited
 *   with `forEach`.
 * @returns {number} Number of nodes reachable from `node`.
 */
function countNodes(node) {
  let total = 0
  // Explicit work list instead of recursion; every popped entry is one node.
  const pending = [node]
  while (pending.length > 0) {
    const current = pending.pop()
    total += 1
    if (current.children) {
      current.children.forEach(child => {
        pending.push(child)
      })
    }
  }
  return total
}
// Run the example
main()

90
examples/parse-local.js Normal file
View file

@ -0,0 +1,90 @@
/**
* Beispiel: Lokale Markdown-Datei parsen
*/
import { parseMarkdownFile } from '../src/parser.js'
import { join, dirname } from 'path'
import { fileURLToPath } from 'url'
import { writeFile } from 'fs/promises'
const __dirname = dirname(fileURLToPath(import.meta.url))
/**
 * Parses the Markdown fixture `test/fixtures/example.md`, prints its
 * metadata, YAML front matter, AST summary, headings, links and images, and
 * stores the complete parse result as JSON in `test/output/result.json`.
 *
 * Any error is printed and terminates the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🚀 Lokale Datei parsen\n')
  try {
    // Example Markdown file
    const filePath = join(__dirname, '../test/fixtures/example.md')
    console.log(`📄 Parse Datei: ${filePath}`)
    const result = await parseMarkdownFile(filePath)
    console.log('\n✅ Erfolgreich geparst!\n')

    // Print the metadata
    if (result.metadata) {
      console.log('📋 Metadaten:')
      console.log(JSON.stringify(result.metadata, null, 2))
    }

    // Print the YAML front matter
    if (result.yaml) {
      console.log('\n📝 YAML Front Matter:')
      console.log(JSON.stringify(result.yaml, null, 2))
    }

    // AST structure
    console.log('\n🌲 AST Root:')
    console.log(` Type: ${result.ast.type}`)
    console.log(` Children: ${result.ast.children?.length || 0}`)

    // Load all extraction helpers with a single import instead of importing
    // the same module three times.
    const { extractHeadings, extractLinks, extractImages } = await import('../src/parser.js')

    // Headings
    const headings = extractHeadings(result.ast)
    if (headings.length > 0) {
      console.log('\n📑 Überschriften:')
      headings.forEach(h => {
        const indent = ' '.repeat(h.level - 1)
        console.log(` ${indent}H${h.level}: ${h.text}`)
      })
    }

    // Links (at most the first five)
    const links = extractLinks(result.ast)
    if (links.length > 0) {
      console.log('\n🔗 Links:')
      links.slice(0, 5).forEach(link => {
        console.log(` - ${link.text || 'Kein Text'}: ${link.url}`)
      })
      if (links.length > 5) {
        console.log(` ... und ${links.length - 5} weitere`)
      }
    }

    // Images
    const images = extractImages(result.ast)
    if (images.length > 0) {
      console.log('\n🖼 Bilder:')
      images.forEach(img => {
        console.log(` - ${img.alt || 'Kein Alt-Text'}: ${img.url}`)
      })
    }

    // Optional: store the result as JSON. The output directory is created
    // first, because writeFile does not create missing parent directories
    // and would otherwise fail with ENOENT on a fresh checkout.
    const outputPath = join(__dirname, '../test/output/result.json')
    const { mkdir } = await import('fs/promises')
    await mkdir(dirname(outputPath), { recursive: true })
    await writeFile(outputPath, JSON.stringify(result, null, 2))
    console.log(`\n💾 Ergebnis gespeichert: ${outputPath}`)
  } catch (error) {
    console.error('❌ Fehler:', error.message)
    process.exit(1)
  }
}
// Run the example when this file is executed.
main()