feat: implement core parser and Forgejo API client
Implementiert:
- YAML Front Matter Extractor mit parse/remove/has Funktionen
- AMB Metadata Extractor für Schema.org-konforme Metadaten
- Core Parser mit unified/remark Pipeline
  - parseMarkdownFile für lokale Dateien
  - parseMarkdownString für Strings
  - extractHeadings, extractLinks, extractImages Utilities
- Forgejo API Client
  - getFileContent, listDirectory, listPosts
  - getPostContent, getAllPosts
  - Volle API-Integration mit Token-Auth
- Public API in src/index.js
- 3 Beispiele (parse-forgejo, list-all-posts, parse-local)
- 11 Unit Tests (alle passing)
- Test-Fixtures mit AMB-konformen Beispieldaten

Tests: 11 passing ✅
Beispiel erfolgreich mit echtem Forgejo-Repo getestet ✅
This commit is contained in:
parent
fbd6630f6d
commit
c31423d811
10 changed files with 1376 additions and 0 deletions
67
examples/list-all-posts.js
Normal file
67
examples/list-all-posts.js
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
/**
|
||||
* Beispiel: Alle Posts von Forgejo abrufen und analysieren
|
||||
*/
|
||||
|
||||
import { createForgejoClient } from '../src/forgejo-client.js'
|
||||
import { parseMarkdownString } from '../src/parser.js'
|
||||
|
||||
/**
 * Turns one creator entry into a printable name.
 *
 * Prefers `name`; otherwise joins whichever of `givenName` / `familyName` are
 * present, so a missing part never shows up as the text "undefined".
 *
 * @param {string|{name?: string, givenName?: string, familyName?: string}} creator
 * @returns {string} Display name, or 'Unbekannt' when nothing usable is present.
 */
function formatCreatorName(creator) {
  if (typeof creator === 'string') return creator
  const fullName = [creator?.givenName, creator?.familyName].filter(Boolean).join(' ')
  return creator?.name || fullName || 'Unbekannt'
}

/**
 * Example: list all posts from Forgejo, parse the first few and print a short
 * metadata summary for each of them.
 *
 * A failure while fetching or parsing a single post is printed and the loop
 * moves on to the next post. Any other failure is logged and the process is
 * terminated with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🚀 Alle Posts von Forgejo abrufen\n')

  try {
    const client = createForgejoClient()

    console.log('📡 Liste alle Posts...')
    const postDirs = await client.listPosts()
    console.log(`✅ ${postDirs.length} Posts gefunden\n`)

    // Parse at most `limit` posts. `analyzed` is the number that is really
    // processed, so the progress labels stay correct when the repository
    // holds fewer posts than the limit.
    const limit = 5
    const analyzed = Math.min(limit, postDirs.length)
    console.log(`🔍 Parse die ersten ${analyzed} Posts...\n`)

    for (let i = 0; i < analyzed; i++) {
      const dir = postDirs[i]

      console.log(`\n📄 [${i + 1}/${analyzed}] ${dir.name}`)
      console.log('─'.repeat(60))

      try {
        const markdown = await client.getPostContent(dir.name)
        const result = await parseMarkdownString(markdown)

        if (!result.metadata) {
          console.log(' ⚠️ Keine Metadaten gefunden')
          continue
        }

        const { metadata } = result
        console.log(` Titel: ${metadata.name || 'Unbekannt'}`)
        console.log(` Typ: ${metadata.type}`)
        console.log(` Datum: ${metadata.datePublished || 'N/A'}`)
        console.log(` Lizenz: ${metadata.license || 'N/A'}`)

        if (metadata.creator) {
          // Accept a single creator as well as a list of creators.
          const creators = Array.isArray(metadata.creator)
            ? metadata.creator
            : [metadata.creator]
          const authors = creators.map(formatCreatorName).join(', ')
          console.log(` Autoren: ${authors}`)
        }

        console.log(` Content: ${result.content.length} Zeichen`)
      } catch (error) {
        // One broken post must not abort the whole run.
        console.log(` ❌ Fehler: ${error.message}`)
      }
    }

    console.log('\n\n📊 Zusammenfassung:')
    console.log(` Gesamt: ${postDirs.length} Posts im Repository`)
    console.log(` Analysiert: ${analyzed} Posts`)
    console.log('\n✅ Fertig!')
  } catch (error) {
    console.error('❌ Fehler:', error.message)
    process.exit(1)
  }
}
|
||||
|
||||
// Run the example. main() catches its own errors, logs them and exits with code 1.
main()
|
||||
101
examples/parse-forgejo.js
Normal file
101
examples/parse-forgejo.js
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
/**
|
||||
* Beispiel: Markdown-Datei von Forgejo API abrufen und parsen
|
||||
*/
|
||||
|
||||
import { createForgejoClient } from '../src/forgejo-client.js'
|
||||
import { parseMarkdownString } from '../src/parser.js'
|
||||
|
||||
/**
 * Example: fetch one Markdown post through the Forgejo API, parse it and
 * print repository info, metadata, content statistics and the first headings.
 *
 * Any error is logged and the process is terminated with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Prints a single creator entry, plus its ORCID line when an id is present.
  const printCreator = (person) => {
    const label = person.name || `${person.givenName} ${person.familyName}`
    console.log(` - ${label}`)
    if (person.id) console.log(` ORCID: ${person.id}`)
  }

  // Prints one heading, indented according to its level.
  const printHeading = (heading) => {
    const pad = ' '.repeat(heading.level - 1)
    console.log(` ${pad}H${heading.level}: ${heading.text}`)
  }

  console.log('🚀 Forgejo API Beispiel\n')

  try {
    // Create the Forgejo client (uses the .env configuration)
    const forgejo = createForgejoClient()

    console.log('📡 Verbinde mit Forgejo API...')
    console.log(` Repository: ${forgejo.owner}/${forgejo.repo}`)
    console.log(` Branch: ${forgejo.branch}\n`)

    // Fetch repository info
    const repoInfo = await forgejo.getRepository()
    console.log('✅ Repository gefunden:')
    console.log(` Name: ${repoInfo.name}`)
    console.log(` Beschreibung: ${repoInfo.description}`)
    console.log(` Sprache: ${repoInfo.language}\n`)

    // Fetch the example post
    const postPath = '2025-04-20-OER-und-Symbole'
    console.log(`📄 Rufe Post ab: ${postPath}`)

    const source = await forgejo.getPostContent(postPath)
    console.log(`✅ Markdown geladen (${source.length} Zeichen)\n`)

    // Parse the Markdown
    console.log('🔍 Parse Markdown...')
    const parsed = await parseMarkdownString(source)

    // Show the results
    console.log('\n📊 Parse-Ergebnisse:\n')

    const meta = parsed.metadata
    if (meta) {
      console.log('🏷️ Metadaten:')
      console.log(` Titel: ${meta.name}`)
      console.log(` Typ: ${meta.type}`)
      console.log(` Lizenz: ${meta.license}`)
      console.log(` Datum: ${meta.datePublished}`)

      if (meta.creator) {
        console.log(' Autoren:')
        meta.creator.forEach(printCreator)
      }

      if (meta._warnings?.length > 0) {
        console.log('\n⚠️ Warnings:')
        meta._warnings.forEach((warning) => console.log(` - ${warning}`))
      }
    }

    console.log('\n📝 Content:')
    console.log(` Länge: ${parsed.content.length} Zeichen`)
    console.log(` AST Nodes: ${countNodes(parsed.ast)}`)

    // Extract headings
    const parserModule = await import('../src/parser.js')
    const headings = parserModule.extractHeadings(parsed.ast)

    if (headings.length > 0) {
      console.log('\n📑 Überschriften:')
      headings.slice(0, 5).forEach(printHeading)

      // Only the first five headings are listed; report how many were left out.
      const remaining = headings.length - 5
      if (remaining > 0) {
        console.log(` ... und ${remaining} weitere`)
      }
    }

    console.log('\n✅ Erfolgreich!')
  } catch (error) {
    console.error('❌ Fehler:', error.message)
    process.exit(1)
  }
}
|
||||
|
||||
/**
 * Helper: counts the nodes of an AST (the given node plus all descendants
 * reachable through `children` arrays).
 *
 * The tree is walked iteratively with an explicit stack, so very deep trees
 * cannot overflow the call stack. Missing nodes (`null` / `undefined`) are
 * skipped instead of raising a TypeError.
 *
 * @param {{children?: Array<object>}|null|undefined} node - Root of the (sub)tree.
 * @returns {number} Number of nodes; 0 when `node` is null or undefined.
 */
function countNodes(node) {
  let count = 0
  const pending = [node]

  while (pending.length > 0) {
    const current = pending.pop()
    if (current == null) continue

    count += 1
    if (Array.isArray(current.children)) {
      // Push one by one: spreading a huge children array into push() could
      // itself exceed the engine's argument limit.
      for (const child of current.children) {
        pending.push(child)
      }
    }
  }

  return count
}
|
||||
|
||||
// Run the example
|
||||
main()
|
||||
90
examples/parse-local.js
Normal file
90
examples/parse-local.js
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
/**
|
||||
* Beispiel: Lokale Markdown-Datei parsen
|
||||
*/
|
||||
|
||||
import { parseMarkdownFile } from '../src/parser.js'
|
||||
import { join, dirname } from 'path'
|
||||
import { fileURLToPath } from 'url'
|
||||
import { writeFile } from 'fs/promises'
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url))
|
||||
|
||||
/**
 * Example: parse a local Markdown fixture and print its metadata, YAML front
 * matter, AST summary, headings, links and images, then store the complete
 * parse result as JSON under test/output/.
 *
 * Any error is logged and the process is terminated with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🚀 Lokale Datei parsen\n')

  try {
    // Example Markdown file
    const filePath = join(__dirname, '../test/fixtures/example.md')
    console.log(`📄 Parse Datei: ${filePath}`)

    const result = await parseMarkdownFile(filePath)
    console.log('\n✅ Erfolgreich geparst!\n')

    // Print metadata
    if (result.metadata) {
      console.log('📋 Metadaten:')
      console.log(JSON.stringify(result.metadata, null, 2))
    }

    // Print YAML front matter
    if (result.yaml) {
      console.log('\n📝 YAML Front Matter:')
      console.log(JSON.stringify(result.yaml, null, 2))
    }

    // AST structure
    console.log('\n🌲 AST Root:')
    console.log(` Type: ${result.ast.type}`)
    console.log(` Children: ${result.ast.children?.length ?? 0}`)

    // Load all extraction utilities with a single dynamic import.
    const { extractHeadings, extractLinks, extractImages } = await import('../src/parser.js')

    // Headings
    const headings = extractHeadings(result.ast)
    if (headings.length > 0) {
      console.log('\n📑 Überschriften:')
      headings.forEach(h => {
        const indent = ' '.repeat(h.level - 1)
        console.log(` ${indent}H${h.level}: ${h.text}`)
      })
    }

    // Links (only the first five are listed)
    const links = extractLinks(result.ast)
    if (links.length > 0) {
      console.log('\n🔗 Links:')
      links.slice(0, 5).forEach(link => {
        console.log(` - ${link.text || 'Kein Text'}: ${link.url}`)
      })
      if (links.length > 5) {
        console.log(` ... und ${links.length - 5} weitere`)
      }
    }

    // Images
    const images = extractImages(result.ast)
    if (images.length > 0) {
      console.log('\n🖼️ Bilder:')
      images.forEach(img => {
        console.log(` - ${img.alt || 'Kein Alt-Text'}: ${img.url}`)
      })
    }

    // Optional: store the result as JSON. The output directory may not exist
    // yet, so create it first; otherwise writeFile fails with ENOENT.
    const outputPath = join(__dirname, '../test/output/result.json')
    const { mkdir } = await import('fs/promises')
    await mkdir(dirname(outputPath), { recursive: true })
    await writeFile(outputPath, JSON.stringify(result, null, 2))
    console.log(`\n💾 Ergebnis gespeichert: ${outputPath}`)
  } catch (error) {
    console.error('❌ Fehler:', error.message)
    process.exit(1)
  }
}
|
||||
|
||||
// Run the example. main() catches its own errors, logs them and exits with code 1.
main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue