feat: implement core parser and Forgejo API client
Implementiert: - YAML Front Matter Extractor mit parse/remove/has Funktionen - AMB Metadata Extractor für Schema.org-konforme Metadaten - Core Parser mit unified/remark Pipeline - parseMarkdownFile für lokale Dateien - parseMarkdownString für Strings - extractHeadings, extractLinks, extractImages Utilities - Forgejo API Client - getFileContent, listDirectory, listPosts - getPostContent, getAllPosts - Volle API-Integration mit Token-Auth - Public API in src/index.js - 3 Beispiele (parse-forgejo, list-all-posts, parse-local) - 11 Unit Tests (alle passing) - Test-Fixtures mit AMB-konformen Beispieldaten Tests: 11 passing ✅ Beispiel erfolgreich mit echtem Forgejo-Repo getestet ✅
This commit is contained in:
parent
fbd6630f6d
commit
c31423d811
10 changed files with 1376 additions and 0 deletions
67
examples/list-all-posts.js
Normal file
67
examples/list-all-posts.js
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
/**
|
||||
* Beispiel: Alle Posts von Forgejo abrufen und analysieren
|
||||
*/
|
||||
|
||||
import { createForgejoClient } from '../src/forgejo-client.js'
|
||||
import { parseMarkdownString } from '../src/parser.js'
|
||||
|
||||
/**
 * Lists every post directory in the configured Forgejo repository, parses the
 * first few posts and prints a short metadata summary for each of them.
 *
 * A failure while fetching or parsing a single post is reported and the loop
 * continues; any other failure is printed and ends the process with exit
 * code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🚀 Alle Posts von Forgejo abrufen\n')

  try {
    const client = createForgejoClient()

    console.log('📡 Liste alle Posts...')
    const postDirs = await client.listPosts()
    console.log(`✅ ${postDirs.length} Posts gefunden\n`)

    // The repository may hold fewer posts than `limit`; every counter below
    // uses the number that is really analysed so the progress output
    // never reads e.g. "[1/5]" when only two posts exist.
    const limit = 5
    const analyzed = Math.min(limit, postDirs.length)
    console.log(`🔍 Parse die ersten ${analyzed} Posts...\n`)

    // Posts are processed one after another on purpose: the output of each
    // post stays grouped under its own header.
    for (const [index, dir] of postDirs.slice(0, analyzed).entries()) {
      console.log(`\n📄 [${index + 1}/${analyzed}] ${dir.name}`)
      console.log('─'.repeat(60))

      try {
        const markdown = await client.getPostContent(dir.name)
        const result = await parseMarkdownString(markdown)

        if (result.metadata) {
          const { metadata } = result

          console.log(` Titel: ${metadata.name || 'Unbekannt'}`)
          console.log(` Typ: ${metadata.type}`)
          console.log(` Datum: ${metadata.datePublished || 'N/A'}`)
          console.log(` Lizenz: ${metadata.license || 'N/A'}`)

          if (metadata.creator) {
            const authors = metadata.creator
              .map(c => c.name || `${c.givenName} ${c.familyName}`)
              .join(', ')
            console.log(` Autoren: ${authors}`)
          }

          console.log(` Content: ${result.content.length} Zeichen`)
        } else {
          console.log(' ⚠️ Keine Metadaten gefunden')
        }
      } catch (error) {
        // A broken post must not abort the whole listing.
        console.log(` ❌ Fehler: ${error.message}`)
      }
    }

    console.log('\n\n📊 Zusammenfassung:')
    console.log(` Gesamt: ${postDirs.length} Posts im Repository`)
    console.log(` Analysiert: ${analyzed} Posts`)
    console.log('\n✅ Fertig!')
  } catch (error) {
    console.error('❌ Fehler:', error.message)
    process.exit(1)
  }
}
|
||||
|
||||
// Run the example; main() reports its own errors and sets the exit code.
main()
|
||||
101
examples/parse-forgejo.js
Normal file
101
examples/parse-forgejo.js
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
/**
|
||||
* Beispiel: Markdown-Datei von Forgejo API abrufen und parsen
|
||||
*/
|
||||
|
||||
import { createForgejoClient } from '../src/forgejo-client.js'
|
||||
import { parseMarkdownString } from '../src/parser.js'
|
||||
|
||||
/**
 * Example walkthrough: connects to the configured Forgejo repository, prints
 * basic repository information, downloads one hard-coded post, parses it and
 * prints metadata, content statistics and the first headings.
 *
 * Any failure is printed and ends the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🚀 Forgejo API Beispiel\n')

  try {
    // The client reads its configuration from the environment (.env).
    const client = createForgejoClient()

    console.log('📡 Verbinde mit Forgejo API...')
    console.log(` Repository: ${client.owner}/${client.repo}`)
    console.log(` Branch: ${client.branch}\n`)

    // Repository information
    const repo = await client.getRepository()
    console.log('✅ Repository gefunden:')
    console.log(` Name: ${repo.name}`)
    console.log(` Beschreibung: ${repo.description}`)
    console.log(` Sprache: ${repo.language}\n`)

    // Download the sample post
    const postPath = '2025-04-20-OER-und-Symbole'
    console.log(`📄 Rufe Post ab: ${postPath}`)

    const markdown = await client.getPostContent(postPath)
    console.log(`✅ Markdown geladen (${markdown.length} Zeichen)\n`)

    // Parse it
    console.log('🔍 Parse Markdown...')
    const result = await parseMarkdownString(markdown)

    console.log('\n📊 Parse-Ergebnisse:\n')

    const { metadata } = result
    if (metadata) {
      console.log('🏷️ Metadaten:')
      console.log(` Titel: ${metadata.name}`)
      console.log(` Typ: ${metadata.type}`)
      console.log(` Lizenz: ${metadata.license}`)
      console.log(` Datum: ${metadata.datePublished}`)

      if (metadata.creator) {
        console.log(' Autoren:')
        for (const creator of metadata.creator) {
          const displayName =
            creator.name || `${creator.givenName} ${creator.familyName}`
          console.log(` - ${displayName}`)
          if (creator.id) {
            console.log(` ORCID: ${creator.id}`)
          }
        }
      }

      const extractionWarnings = metadata._warnings
      if (extractionWarnings && extractionWarnings.length > 0) {
        console.log('\n⚠️ Warnings:')
        for (const warning of extractionWarnings) {
          console.log(` - ${warning}`)
        }
      }
    }

    console.log('\n📝 Content:')
    console.log(` Länge: ${result.content.length} Zeichen`)
    console.log(` AST Nodes: ${countNodes(result.ast)}`)

    // Headings (the helper is loaded lazily, as in the rest of the examples)
    const parserModule = await import('../src/parser.js')
    const headings = parserModule.extractHeadings(result.ast)
    const shownHeadings = 5

    if (headings.length > 0) {
      console.log('\n📑 Überschriften:')
      for (const heading of headings.slice(0, shownHeadings)) {
        const indent = ' '.repeat(heading.level - 1)
        console.log(` ${indent}H${heading.level}: ${heading.text}`)
      }
      if (headings.length > shownHeadings) {
        console.log(` ... und ${headings.length - 5} weitere`)
      }
    }

    console.log('\n✅ Erfolgreich!')
  } catch (error) {
    console.error('❌ Fehler:', error.message)
    process.exit(1)
  }
}
|
||||
|
||||
/**
 * Helper: counts the nodes of a syntax tree, the given node included.
 *
 * @param {Object} node - Tree node; descendants are read from `node.children`
 * @returns {number} 1 for the node itself plus the size of every child subtree
 */
function countNodes(node) {
  if (!node.children) {
    return 1
  }

  // Start the fold at 1 so the node itself is part of the total.
  return node.children.reduce(
    (total, child) => total + countNodes(child),
    1
  )
}
|
||||
|
||||
// Run the example
|
||||
main()
|
||||
90
examples/parse-local.js
Normal file
90
examples/parse-local.js
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
/**
|
||||
* Beispiel: Lokale Markdown-Datei parsen
|
||||
*/
|
||||
|
||||
import { parseMarkdownFile } from '../src/parser.js'
|
||||
import { join, dirname } from 'path'
|
||||
import { fileURLToPath } from 'url'
|
||||
import { writeFile } from 'fs/promises'
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url))
|
||||
|
||||
/**
 * Example walkthrough: parses the bundled fixture file, prints metadata,
 * YAML front matter, AST summary, headings, links and images, and finally
 * stores the complete parse result as JSON.
 *
 * Any failure is printed and ends the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🚀 Lokale Datei parsen\n')

  try {
    // Sample Markdown file shipped with the tests
    const filePath = join(__dirname, '../test/fixtures/example.md')

    console.log(`📄 Parse Datei: ${filePath}`)

    const result = await parseMarkdownFile(filePath)

    console.log('\n✅ Erfolgreich geparst!\n')

    // Metadata
    if (result.metadata) {
      console.log('📋 Metadaten:')
      console.log(JSON.stringify(result.metadata, null, 2))
    }

    // YAML front matter
    if (result.yaml) {
      console.log('\n📝 YAML Front Matter:')
      console.log(JSON.stringify(result.yaml, null, 2))
    }

    // AST structure
    console.log('\n🌲 AST Root:')
    console.log(` Type: ${result.ast.type}`)
    console.log(` Children: ${result.ast.children?.length || 0}`)

    // One dynamic import is enough for all three extraction helpers.
    const { extractHeadings, extractLinks, extractImages } =
      await import('../src/parser.js')

    // Headings
    const headings = extractHeadings(result.ast)

    if (headings.length > 0) {
      console.log('\n📑 Überschriften:')
      headings.forEach(h => {
        const indent = ' '.repeat(h.level - 1)
        console.log(` ${indent}H${h.level}: ${h.text}`)
      })
    }

    // Links (only the first five are listed)
    const links = extractLinks(result.ast)

    if (links.length > 0) {
      console.log('\n🔗 Links:')
      links.slice(0, 5).forEach(link => {
        console.log(` - ${link.text || 'Kein Text'}: ${link.url}`)
      })
      if (links.length > 5) {
        console.log(` ... und ${links.length - 5} weitere`)
      }
    }

    // Images
    const images = extractImages(result.ast)

    if (images.length > 0) {
      console.log('\n🖼️ Bilder:')
      images.forEach(img => {
        console.log(` - ${img.alt || 'Kein Alt-Text'}: ${img.url}`)
      })
    }

    // Optional: store the result as JSON. The output directory is created
    // first because writeFile does not create missing parent directories and
    // would otherwise fail with ENOENT on a fresh checkout.
    const outputPath = join(__dirname, '../test/output/result.json')
    const { mkdir } = await import('fs/promises')
    await mkdir(dirname(outputPath), { recursive: true })
    await writeFile(outputPath, JSON.stringify(result, null, 2))
    console.log(`\n💾 Ergebnis gespeichert: ${outputPath}`)
  } catch (error) {
    console.error('❌ Fehler:', error.message)
    process.exit(1)
  }
}
|
||||
|
||||
// Run the example; main() reports its own errors and sets the exit code.
main()
|
||||
267
src/extractors/amb-extractor.js
Normal file
267
src/extractors/amb-extractor.js
Normal file
|
|
@ -0,0 +1,267 @@
|
|||
/**
|
||||
* AMB Metadata Extractor
|
||||
* Extrahiert und transformiert Schema.org-konforme AMB-Metadaten
|
||||
* aus YAML Front Matter
|
||||
*/
|
||||
|
||||
/**
 * Builds the AMB metadata record from parsed YAML front matter.
 *
 * Values are read from `yamlObject.commonMetadata` first; several fields fall
 * back to top-level front-matter keys (`title`, `summary`/`description`,
 * `url`, `datePublished`, `author`, `cover.image`). Warnings raised while
 * resolving `name` and `description` are attached as `_warnings`, and
 * top-level `tags` are passed through as `_tags`.
 *
 * @param {Object} yamlObject - Parsed YAML front matter
 * @returns {Object} Metadata record; the default record with a warning when
 *   `yamlObject` is missing or not an object
 */
export function extractAMBMetadata(yamlObject) {
  const hasFrontMatter = Boolean(yamlObject) && typeof yamlObject === 'object'
  if (!hasFrontMatter) {
    return createEmptyMetadata()
  }

  const amb = yamlObject.commonMetadata || {}
  const collectedWarnings = []
  const valueOrNull = key => amb[key] || null

  // `name` is resolved before `description` so warnings keep a stable order.
  const name = extractField(amb, 'name', yamlObject.title, collectedWarnings)
  const description = extractField(
    amb,
    'description',
    yamlObject.summary || yamlObject.description,
    collectedWarnings
  )

  // Key order is kept stable because the record is serialised as-is.
  const metadata = {
    '@context': amb['@context'] || 'https://schema.org/',
    type: amb.type || 'LearningResource',
    name,
    description,
    license: valueOrNull('license'),
    id: amb.id || amb.url || yamlObject.url || null,
    inLanguage: valueOrNull('inLanguage'),
    datePublished: extractDate(amb.datePublished || yamlObject.datePublished),
    creator: extractCreators(amb.creator, yamlObject.author),
    image: extractImage(amb.image, yamlObject.cover?.image),
    about: valueOrNull('about'),
    learningResourceType: valueOrNull('learningResourceType'),
    educationalLevel: valueOrNull('educationalLevel'),
    creativeWorkStatus: valueOrNull('creativeWorkStatus')
  }

  if (collectedWarnings.length > 0) {
    metadata._warnings = collectedWarnings
  }

  // Extra information from the static-site-generator part of the front matter
  if (yamlObject.tags) {
    metadata._tags = yamlObject.tags
  }

  return metadata
}
|
||||
|
||||
/**
 * Creates the default metadata record used when no front matter is available.
 *
 * @returns {Object} Record with default `@context` and `type`, empty `name`
 *   and `description`, and a single warning entry
 */
function createEmptyMetadata() {
  const defaults = {
    '@context': 'https://schema.org/',
    type: 'LearningResource'
  }

  return {
    ...defaults,
    name: null,
    description: null,
    _warnings: ['Keine YAML-Metadaten gefunden']
  }
}
|
||||
|
||||
/**
 * Reads one field from the primary source, falling back to an alternative
 * value and recording a warning whenever the primary value is missing.
 *
 * @param {Object} source - Primary source object (may be null/undefined)
 * @param {string} field - Name of the field to read
 * @param {*} fallback - Value used when the field is missing or falsy
 * @param {Array} warnings - Array that receives the warning message
 * @returns {*} The field value, the fallback, or null when neither is usable
 */
function extractField(source, field, fallback, warnings) {
  const primary = source ? source[field] : undefined
  if (primary) {
    return primary
  }

  const message = fallback
    ? `Feld 'commonMetadata.${field}' fehlt, verwende Fallback`
    : `Pflichtfeld 'commonMetadata.${field}' fehlt`
  warnings.push(message)

  return fallback ? fallback : null
}
|
||||
|
||||
/**
 * Normalises a date value to `YYYY-MM-DD`.
 *
 * Strings that already start with an ISO calendar date keep exactly that
 * date. Converting them through UTC would move timestamps with an offset to
 * the neighbouring day (e.g. `2025-04-20T00:30:00+02:00` is still the 19th in
 * UTC), which is not the date the author wrote. Everything else is parsed
 * with `Date` and formatted from its UTC representation.
 *
 * @param {string|Date} dateValue - Date as string or Date object
 * @returns {string|Date|null} `YYYY-MM-DD` string, the unchanged input when
 *   it cannot be parsed, or null for a missing value
 */
function extractDate(dateValue) {
  if (!dateValue) return null

  if (typeof dateValue === 'string') {
    // Calendar date followed by end of string, a time part or whitespace.
    const isoDay = /^\s*(\d{4}-\d{2}-\d{2})(?=$|[T\s])/.exec(dateValue)
    if (isoDay && !Number.isNaN(Date.parse(isoDay[1]))) {
      return isoDay[1]
    }
  }

  const date = new Date(dateValue)
  if (Number.isNaN(date.getTime())) {
    // Unparseable input is handed back untouched rather than dropped.
    return dateValue
  }

  return date.toISOString().split('T')[0]
}
|
||||
|
||||
/**
 * Collects creator information, preferring the `commonMetadata` value over
 * the plain author value.
 *
 * @param {Array|Object} creators - Creator(s) from commonMetadata
 * @param {Array|string} authors - Author(s) from the static-site-generator
 *   part of the front matter
 * @returns {Array|null} Array of creator objects, or null when neither
 *   argument holds a value
 */
function extractCreators(creators, authors) {
  // A single value is treated like a one-element list.
  const asList = value => (Array.isArray(value) ? value : [value])

  // Preferred source: commonMetadata.creator
  if (creators) {
    return asList(creators).map(normalizeCreator)
  }

  // Fallback: author given as a simple string or a list of them
  if (authors) {
    return asList(authors).map(name => ({ type: 'Person', name }))
  }

  return null
}
|
||||
|
||||
/**
 * Brings one creator entry into a uniform object shape.
 *
 * A string becomes a `Person` with that name. For objects, `type` defaults to
 * `Person`; when given and/or family name are present the display name is
 * composed from them, otherwise an existing `name` is used. `id` and
 * `affiliation` are carried over when present.
 *
 * @param {Object|string} creator - Creator entry
 * @returns {Object} Normalised creator object
 */
function normalizeCreator(creator) {
  if (typeof creator === 'string') {
    return { type: 'Person', name: creator }
  }

  const { type, givenName, familyName, name, id, affiliation } = creator
  const result = { type: type || 'Person' }

  if (givenName || familyName) {
    // Person given by name parts: keep the parts and derive the full name.
    result.givenName = givenName
    result.familyName = familyName
    result.name = [givenName || '', familyName || ''].join(' ').trim()
  } else if (name) {
    result.name = name
  }

  // Identifier (ORCID, ROR, etc.)
  if (id) {
    result.id = id
  }

  if (affiliation) {
    result.affiliation = normalizeOrganization(affiliation)
  }

  return result
}
|
||||
|
||||
/**
 * Brings an organisation value into a uniform object shape.
 *
 * @param {Object|string} org - Organisation as plain name or as object
 * @returns {Object} `{ type, name }` for a plain name; `{ type, name, id }`
 *   for an object, with `id` set to null when it is missing
 */
function normalizeOrganization(org) {
  const isPlainName = typeof org === 'string'

  const organization = {
    type: 'Organization',
    name: isPlainName ? org : org.name
  }

  // Only object input carries an id slot.
  if (!isPlainName) {
    organization.id = org.id || null
  }

  return organization
}
|
||||
|
||||
/**
 * Picks the image to use for a resource.
 *
 * @param {string} ambImage - Image from commonMetadata (preferred)
 * @param {string} coverImage - Image from the cover section
 * @returns {string|null} The first usable value, or null when both are empty
 */
function extractImage(ambImage, coverImage) {
  if (ambImage) {
    return ambImage
  }
  if (coverImage) {
    return coverImage
  }
  return null
}
|
||||
|
||||
/**
 * Checks a metadata record for completeness.
 *
 * Missing required fields (`name`, `description`, `license`) are reported as
 * errors, missing recommended fields (`creator`, `datePublished`, `about`,
 * `id`) as warnings. A missing or non-object record is treated as empty, so
 * the function reports every field instead of throwing. This matters because
 * the parser yields `metadata: null` for documents without front matter.
 *
 * @param {Object|null|undefined} metadata - Metadata to validate
 * @returns {{valid: boolean, errors: string[], warnings: string[]}}
 *   Validation result; `valid` is true when no required field is missing
 */
export function validateAMBMetadata(metadata) {
  const fields = metadata && typeof metadata === 'object' ? metadata : {}
  const missing = names => names.filter(field => !fields[field])

  // Required fields
  const errors = missing(['name', 'description', 'license']).map(
    field => `Pflichtfeld fehlt: ${field}`
  )

  // Recommended fields
  const warnings = missing(['creator', 'datePublished', 'about', 'id']).map(
    field => `Empfohlenes Feld fehlt: ${field}`
  )

  return {
    valid: errors.length === 0,
    errors,
    warnings
  }
}
|
||||
65
src/extractors/yaml-extractor.js
Normal file
65
src/extractors/yaml-extractor.js
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
/**
|
||||
* YAML Extractor
|
||||
* Extrahiert und parst YAML Front Matter aus Markdown-Inhalten
|
||||
*/
|
||||
|
||||
import { parse as parseYaml } from 'yaml'
|
||||
|
||||
/**
 * Pattern for a YAML front matter block at the very start of a document:
 * an opening `---` line, the YAML body (capture group 1) and a closing `---`
 * line.
 *
 * Compared with a plain `---\n…\n---\n` pattern it additionally accepts
 * - a leading UTF-8 byte order mark, and
 * - a closing `---` that is the last line of the input without a trailing
 *   newline (a document that consists of front matter only).
 *
 * The pattern has no `g`/`y` flag, so sharing it between calls is stateless.
 */
const FRONT_MATTER_PATTERN = /^\uFEFF?---\s*\n([\s\S]*?)\n---\s*(?:\n|$)/

/**
 * Extracts and parses the YAML front matter of a Markdown document.
 *
 * @param {string} markdownContent - Raw Markdown content
 * @returns {Object|null} Parsed YAML value; null when the input is not a
 *   non-empty string or has no front matter; an object with `_error` and
 *   `_errorDetails` when the YAML cannot be parsed
 */
export function extractYAML(markdownContent) {
  if (!markdownContent || typeof markdownContent !== 'string') {
    return null
  }

  const match = markdownContent.match(FRONT_MATTER_PATTERN)
  if (!match || !match[1]) {
    return null
  }

  try {
    return parseYaml(match[1])
  } catch (error) {
    // Parsing problems are reported in-band so one broken document does not
    // abort processing of the others.
    console.error('YAML Parse Error:', error.message)
    return {
      _error: 'YAML parsing failed',
      _errorDetails: error.message
    }
  }
}

/**
 * Removes the YAML front matter from a Markdown document.
 *
 * @param {string} markdownContent - Markdown with YAML front matter
 * @returns {string} Trimmed Markdown without front matter; input that is not
 *   a non-empty string is returned unchanged
 */
export function removeYAML(markdownContent) {
  if (!markdownContent || typeof markdownContent !== 'string') {
    return markdownContent
  }

  return markdownContent.replace(FRONT_MATTER_PATTERN, '').trim()
}

/**
 * Tells whether a string starts with a YAML front matter block.
 *
 * @param {string} markdownContent - Content to check
 * @returns {boolean} True when front matter is present
 */
export function hasYAML(markdownContent) {
  if (!markdownContent || typeof markdownContent !== 'string') {
    return false
  }

  return FRONT_MATTER_PATTERN.test(markdownContent)
}
|
||||
232
src/forgejo-client.js
Normal file
232
src/forgejo-client.js
Normal file
|
|
@ -0,0 +1,232 @@
|
|||
/**
|
||||
* Forgejo/Gitea API Client
|
||||
* Ermöglicht Zugriff auf Repository-Inhalte über die Forgejo/Gitea API
|
||||
*/
|
||||
|
||||
import { config } from 'dotenv'
|
||||
|
||||
// Environment-Variablen laden
|
||||
config()
|
||||
|
||||
/**
 * Percent-encodes every segment of a repository path while keeping the `/`
 * separators, so names containing spaces, `#`, `?` or `%` stay part of the
 * URL path instead of corrupting the request.
 *
 * @param {string} path - Repository-relative path (not yet URL-encoded)
 * @returns {string} Path that can be embedded in an API URL
 */
function encodeRepoPath(path) {
  return String(path)
    .split('/')
    .map(segment => encodeURIComponent(segment))
    .join('/')
}

/**
 * Builds the `/repos/{owner}/{repo}` endpoint prefix for a client.
 *
 * @param {ForgejoClient} client - Client providing owner and repo
 * @returns {string} Endpoint prefix
 */
function repoEndpoint(client) {
  return `/repos/${encodeURIComponent(client.owner)}/${encodeURIComponent(client.repo)}`
}

/**
 * Builds the contents endpoint for a path at a given ref.
 *
 * @param {ForgejoClient} client - Client providing owner, repo and default branch
 * @param {string} path - Repository-relative path
 * @param {string|null} ref - Branch/tag/commit; the client's branch when empty
 * @returns {string} Endpoint including the `ref` query parameter
 */
function contentsEndpoint(client, path, ref) {
  const branch = ref || client.branch
  return `${repoEndpoint(client)}/contents/${encodeRepoPath(path)}?ref=${encodeURIComponent(branch)}`
}

/**
 * Forgejo API client.
 *
 * Read-only wrapper around the repository endpoints used by this project.
 * Every method wraps failures in an `Error` with a descriptive prefix; the
 * original error is kept as `cause`.
 */
export class ForgejoClient {
  /**
   * Options take precedence over the `FORGEJO_*` environment variables.
   *
   * @param {Object} [options] - Configuration
   * @param {string} [options.baseUrl] - API base URL (env: FORGEJO_API_BASE_URL)
   * @param {string} [options.owner] - Repository owner (env: FORGEJO_OWNER)
   * @param {string} [options.repo] - Repository name (env: FORGEJO_REPO)
   * @param {string} [options.branch] - Branch (env: FORGEJO_BRANCH, default: main)
   * @param {string} [options.token] - API token (env: FORGEJO_TOKEN); optional
   *   for public repositories
   * @throws {Error} When baseUrl, owner or repo cannot be resolved
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || process.env.FORGEJO_API_BASE_URL
    this.owner = options.owner || process.env.FORGEJO_OWNER
    this.repo = options.repo || process.env.FORGEJO_REPO
    this.branch = options.branch || process.env.FORGEJO_BRANCH || 'main'
    this.token = options.token || process.env.FORGEJO_TOKEN

    if (!this.baseUrl || !this.owner || !this.repo) {
      throw new Error('Forgejo client requires baseUrl, owner, and repo')
    }
  }

  /**
   * Creates the request headers; the token header is added only when a token
   * is configured.
   *
   * @returns {Object} Headers
   */
  getHeaders() {
    const headers = {
      'Accept': 'application/json'
    }

    if (this.token) {
      headers['Authorization'] = `token ${this.token}`
    }

    return headers
  }

  /**
   * Performs a GET request against the API and returns the decoded JSON body.
   *
   * @param {string} endpoint - API endpoint, starting with `/`
   * @returns {Promise<Object>} Response data
   * @throws {Error} On network failure, non-2xx status or invalid JSON
   */
  async request(endpoint) {
    // Tolerate a base URL configured with a trailing slash.
    const url = `${this.baseUrl.replace(/\/+$/, '')}${endpoint}`

    try {
      const response = await fetch(url, {
        headers: this.getHeaders()
      })

      if (!response.ok) {
        throw new Error(
          `Forgejo API Error: ${response.status} ${response.statusText}`
        )
      }

      return await response.json()
    } catch (error) {
      throw new Error(`Failed to fetch from Forgejo: ${error.message}`, {
        cause: error
      })
    }
  }

  /**
   * Fetches the content of a file from the repository.
   *
   * @param {string} path - File path inside the repository
   * @param {string} [ref] - Branch/tag/commit; defaults to the client's branch
   * @returns {Promise<string>} File content as string
   * @throws {Error} When the request fails or the response carries neither
   *   base64 content nor a download URL
   */
  async getFileContent(path, ref = null) {
    const endpoint = contentsEndpoint(this, path, ref)

    try {
      const data = await this.request(endpoint)

      // Inline content arrives base64-encoded.
      if (data.content && data.encoding === 'base64') {
        return Buffer.from(data.content, 'base64').toString('utf-8')
      }

      // Fallback: follow the download URL.
      if (data.download_url) {
        const response = await fetch(data.download_url)
        // Without this check an HTTP error page would be returned as if it
        // were the file content.
        if (!response.ok) {
          throw new Error(
            `Download failed: ${response.status} ${response.statusText}`
          )
        }
        return await response.text()
      }

      throw new Error('No content or download_url in response')
    } catch (error) {
      throw new Error(`Failed to get file content: ${error.message}`, {
        cause: error
      })
    }
  }

  /**
   * Lists the entries of a directory.
   *
   * @param {string} path - Directory path inside the repository
   * @param {string} [ref] - Branch/tag/commit; defaults to the client's branch
   * @returns {Promise<Array>} Entries reduced to name, path, type, size, sha,
   *   url and download_url
   * @throws {Error} When the request fails or the path is not a directory
   */
  async listDirectory(path, ref = null) {
    const endpoint = contentsEndpoint(this, path, ref)

    try {
      const data = await this.request(endpoint)

      if (!Array.isArray(data)) {
        throw new Error('Expected directory listing, got single file')
      }

      return data.map(item => ({
        name: item.name,
        path: item.path,
        type: item.type, // 'file' or 'dir'
        size: item.size,
        sha: item.sha,
        url: item.url,
        download_url: item.download_url
      }))
    } catch (error) {
      throw new Error(`Failed to list directory: ${error.message}`, {
        cause: error
      })
    }
  }

  /**
   * Lists the post directories inside the posts directory.
   *
   * @param {string} [postsDir] - Path of the posts directory
   * @returns {Promise<Array>} Directory entries only (files are skipped)
   * @throws {Error} When the directory cannot be listed
   */
  async listPosts(postsDir = 'Website/content/posts') {
    try {
      const contents = await this.listDirectory(postsDir)
      return contents.filter(item => item.type === 'dir')
    } catch (error) {
      throw new Error(`Failed to list posts: ${error.message}`, {
        cause: error
      })
    }
  }

  /**
   * Fetches `index.md` of one post directory.
   *
   * @param {string} postDir - Post directory (e.g. "2025-04-20-OER-und-Symbole")
   * @param {string} [postsBaseDir] - Base path (default: "Website/content/posts")
   * @returns {Promise<string>} Markdown content
   */
  async getPostContent(postDir, postsBaseDir = 'Website/content/posts') {
    const indexPath = `${postsBaseDir}/${postDir}/index.md`
    return await this.getFileContent(indexPath)
  }

  /**
   * Fetches every post together with its content. Posts whose content cannot
   * be fetched are logged with `console.warn` and left out of the result.
   *
   * @param {string} [postsDir] - Posts directory
   * @returns {Promise<Array>} Objects with directory, path, content and the
   *   directory entry as `metadata`
   * @throws {Error} When the post list itself cannot be fetched
   */
  async getAllPosts(postsDir = 'Website/content/posts') {
    try {
      const postDirs = await this.listPosts(postsDir)

      const posts = await Promise.all(
        postDirs.map(async (dir) => {
          try {
            const content = await this.getPostContent(dir.name, postsDir)
            return {
              directory: dir.name,
              path: `${postsDir}/${dir.name}/index.md`,
              content,
              metadata: dir
            }
          } catch (error) {
            console.warn(`Failed to fetch post ${dir.name}:`, error.message)
            return null
          }
        })
      )

      // Drop the placeholders left by failed requests.
      return posts.filter(post => post !== null)
    } catch (error) {
      throw new Error(`Failed to get all posts: ${error.message}`, {
        cause: error
      })
    }
  }

  /**
   * Fetches the repository information.
   *
   * @returns {Promise<Object>} Repository data
   */
  async getRepository() {
    return await this.request(repoEndpoint(this))
  }

  /**
   * Searches the repository via its `search` endpoint.
   *
   * NOTE(review): confirm that the target Forgejo version exposes
   * `/repos/{owner}/{repo}/search`; the endpoint is taken over unchanged.
   *
   * @param {string} query - Search term
   * @returns {Promise<Array>} `data` array of the response, or an empty array
   * @throws {Error} When the request fails
   */
  async searchFiles(query) {
    const endpoint = `${repoEndpoint(this)}/search?q=${encodeURIComponent(query)}`

    try {
      const data = await this.request(endpoint)
      return data.data || []
    } catch (error) {
      throw new Error(`Failed to search files: ${error.message}`, {
        cause: error
      })
    }
  }
}

/**
 * Factory function: creates a ForgejoClient; values not given in `overrides`
 * are resolved by the constructor from the environment.
 *
 * @param {Object} [overrides] - Optional constructor options
 * @returns {ForgejoClient} Configured client
 */
export function createForgejoClient(overrides = {}) {
  return new ForgejoClient(overrides)
}
|
||||
70
src/index.js
Normal file
70
src/index.js
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
/**
|
||||
* MDParser - Main Entry Point
|
||||
* Markdown to JSON Parser für AMB-konforme Inhalte
|
||||
*/
|
||||
|
||||
// Parser
|
||||
export {
|
||||
parseMarkdownFile,
|
||||
parseMarkdownString,
|
||||
astToMarkdown,
|
||||
createMarkdownProcessor,
|
||||
extractHeadings,
|
||||
extractLinks,
|
||||
extractImages
|
||||
} from './parser.js'
|
||||
|
||||
// YAML Extractor
|
||||
export {
|
||||
extractYAML,
|
||||
removeYAML,
|
||||
hasYAML
|
||||
} from './extractors/yaml-extractor.js'
|
||||
|
||||
// AMB Metadata Extractor
|
||||
export {
|
||||
extractAMBMetadata,
|
||||
validateAMBMetadata
|
||||
} from './extractors/amb-extractor.js'
|
||||
|
||||
// Forgejo Client
|
||||
export {
|
||||
ForgejoClient,
|
||||
createForgejoClient
|
||||
} from './forgejo-client.js'
|
||||
|
||||
/**
 * Convenience function: parses Markdown from different kinds of sources.
 *
 * A single-line string starting with `/`, `./` or `../` is read as a file
 * path, a single-line string starting with `http://` or `https://` is
 * downloaded, and everything else is parsed as Markdown text. Strings that
 * contain a line break are always treated as Markdown, because a document may
 * legitimately begin with a path or a URL.
 *
 * @param {string} source - File path, URL or Markdown string
 * @param {Object} [options] - Parser options, passed through unchanged
 * @returns {Promise<Object>} Parsed result
 * @throws {TypeError} When `source` is not a string
 * @throws {Error} When a URL answers with a non-2xx status
 */
export async function parse(source, options = {}) {
  if (typeof source !== 'string') {
    throw new TypeError(
      `parse() expects a file path, URL or Markdown string, got ${source === null ? 'null' : typeof source}`
    )
  }

  const { parseMarkdownFile, parseMarkdownString } = await import('./parser.js')

  const isSingleLine = !/[\r\n]/.test(source)

  // File path?
  const pathPrefixes = ['/', './', '../']
  if (isSingleLine && pathPrefixes.some(prefix => source.startsWith(prefix))) {
    return parseMarkdownFile(source, options)
  }

  // URL?
  if (isSingleLine && (source.startsWith('http://') || source.startsWith('https://'))) {
    const response = await fetch(source)
    // Without this check an HTTP error page would be parsed as Markdown.
    if (!response.ok) {
      throw new Error(
        `Failed to fetch Markdown from ${source}: ${response.status} ${response.statusText}`
      )
    }
    const markdown = await response.text()
    return parseMarkdownString(markdown, options)
  }

  // Otherwise treat the input as Markdown text.
  return parseMarkdownString(source, options)
}
|
||||
|
||||
// Default export
//
// `export { … } from '…'` only forwards names to importers; it does not create
// bindings inside this module. The values bundled into the default export are
// therefore imported explicitly here. Without these imports, evaluating this
// module throws a ReferenceError on the first undefined name. Import
// declarations are hoisted, so their position in the file does not matter.
import { parseMarkdownFile, parseMarkdownString } from './parser.js'
import { extractYAML } from './extractors/yaml-extractor.js'
import { extractAMBMetadata } from './extractors/amb-extractor.js'
import { ForgejoClient, createForgejoClient } from './forgejo-client.js'

export default {
  parse,
  parseMarkdownFile,
  parseMarkdownString,
  ForgejoClient,
  createForgejoClient,
  extractYAML,
  extractAMBMetadata
}
|
||||
234
src/parser.js
Normal file
234
src/parser.js
Normal file
|
|
@ -0,0 +1,234 @@
|
|||
/**
|
||||
* Core Markdown Parser
|
||||
* Nutzt unified/remark für Markdown-Parsing mit YAML Front Matter
|
||||
*/
|
||||
|
||||
import { unified } from 'unified'
|
||||
import remarkParse from 'remark-parse'
|
||||
import remarkFrontmatter from 'remark-frontmatter'
|
||||
import remarkGfm from 'remark-gfm'
|
||||
import remarkStringify from 'remark-stringify'
|
||||
import { readFile } from 'fs/promises'
|
||||
import { extractYAML, removeYAML } from './extractors/yaml-extractor.js'
|
||||
import { extractAMBMetadata } from './extractors/amb-extractor.js'
|
||||
|
||||
/**
 * Reads a Markdown file as UTF-8 and parses it including its YAML front
 * matter.
 *
 * @param {string} filePath - Path to the Markdown file
 * @param {Object} [options] - Optional configuration
 * @param {boolean} [options.extractYaml] - Extract YAML (default: true)
 * @param {boolean} [options.parseGfm] - GitHub Flavored Markdown (default: true)
 * @param {boolean} [options.extractAMB] - Extract AMB metadata (default: true)
 * @returns {Promise<Object>} Parsed result
 * @throws {Error} When the file cannot be read or parsed; the message starts
 *   with "Failed to parse Markdown file:"
 */
export async function parseMarkdownFile(filePath, options = {}) {
  const { extractYaml = true, parseGfm = true, extractAMB = true } = options
  const parserOptions = { extractYaml, parseGfm, extractAMB }

  try {
    const source = await readFile(filePath, 'utf-8')
    const parsed = await parseMarkdownString(source, parserOptions)
    return parsed
  } catch (failure) {
    throw new Error(`Failed to parse Markdown file: ${failure.message}`)
  }
}
|
||||
|
||||
/**
 * Parses a Markdown string including its YAML front matter.
 *
 * The result holds the untouched input (`raw`), the parsed front matter
 * (`yaml`), the derived metadata (`metadata`), the syntax tree of the body
 * (`ast`) and the body text without front matter (`content`). `yaml` stays
 * null when `extractYaml` is off; `metadata` stays null when either
 * extraction switch is off or no front matter was found.
 *
 * @param {string} markdownContent - Markdown content as string
 * @param {Object} [options] - Optional configuration
 * @param {boolean} [options.extractYaml] - Extract YAML (default: true)
 * @param {boolean} [options.parseGfm] - GitHub Flavored Markdown (default: true)
 * @param {boolean} [options.extractAMB] - Extract AMB metadata (default: true)
 * @returns {Promise<Object>} Parsed result
 * @throws {Error} When any step fails; the message starts with
 *   "Failed to parse Markdown string:"
 */
export async function parseMarkdownString(markdownContent, options = {}) {
  const { extractYaml = true, parseGfm = true, extractAMB = true } = options

  try {
    // Front matter and the metadata derived from it
    const yaml = extractYaml ? extractYAML(markdownContent) : null
    const metadata = extractAMB && yaml ? extractAMBMetadata(yaml) : null

    // Body without front matter; this is also what gets parsed below.
    const content = removeYAML(markdownContent)

    // Assemble the unified pipeline: Markdown → AST, front matter support,
    // and GitHub Flavored Markdown on request.
    const pipeline = unified()
      .use(remarkParse)
      .use(remarkFrontmatter, ['yaml'])
    if (parseGfm) {
      pipeline.use(remarkGfm)
    }

    const ast = pipeline.parse(content)

    return {
      raw: markdownContent,
      yaml,
      metadata,
      ast,
      content
    }
  } catch (failure) {
    throw new Error(`Failed to parse Markdown string: ${failure.message}`)
  }
}
|
||||
|
||||
/**
 * Converts a Markdown AST back into a Markdown string.
 *
 * The serializer registers the same syntax extensions that the parsers in
 * this module enable by default, so that nodes they produce (for example
 * GFM nodes) can be written back out.
 *
 * @param {Object} ast - Markdown Abstract Syntax Tree
 * @param {Object} [options] - Serializer options
 * @param {boolean} [options.parseGfm=true] - Support GitHub Flavored Markdown nodes
 * @param {boolean} [options.frontmatter=true] - Support YAML front matter nodes
 * @returns {Promise<string>} Markdown string
 */
export async function astToMarkdown(ast, options = {}) {
  const {
    parseGfm = true,
    frontmatter = true
  } = options

  const processor = unified()
    .use(remarkStringify)

  if (frontmatter) {
    processor.use(remarkFrontmatter, ['yaml'])
  }

  if (parseGfm) {
    processor.use(remarkGfm)
  }

  return processor.stringify(ast)
}
|
||||
|
||||
/**
 * Builds a preconfigured unified processor for Markdown.
 *
 * @param {Object} options - Pipeline options
 * @param {boolean} options.parseGfm - Register GitHub Flavored Markdown (default: true)
 * @param {boolean} options.frontmatter - Register YAML front matter support (default: true)
 * @returns {Object} unified processor
 */
export function createMarkdownProcessor(options = {}) {
  const { parseGfm = true, frontmatter = true } = options

  // Each entry is [plugin, ...pluginArguments], listed in registration order.
  const steps = [[remarkParse]]

  if (frontmatter) {
    steps.push([remarkFrontmatter, ['yaml']])
  }

  if (parseGfm) {
    steps.push([remarkGfm])
  }

  return steps.reduce(
    (pipeline, [plugin, ...args]) => pipeline.use(plugin, ...args),
    unified()
  )
}
|
||||
|
||||
/**
 * Collects every heading of a Markdown AST in document order.
 *
 * @param {Object} ast - Markdown AST
 * @returns {Array} Headings as `{ level, text }` objects
 */
export function extractHeadings(ast) {
  const found = []

  const walk = (current) => {
    const isHeading = current.type === 'heading'
    if (isHeading) {
      const level = current.depth
      const text = extractTextFromNode(current)
      found.push({ level, text })
    }

    // Leaf nodes have nothing to descend into.
    if (!current.children) {
      return
    }
    current.children.forEach((child) => walk(child))
  }

  walk(ast)
  return found
}
|
||||
|
||||
/**
 * Collects every link of a Markdown AST in document order.
 *
 * @param {Object} ast - Markdown AST
 * @returns {Array} Links as `{ url, title, text }` objects (`title` is null when absent)
 */
export function extractLinks(ast) {
  const gather = (node) => {
    // The node itself comes first so the result stays in pre-order.
    const own = node.type === 'link'
      ? [{
          url: node.url,
          title: node.title || null,
          text: extractTextFromNode(node)
        }]
      : []

    const nested = node.children ? node.children.flatMap(gather) : []

    return [...own, ...nested]
  }

  return gather(ast)
}
|
||||
|
||||
/**
 * Collects every image of a Markdown AST in document order.
 *
 * @param {Object} ast - Markdown AST
 * @returns {Array} Images as `{ url, alt, title }` objects (`alt`/`title` are null when absent or empty)
 */
export function extractImages(ast) {
  const images = []
  const pending = [ast]

  while (pending.length > 0) {
    const node = pending.pop()

    if (node.type === 'image') {
      const { url } = node
      images.push({
        url,
        alt: node.alt || null,
        title: node.title || null
      })
    }

    if (node.children) {
      // Push in reverse so the first child is popped next (document order).
      for (let i = node.children.length - 1; i >= 0; i -= 1) {
        pending.push(node.children[i])
      }
    }
  }

  return images
}
|
||||
|
||||
/**
 * Helper: returns the plain text contained in an AST node.
 *
 * Text is taken from `text` and `inlineCode` nodes (their `value`) and from
 * `image` nodes (their `alt`); parent nodes concatenate the text of their
 * children. Every other childless node contributes an empty string.
 *
 * @param {Object} node - AST node
 * @returns {string} Extracted text
 */
function extractTextFromNode(node) {
  // Inline code carries its content in `value` just like plain text; without
  // this a heading such as "Using `foo()`" would lose the code part.
  if (node.type === 'text' || node.type === 'inlineCode') {
    return node.value
  }

  // Images have no children; the alt text is their textual equivalent,
  // which keeps image-only links from yielding an empty text.
  if (node.type === 'image') {
    return node.alt || ''
  }

  if (node.children) {
    return node.children
      .map((child) => extractTextFromNode(child))
      .join('')
  }

  return ''
}
|
||||
66
test/fixtures/example.md
vendored
Normal file
66
test/fixtures/example.md
vendored
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
---
|
||||
commonMetadata:
|
||||
'@context': https://schema.org/
|
||||
creativeWorkStatus: Published
|
||||
type: LearningResource
|
||||
name: Beispiel für OER-Material
|
||||
description: >-
|
||||
Dies ist ein Beispiel für eine Markdown-Datei mit AMB-konformen Metadaten.
|
||||
Sie demonstriert die verschiedenen Felder des AMB-Standards.
|
||||
license: https://creativecommons.org/licenses/by/4.0/deed.de
|
||||
id: https://example.org/beispiel-oer
|
||||
creator:
|
||||
- givenName: Max
|
||||
familyName: Mustermann
|
||||
id: https://orcid.org/0000-0000-0000-0001
|
||||
type: Person
|
||||
affiliation:
|
||||
name: Beispiel-Universität
|
||||
id: https://ror.org/example123
|
||||
type: Organization
|
||||
- givenName: Erika
|
||||
familyName: Musterfrau
|
||||
id: https://orcid.org/0000-0000-0000-0002
|
||||
type: Person
|
||||
inLanguage:
|
||||
- de
|
||||
about:
|
||||
- https://w3id.org/kim/hochschulfaechersystematik/n079
|
||||
image: https://example.org/images/beispiel.jpg
|
||||
learningResourceType:
|
||||
- https://w3id.org/kim/hcrt/text
|
||||
educationalLevel:
|
||||
- https://w3id.org/kim/educationalLevel/level_A
|
||||
datePublished: '2025-10-01'
|
||||
title: Beispiel für OER-Material
|
||||
tags:
|
||||
- OER
|
||||
- Bildung
|
||||
- Beispiel
|
||||
---
|
||||
|
||||
# Beispiel für OER-Material
|
||||
|
||||
## Einleitung
|
||||
|
||||
Dies ist ein **Beispiel** für eine Markdown-Datei mit YAML Front Matter nach AMB-Standard.
|
||||
|
||||
## Hauptinhalt
|
||||
|
||||
### Erste Unterüberschrift
|
||||
|
||||
Hier ist etwas *Text* mit verschiedenen Formatierungen:
|
||||
|
||||
- Liste Item 1
|
||||
- Liste Item 2
|
||||
- Liste Item 3
|
||||
|
||||
### Zweite Unterüberschrift
|
||||
|
||||
Ein Link zu [GitHub](https://github.com) und ein Bild:
|
||||
|
||||

|
||||
|
||||
## Fazit
|
||||
|
||||
Das war ein einfaches Beispiel für strukturierte Bildungsinhalte.
|
||||
184
test/parser.test.js
Normal file
184
test/parser.test.js
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
/**
|
||||
* Tests für MDParser
|
||||
* Nutzt Node.js native test runner
|
||||
*/
|
||||
|
||||
import { test } from 'node:test'
|
||||
import assert from 'node:assert'
|
||||
import { parseMarkdownFile, parseMarkdownString } from '../src/parser.js'
|
||||
import { extractYAML, hasYAML, removeYAML } from '../src/extractors/yaml-extractor.js'
|
||||
import { extractAMBMetadata, validateAMBMetadata } from '../src/extractors/amb-extractor.js'
|
||||
import { join, dirname } from 'path'
|
||||
import { fileURLToPath } from 'url'
|
||||
|
||||
// ES modules have no built-in __dirname; derive this file's directory from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url))
|
||||
|
||||
test('YAML Extractor: extrahiert YAML Front Matter', () => {
  // Minimal document: two front-matter keys followed by a heading.
  const source = ['---', 'title: Test', 'author: Max', '---', '# Content'].join('\n')

  const frontMatter = extractYAML(source)

  assert.ok(frontMatter, 'YAML sollte extrahiert werden')
  const { title, author } = frontMatter
  assert.strictEqual(title, 'Test')
  assert.strictEqual(author, 'Max')
})
|
||||
|
||||
test('YAML Extractor: erkennt YAML Front Matter', () => {
  // [input, expected detection result]
  const cases = [
    ['---\ntitle: Test\n---\nContent', true],
    ['# Heading\nContent', false]
  ]

  for (const [input, expected] of cases) {
    assert.strictEqual(hasYAML(input), expected)
  }
})
|
||||
|
||||
test('YAML Extractor: entfernt YAML Front Matter', () => {
  const source = '---\ntitle: Test\n---\n# Content'

  const stripped = removeYAML(source)

  // No delimiter may survive, but the body must.
  const stillHasDelimiter = stripped.includes('---')
  assert.ok(!stillHasDelimiter, 'YAML sollte entfernt sein')

  const keepsBody = stripped.includes('# Content')
  assert.ok(keepsBody, 'Content sollte bleiben')
})
|
||||
|
||||
test('AMB Extractor: extrahiert Metadaten aus YAML', () => {
  // One creator given as separate name parts.
  const author = {
    givenName: 'Max',
    familyName: 'Mustermann',
    type: 'Person'
  }
  const commonMetadata = {
    '@context': 'https://schema.org/',
    type: 'LearningResource',
    name: 'Test Resource',
    description: 'Test Description',
    license: 'https://creativecommons.org/licenses/by/4.0/',
    datePublished: '2025-10-01',
    creator: [author]
  }

  const extracted = extractAMBMetadata({ commonMetadata })

  assert.strictEqual(extracted.name, 'Test Resource')
  assert.strictEqual(extracted.description, 'Test Description')
  assert.strictEqual(extracted.type, 'LearningResource')
  assert.ok(extracted.creator, 'Creator sollte vorhanden sein')

  const [firstCreator] = extracted.creator
  assert.strictEqual(firstCreator.name, 'Max Mustermann')
})
|
||||
|
||||
test('AMB Extractor: verwendet Fallbacks', () => {
  // No commonMetadata block, only generic front-matter keys.
  const extracted = extractAMBMetadata({
    title: 'Fallback Title',
    summary: 'Fallback Description',
    author: 'Max Mustermann'
  })

  assert.strictEqual(extracted.name, 'Fallback Title')
  assert.strictEqual(extracted.description, 'Fallback Description')

  const warnings = extracted._warnings
  assert.ok(warnings, 'Warnings sollten vorhanden sein')
  assert.ok(warnings.length > 0, 'Es sollten Warnings existieren')
})
|
||||
|
||||
test('AMB Extractor: validiert Metadaten', () => {
  const { valid, errors } = validateAMBMetadata({
    name: 'Test',
    description: 'Description',
    license: 'CC-BY-4.0',
    creator: [{ name: 'Max' }],
    datePublished: '2025-10-01',
    about: ['topic'],
    id: 'https://example.org/test'
  })

  assert.strictEqual(valid, true)
  assert.strictEqual(errors.length, 0)
})
|
||||
|
||||
test('Parser: parst Markdown-String', async () => {
  // Front matter, a heading, a blank line and a paragraph.
  const source = [
    '---',
    'title: Test',
    '---',
    '# Heading',
    '',
    'Some **bold** text.'
  ].join('\n')

  const parsed = await parseMarkdownString(source)

  assert.ok(parsed.yaml, 'YAML sollte extrahiert sein')
  assert.ok(parsed.ast, 'AST sollte existieren')
  assert.ok(parsed.content, 'Content sollte existieren')
  assert.strictEqual(parsed.yaml.title, 'Test')
})
|
||||
|
||||
test('Parser: parst lokale Markdown-Datei', async () => {
  const fixturePath = join(__dirname, 'fixtures/example.md')

  const parsed = await parseMarkdownFile(fixturePath)

  assert.ok(parsed.yaml, 'YAML sollte extrahiert sein')
  assert.ok(parsed.metadata, 'Metadaten sollten extrahiert sein')
  assert.ok(parsed.ast, 'AST sollte existieren')

  // Check the AMB metadata
  const { metadata } = parsed
  assert.strictEqual(metadata.name, 'Beispiel für OER-Material')
  assert.strictEqual(metadata.type, 'LearningResource')
  assert.ok(metadata.creator, 'Creator sollte vorhanden sein')
  assert.strictEqual(metadata.creator.length, 2, 'Sollte 2 Creators haben')
})
|
||||
|
||||
test('Parser: extrahiert Überschriften', async () => {
  const source = ['# H1', '## H2', '### H3'].join('\n')

  const parsed = await parseMarkdownString(source)
  const parserModule = await import('../src/parser.js')
  const found = parserModule.extractHeadings(parsed.ast)

  assert.strictEqual(found.length, 3)

  const [first, second, third] = found
  assert.strictEqual(first.level, 1)
  assert.strictEqual(first.text, 'H1')
  assert.strictEqual(second.level, 2)
  assert.strictEqual(third.level, 3)
})
|
||||
|
||||
test('Parser: extrahiert Links', async () => {
  // Second link carries an explicit title.
  const source = '[Link 1](https://example.com)\n[Link 2](https://github.com "GitHub")'

  const parsed = await parseMarkdownString(source)
  const parserModule = await import('../src/parser.js')
  const found = parserModule.extractLinks(parsed.ast)

  assert.strictEqual(found.length, 2)

  const [plain, titled] = found
  assert.strictEqual(plain.url, 'https://example.com')
  assert.strictEqual(plain.text, 'Link 1')
  assert.strictEqual(titled.url, 'https://github.com')
  assert.strictEqual(titled.title, 'GitHub')
})
|
||||
|
||||
test('Parser: extrahiert Bilder', async () => {
  const source = ''

  const parsed = await parseMarkdownString(source)
  const parserModule = await import('../src/parser.js')
  const found = parserModule.extractImages(parsed.ast)

  assert.strictEqual(found.length, 1)

  const [picture] = found
  assert.strictEqual(picture.url, 'image.png')
  assert.strictEqual(picture.alt, 'Alt Text')
  assert.strictEqual(picture.title, 'Title')
})
|
||||
|
||||
console.log('✅ Alle Tests erfolgreich!')
|
||||
Loading…
Add table
Add a link
Reference in a new issue