mirror of
https://github.com/edufeed-org/oersi-utils.git
synced 2025-12-07 23:34:35 +00:00
Initial commit
This commit is contained in:
commit
038a0a959c
6 changed files with 230 additions and 0 deletions
62
.clj-kondo/imports/potemkin/potemkin/config.edn
Normal file
62
.clj-kondo/imports/potemkin/potemkin/config.edn
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
{:lint-as {potemkin.collections/compile-if clojure.core/if
|
||||
potemkin.collections/reify-map-type clojure.core/reify
|
||||
potemkin.collections/def-map-type clj-kondo.lint-as/def-catch-all
|
||||
potemkin.collections/def-derived-map clj-kondo.lint-as/def-catch-all
|
||||
|
||||
potemkin.types/reify+ clojure.core/reify
|
||||
potemkin.types/defprotocol+ clojure.core/defprotocol
|
||||
potemkin.types/deftype+ clojure.core/deftype
|
||||
potemkin.types/defrecord+ clojure.core/defrecord
|
||||
potemkin.types/definterface+ clojure.core/defprotocol
|
||||
potemkin.types/extend-protocol+ clojure.core/extend-protocol
|
||||
potemkin.types/def-abstract-type clj-kondo.lint-as/def-catch-all
|
||||
|
||||
potemkin.utils/doit clojure.core/doseq
|
||||
potemkin.utils/doary clojure.core/doseq
|
||||
potemkin.utils/condp-case clojure.core/condp
|
||||
potemkin.utils/fast-bound-fn clojure.core/bound-fn
|
||||
|
||||
potemkin.walk/prewalk clojure.walk/prewalk
|
||||
potemkin.walk/postwalk clojure.walk/postwalk
|
||||
potemkin.walk/walk clojure.walk/walk
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;; top-level from import-vars
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Have hooks
|
||||
;;potemkin/import-fn potemkin.namespaces/import-fn
|
||||
;;potemkin/import-macro potemkin.namespaces/import-macro
|
||||
;;potemkin/import-def potemkin.namespaces/import-def
|
||||
|
||||
;; Internal, not transitive
|
||||
;;potemkin/unify-gensyms potemkin.macros/unify-gensyms
|
||||
;;potemkin/normalize-gensyms potemkin.macros/normalize-gensyms
|
||||
;;potemkin/equivalent? potemkin.macros/equivalent?
|
||||
|
||||
potemkin/condp-case clojure.core/condp
|
||||
potemkin/doit potemkin.utils/doit
|
||||
potemkin/doary potemkin.utils/doary
|
||||
|
||||
potemkin/def-abstract-type clj-kondo.lint-as/def-catch-all
|
||||
potemkin/reify+ clojure.core/reify
|
||||
potemkin/defprotocol+ clojure.core/defprotocol
|
||||
potemkin/deftype+ clojure.core/deftype
|
||||
potemkin/defrecord+ clojure.core/defrecord
|
||||
potemkin/definterface+ clojure.core/defprotocol
|
||||
potemkin/extend-protocol+ clojure.core/extend-protocol
|
||||
|
||||
potemkin/reify-map-type clojure.core/reify
|
||||
potemkin/def-derived-map clj-kondo.lint-as/def-catch-all
|
||||
potemkin/def-map-type clj-kondo.lint-as/def-catch-all}
|
||||
|
||||
;; leave import-vars alone, kondo special-cases it
|
||||
:hooks {:macroexpand {#_#_potemkin.namespaces/import-vars potemkin.namespaces/import-vars
|
||||
potemkin.namespaces/import-fn potemkin.namespaces/import-fn
|
||||
potemkin.namespaces/import-macro potemkin.namespaces/import-macro
|
||||
potemkin.namespaces/import-def potemkin.namespaces/import-def
|
||||
|
||||
#_#_potemkin/import-vars potemkin.namespaces/import-vars
|
||||
potemkin/import-fn potemkin.namespaces/import-fn
|
||||
potemkin/import-macro potemkin.namespaces/import-macro
|
||||
potemkin/import-def potemkin.namespaces/import-def}}}
|
||||
56
.clj-kondo/imports/potemkin/potemkin/potemkin/namespaces.clj
Normal file
56
.clj-kondo/imports/potemkin/potemkin/potemkin/namespaces.clj
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
(ns potemkin.namespaces
|
||||
(:require [clj-kondo.hooks-api :as api]))
|
||||
|
||||
(defn import-macro*
  "Returns the form `(def <name> <sym>)` as data.

  With one argument the def'd name is `sym` stripped of its namespace;
  with two arguments `name` is used as given."
  ([sym]
   ;; `name` here is still clojure.core/name; it is only shadowed in the 2-arity.
   (import-macro* sym (symbol (name sym))))
  ([sym name]
   (list 'def name sym)))
|
||||
|
||||
(defmacro import-fn
  "Lint-time stand-in for potemkin's `import-fn`: expands to a plain
  `(def <name> <sym>)` built by `import-macro*`. When `name` is omitted the
  un-namespaced name of `sym` is used."
  ([sym] (import-macro* sym))
  ([sym name] (import-macro* sym name)))
|
||||
|
||||
(defmacro import-macro
  "Lint-time stand-in for potemkin's `import-macro`: expands to a plain
  `(def <name> <sym>)` built by `import-macro*`. When `name` is omitted the
  un-namespaced name of `sym` is used."
  ([sym] (import-macro* sym))
  ([sym name] (import-macro* sym name)))
|
||||
|
||||
(defmacro import-def
  "Lint-time stand-in for potemkin's `import-def`: expands to a plain
  `(def <name> <sym>)` built by `import-macro*`. When `name` is omitted the
  un-namespaced name of `sym` is used."
  ([sym] (import-macro* sym))
  ([sym name] (import-macro* sym name)))
|
||||
|
||||
#_
|
||||
(defmacro import-vars
|
||||
"Imports a list of vars from other namespaces."
|
||||
[& syms]
|
||||
(let [unravel (fn unravel [x]
|
||||
(if (sequential? x)
|
||||
(->> x
|
||||
rest
|
||||
(mapcat unravel)
|
||||
(map
|
||||
#(symbol
|
||||
(str (first x)
|
||||
(when-let [n (namespace %)]
|
||||
(str "." n)))
|
||||
(name %))))
|
||||
[x]))
|
||||
syms (mapcat unravel syms)
|
||||
result `(do
|
||||
~@(map
|
||||
(fn [sym]
|
||||
(let [vr (resolve sym)
|
||||
m (meta vr)]
|
||||
(cond
|
||||
(nil? vr) `(throw (ex-info (format "`%s` does not exist" '~sym) {}))
|
||||
(:macro m) `(def ~(-> sym name symbol) ~sym)
|
||||
(:arglists m) `(def ~(-> sym name symbol) ~sym)
|
||||
:else `(def ~(-> sym name symbol) ~sym))))
|
||||
syms))]
|
||||
result))
|
||||
30
.gitignore
vendored
Normal file
30
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
.calva/output-window/
|
||||
.calva/repl.calva-repl
|
||||
.classpath
|
||||
.clj-kondo/.cache
|
||||
.cpcache
|
||||
.eastwood
|
||||
.factorypath
|
||||
.hg/
|
||||
.hgignore
|
||||
.java-version
|
||||
.lein-*
|
||||
.lsp/.cache
|
||||
.lsp/sqlite.db
|
||||
.nrepl-history
|
||||
.nrepl-port
|
||||
.portal/vs-code.edn
|
||||
.project
|
||||
.rebel_readline_history
|
||||
.settings
|
||||
.socket-repl-port
|
||||
.sw*
|
||||
.vscode
|
||||
*.class
|
||||
*.jar
|
||||
*.swp
|
||||
*~
|
||||
/checkouts
|
||||
/classes
|
||||
/target
|
||||
|
||||
7
README.md
Normal file
7
README.md
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# OERSI Utils
|
||||
|
||||
Utilities for working with the open endpoints of OERSI. For now they are best run from a REPL.
|
||||
|
||||
Based on this documentation of OERSI: https://pad.gwdg.de/s/oersi-faq#Beispiel-Datenabzug
|
||||
|
||||
|
||||
3
deps.edn
Normal file
3
deps.edn
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
{:deps {clj-http/clj-http {:mvn/version "3.13.0"}
        cheshire/cheshire {:mvn/version "5.13.0"}
        ;; :local/root is resolved relative to this project's directory, so the
        ;; nostr-clj library is expected to be checked out next to this repo
        ;; (../nostr-clj) rather than at a path that exists on one machine only.
        ;; NOTE(review): confirm the sibling-checkout layout, or switch to
        ;; :git/url + :git/sha coordinates once the library is published.
        nostr/nostr {:local/root "../nostr-clj"}}}
|
||||
72
src/oersi/core.clj
Normal file
72
src/oersi/core.clj
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
(ns oersi.core
|
||||
(:require [clj-http.client :as http]
|
||||
[cheshire.core :as json]
|
||||
[clojure.java.io :as io]
|
||||
[nostr.edufeed :as edufeed]))
|
||||
|
||||
(defn fetch-data
  "POSTs one search request against the OERSI search endpoint and returns the
  response body parsed from JSON with keyword keys.

  pit-id          - point-in-time id sent as `:pit {:id ...}` (kept alive 1m).
  last-sort-value - value for `:search_after`; when nil/false the key is left
                    out of the query entirely.

  The query asks for up to 1000 hits sorted by `:id` ascending, restricted to
  records whose provider name matches \"twillo\"."
  [pit-id last-sort-value]
  ;; Progress output: the cursor this request continues from.
  (println last-sort-value)
  (let [base-query {:size 1000
                    :query {:match {:mainEntityOfPage.provider.name "twillo"}}
                    :pit {:id pit-id
                          :keep_alive "1m"}
                    :sort [{:id "asc"}]
                    :track_total_hits true}
        payload (cond-> base-query
                  last-sort-value (assoc :search_after last-sort-value))]
    (-> (http/post "https://oersi.org/resources/api-internal/search/_search?pretty"
                   {:accept :json
                    :content-type :json
                    :body (json/generate-string payload)})
        :body
        (json/parse-string true))))
|
||||
|
||||
(defn save-hits
  "Prints how many hits were received. Nothing is persisted; returns nil."
  [hits]
  (let [n (count hits)]
    (println "Got " n "results")))
|
||||
|
||||
;; Appends one batch of hits to a JSON Lines (.jsonl) file, one record per line
|
||||
(defn save-to-jsonl
  "Appends every record found under `[:hits :hits]` of `data` to `file-path`,
  one JSON document per line. The file is opened in append mode and closed
  again before returning; returns nil."
  [data file-path]
  (with-open [out (io/writer file-path :append true)]
    (doseq [hit (get-in data [:hits :hits])]
      (.write out (str (json/generate-string hit) "\n")))))
|
||||
|
||||
(defn crawl-oersi
  "Pages through the OERSI search index and appends every batch of hits to
  `oersi_data.jsonl` in the current working directory.

  Opens a point in time (PIT) with a 1 minute keep-alive, then repeatedly
  calls `fetch-data`, continuing after the `:sort` value of the last hit of
  the previous page, until a page comes back without hits.

  args - only echoed to stdout.
  Returns nil."
  [args]
  (println "Crawl oersi" args)
  (let [output-file "oersi_data.jsonl"
        pit-response (http/post "https://oersi.org/resources/api-internal/search/oer_data/_pit?keep_alive=1m&pretty"
                                {:accept :json})
        initial-pit-id (-> pit-response :body (json/parse-string true) :id)]
    (println "Generated PIT: " initial-pit-id)
    (loop [pit-id initial-pit-id
           last-sort-value nil]
      (let [body (fetch-data pit-id last-sort-value)
            hits (-> body :hits :hits)]
        (save-to-jsonl body output-file)
        (if (seq hits)
          ;; A search response may hand back a different PIT id; the most
          ;; recently received id must be used for the next request. Fall back
          ;; to the current id when the response carries none.
          (recur (or (:pit_id body) pit-id)
                 (:sort (last hits)))
          (println "no more records to fetch"))))))
|
||||
|
||||
(defn search-oersi
  "Sends a sample search to the OERSI `oer_data` index and prints the raw
  HTTP response map. `args` is accepted but not used. Returns nil."
  [args]
  (let [;; The query that is actually sent: a single arbitrary record.
        match-all-query {:size 1
                         :from 0
                         :query {:match_all {}}}
        ;; Alternative full-text query; defined but not sent.
        _keyword-query {:size 20
                        :from 0
                        :query {:multi_match {:query "Klimawandel"
                                              :fields ["name", "description", "keywords"]}}
                        :sort [{:id "asc"}]}
        request {:content-type :json
                 :accept :json
                 :body (json/generate-string match-all-query)}]
    (println
     (http/post "https://oersi.org/resources/api-internal/search/oer_data/_search?pretty"
                request))))
|
||||
|
||||
;; FIXME read file, and then process line by line
|
||||
(defn export-to-nostr
  "Prints `args` and the `:file-path` entry it contains, then calls
  `edufeed/transform-amb-to-30142-event` with no arguments and returns its
  result.

  NOTE(review): the file at `:file-path` is not read yet, and the transform is
  invoked without any input; presumably it should receive each record of
  that file. Confirm against nostr.edufeed."
  [args]
  (println args)
  (println "file path" (:file-path args))
  (edufeed/transform-amb-to-30142-event))
|
||||
|
||||
(defn -main
  "Command-line entry point. Prints a greeting and returns nil.

  Accepts (and currently ignores) any number of command-line arguments so
  that launching with arguments, e.g. `clojure -M -m oersi.core foo`, does
  not fail with an ArityException."
  [& _args]
  (println "Hello world"))
|
||||
Loading…
Add table
Add a link
Reference in a new issue