mirror of
https://github.com/edufeed-org/oersi-utils.git
synced 2025-12-09 16:24:35 +00:00
Initial commit
This commit is contained in:
commit
038a0a959c
6 changed files with 230 additions and 0 deletions
62
.clj-kondo/imports/potemkin/potemkin/config.edn
Normal file
62
.clj-kondo/imports/potemkin/potemkin/config.edn
Normal file
|
|
@ -0,0 +1,62 @@
|
||||||
|
{:lint-as {potemkin.collections/compile-if clojure.core/if
|
||||||
|
potemkin.collections/reify-map-type clojure.core/reify
|
||||||
|
potemkin.collections/def-map-type clj-kondo.lint-as/def-catch-all
|
||||||
|
potemkin.collections/def-derived-map clj-kondo.lint-as/def-catch-all
|
||||||
|
|
||||||
|
potemkin.types/reify+ clojure.core/reify
|
||||||
|
potemkin.types/defprotocol+ clojure.core/defprotocol
|
||||||
|
potemkin.types/deftype+ clojure.core/deftype
|
||||||
|
potemkin.types/defrecord+ clojure.core/defrecord
|
||||||
|
potemkin.types/definterface+ clojure.core/defprotocol
|
||||||
|
potemkin.types/extend-protocol+ clojure.core/extend-protocol
|
||||||
|
potemkin.types/def-abstract-type clj-kondo.lint-as/def-catch-all
|
||||||
|
|
||||||
|
potemkin.utils/doit clojure.core/doseq
|
||||||
|
potemkin.utils/doary clojure.core/doseq
|
||||||
|
potemkin.utils/condp-case clojure.core/condp
|
||||||
|
potemkin.utils/fast-bound-fn clojure.core/bound-fn
|
||||||
|
|
||||||
|
potemkin.walk/prewalk clojure.walk/prewalk
|
||||||
|
potemkin.walk/postwalk clojure.walk/postwalk
|
||||||
|
potemkin.walk/walk clojure.walk/walk
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;; top-level from import-vars
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;; Have hooks
|
||||||
|
;;potemkin/import-fn potemkin.namespaces/import-fn
|
||||||
|
;;potemkin/import-macro potemkin.namespaces/import-macro
|
||||||
|
;;potemkin/import-def potemkin.namespaces/import-def
|
||||||
|
|
||||||
|
;; Internal, not transitive
|
||||||
|
;;potemkin/unify-gensyms potemkin.macros/unify-gensyms
|
||||||
|
;;potemkin/normalize-gensyms potemkin.macros/normalize-gensyms
|
||||||
|
;;potemkin/equivalent? potemkin.macros/equivalent?
|
||||||
|
|
||||||
|
potemkin/condp-case clojure.core/condp
|
||||||
|
potemkin/doit potemkin.utils/doit
|
||||||
|
potemkin/doary potemkin.utils/doary
|
||||||
|
|
||||||
|
potemkin/def-abstract-type clj-kondo.lint-as/def-catch-all
|
||||||
|
potemkin/reify+ clojure.core/reify
|
||||||
|
potemkin/defprotocol+ clojure.core/defprotocol
|
||||||
|
potemkin/deftype+ clojure.core/deftype
|
||||||
|
potemkin/defrecord+ clojure.core/defrecord
|
||||||
|
potemkin/definterface+ clojure.core/defprotocol
|
||||||
|
potemkin/extend-protocol+ clojure.core/extend-protocol
|
||||||
|
|
||||||
|
potemkin/reify-map-type clojure.core/reify
|
||||||
|
potemkin/def-derived-map clj-kondo.lint-as/def-catch-all
|
||||||
|
potemkin/def-map-type clj-kondo.lint-as/def-catch-all}
|
||||||
|
|
||||||
|
;; leave import-vars alone, kondo special-cases it
|
||||||
|
:hooks {:macroexpand {#_#_potemkin.namespaces/import-vars potemkin.namespaces/import-vars
|
||||||
|
potemkin.namespaces/import-fn potemkin.namespaces/import-fn
|
||||||
|
potemkin.namespaces/import-macro potemkin.namespaces/import-macro
|
||||||
|
potemkin.namespaces/import-def potemkin.namespaces/import-def
|
||||||
|
|
||||||
|
#_#_potemkin/import-vars potemkin.namespaces/import-vars
|
||||||
|
potemkin/import-fn potemkin.namespaces/import-fn
|
||||||
|
potemkin/import-macro potemkin.namespaces/import-macro
|
||||||
|
potemkin/import-def potemkin.namespaces/import-def}}}
|
||||||
56
.clj-kondo/imports/potemkin/potemkin/potemkin/namespaces.clj
Normal file
56
.clj-kondo/imports/potemkin/potemkin/potemkin/namespaces.clj
Normal file
|
|
@ -0,0 +1,56 @@
|
||||||
|
(ns potemkin.namespaces
|
||||||
|
(:require [clj-kondo.hooks-api :as api]))
|
||||||
|
|
||||||
|
(defn import-macro*
  "Builds the form `(def <local-name> <sym>)` as plain data.

  With one argument the local name is the unqualified name of `sym`;
  with two arguments the given `name` is used as the local name."
  ([sym]
   ;; `name` here is still clojure.core/name; it is only shadowed in the
   ;; two-argument arity below.
   (import-macro* sym (symbol (name sym))))
  ([sym name]
   (list 'def name sym)))
|
||||||
|
|
||||||
|
(defmacro import-fn
  "Lint-time stand-in for potemkin's import-fn: expands to a plain `def`
  of `sym` under its own unqualified name, or under `name` when given."
  ([sym] (import-macro* sym))
  ([sym name] (import-macro* sym name)))
|
||||||
|
|
||||||
|
(defmacro import-macro
  "Lint-time stand-in for potemkin's import-macro: expands to a plain `def`
  of `sym` under its own unqualified name, or under `name` when given."
  ([sym] (import-macro* sym))
  ([sym name] (import-macro* sym name)))
|
||||||
|
|
||||||
|
(defmacro import-def
  "Lint-time stand-in for potemkin's import-def: expands to a plain `def`
  of `sym` under its own unqualified name, or under `name` when given."
  ([sym] (import-macro* sym))
  ([sym name] (import-macro* sym name)))
|
||||||
|
|
||||||
|
;; Disabled on purpose: the `#_` reader macro below makes the reader discard
;; the whole `import-vars` form, so this macro is never defined.
;; NOTE(review): presumably kept only for reference, since the accompanying
;; config.edn says clj-kondo special-cases import-vars -- confirm before
;; re-enabling.
#_
(defmacro import-vars
  "Imports a list of vars from other namespaces."
  [& syms]
  (let [;; Flattens nested specs such as [ns.prefix a b [sub c]] into fully
        ;; qualified symbols; a bare symbol is returned as a one-element vector.
        unravel (fn unravel [spec]
                  (if (sequential? spec)
                    (->> (rest spec)
                         (mapcat unravel)
                         (map #(symbol
                                (str (first spec)
                                     (when-let [n (namespace %)]
                                       (str "." n)))
                                (name %))))
                    [spec]))
        all-syms (mapcat unravel syms)]
    `(do
       ~@(map
          (fn [sym]
            (let [resolved (resolve sym)
                  info (meta resolved)]
              (cond
                (nil? resolved) `(throw (ex-info (format "`%s` does not exist" '~sym) {}))
                (:macro info) `(def ~(-> sym name symbol) ~sym)
                (:arglists info) `(def ~(-> sym name symbol) ~sym)
                :else `(def ~(-> sym name symbol) ~sym))))
          all-syms))))
|
||||||
30
.gitignore
vendored
Normal file
30
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,30 @@
|
||||||
|
.calva/output-window/
|
||||||
|
.calva/repl.calva-repl
|
||||||
|
.classpath
|
||||||
|
.clj-kondo/.cache
|
||||||
|
.cpcache
|
||||||
|
.eastwood
|
||||||
|
.factorypath
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.java-version
|
||||||
|
.lein-*
|
||||||
|
.lsp/.cache
|
||||||
|
.lsp/sqlite.db
|
||||||
|
.nrepl-history
|
||||||
|
.nrepl-port
|
||||||
|
.portal/vs-code.edn
|
||||||
|
.project
|
||||||
|
.rebel_readline_history
|
||||||
|
.settings
|
||||||
|
.socket-repl-port
|
||||||
|
.sw*
|
||||||
|
.vscode
|
||||||
|
*.class
|
||||||
|
*.jar
|
||||||
|
*.swp
|
||||||
|
*~
|
||||||
|
/checkouts
|
||||||
|
/classes
|
||||||
|
/target
|
||||||
|
|
||||||
7
README.md
Normal file
7
README.md
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
# OERSI Utils
|
||||||
|
|
||||||
|
Utilities for working with the open API endpoints of OERSI. For now, they are best run from a REPL.
|
||||||
|
|
||||||
|
Based on this documentation of OERSI: https://pad.gwdg.de/s/oersi-faq#Beispiel-Datenabzug
|
||||||
|
|
||||||
|
|
||||||
3
deps.edn
Normal file
3
deps.edn
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
;; NOTE(review): nostr/nostr below is a :local/root dependency pointing at an
;; absolute path under /home/steffen, so it only resolves on a machine that has
;; a checkout at exactly that location. Consider a relative path or a git
;; coordinate -- confirm the intended source of nostr-clj first.
{:deps {clj-http/clj-http {:mvn/version "3.13.0"}
|
||||||
|
cheshire/cheshire {:mvn/version "5.13.0"}
|
||||||
|
nostr/nostr {:local/root "/home/steffen/coding/nostr-clj"}}}
|
||||||
72
src/oersi/core.clj
Normal file
72
src/oersi/core.clj
Normal file
|
|
@ -0,0 +1,72 @@
|
||||||
|
(ns oersi.core
|
||||||
|
(:require [clj-http.client :as http]
|
||||||
|
[cheshire.core :as json]
|
||||||
|
[clojure.java.io :as io]
|
||||||
|
[nostr.edufeed :as edufeed]))
|
||||||
|
|
||||||
|
(defn fetch-data
  "Fetches one page (up to 1000 hits) of the point-in-time search identified
  by `pit-id` and returns the response body parsed into a map with keyword
  keys.

  `last-sort-value` is the sort value of the last hit of the previous page;
  when it is truthy it is sent as `search_after`, otherwise the search starts
  from the beginning. The query is currently fixed to resources whose
  provider name matches \"twillo\"."
  [pit-id last-sort-value]
  ;; Progress/debug output: shows where the current page starts.
  (println last-sort-value)
  (let [base-query {:size 1000
                    :query {:match {:mainEntityOfPage.provider.name "twillo"}}
                    :pit {:id pit-id
                          :keep_alive "1m"}
                    :sort [{:id "asc"}]
                    :track_total_hits true}
        payload (cond-> base-query
                  last-sort-value (assoc :search_after last-sort-value))
        reply (http/post "https://oersi.org/resources/api-internal/search/_search?pretty"
                         {:accept :json
                          :content-type :json
                          :body (json/generate-string payload)})]
    (json/parse-string (:body reply) true)))
|
||||||
|
|
||||||
|
(defn save-hits
  "Prints how many items `hits` contains. Despite the name, nothing is
  persisted here. Returns nil."
  [hits]
  (let [n (count hits)]
    (println "Got " n "results")))
|
||||||
|
|
||||||
|
;; Appends one batch of search hits to a JSON Lines file.
(defn save-to-jsonl
  "Appends every hit found under `[:hits :hits]` in `data` to `file-path`,
  one JSON document per line. The file is opened in append mode, so existing
  content is kept. Returns nil."
  [data file-path]
  (with-open [out (io/writer file-path :append true)]
    (run! (fn [hit]
            (.write out (str (json/generate-string hit) "\n")))
          (get-in data [:hits :hits]))))
|
||||||
|
|
||||||
|
(defn crawl-oersi
  "Downloads all matching records through a point-in-time (PIT) search and
  appends them to `oersi_data.jsonl` in the working directory.

  A PIT is opened first, then pages are requested with `fetch-data` until a
  page comes back without hits. `args` is only printed.

  Each page is appended to the output file, so running the crawl twice
  without removing the file duplicates its content."
  [args]
  (println "Crawl oersi" args)
  (let [output-file "oersi_data.jsonl"
        pit (http/post "https://oersi.org/resources/api-internal/search/oer_data/_pit?keep_alive=1m&pretty"
                       {:accept :json})
        initial-pit-id (-> pit :body (json/parse-string true) :id)]
    (println "Generated PIT: " initial-pit-id)
    (loop [pit-id initial-pit-id
           last-sort-value nil]
      (let [body (fetch-data pit-id last-sort-value)
            hits (-> body :hits :hits)]
        (save-to-jsonl body output-file)
        (if (seq hits)
          ;; The search response may carry a refreshed PIT id; the next
          ;; request has to use the most recent one. Fall back to the
          ;; current id when the response does not include it.
          (recur (or (:pit_id body) pit-id)
                 (:sort (last hits)))
          (println "no more records to fetch"))))))
|
||||||
|
|
||||||
|
(defn search-oersi
  "Sends a single match-all search (size 1) to the OERSI search endpoint and
  prints the raw HTTP response map. `args` is not used."
  [args]
  (let [endpoint "https://oersi.org/resources/api-internal/search/oer_data/_search?pretty"
        match-all-query {:size 1
                         :from 0
                         :query {:match_all {}}}
        ;; Example full-text query; it is built but currently NOT sent.
        keyword-query {:size 20
                       :from 0
                       :query {:multi_match {:query "Klimawandel"
                                             :fields ["name", "description", "keywords"]}}
                       :sort [{:id "asc"}]}
        request {:content-type :json
                 :accept :json
                 :body (json/generate-string match-all-query)}]
    (println (http/post endpoint request))))
|
||||||
|
|
||||||
|
;; FIXME read file, and then process line by line
(defn export-to-nostr
  "Work in progress: prints `args` and the `:file-path` taken from it, then
  calls `edufeed/transform-amb-to-30142-event` without arguments.

  NOTE(review): the file at `:file-path` is not read yet and nothing is
  passed to the transform function -- see the FIXME above."
  [args]
  (println args)
  (let [path (:file-path args)]
    (println "file path" path)
    (edufeed/transform-amb-to-30142-event)))
|
||||||
|
|
||||||
|
(defn -main
  "Command-line entry point. Prints a greeting and returns nil.

  Accepts (and currently ignores) any number of command-line arguments so
  that launching the namespace with extra arguments does not fail with an
  arity error."
  [& _args]
  (println "Hello world"))
|
||||||
Loading…
Add table
Add a link
Reference in a new issue