From 038a0a959c2f61adf22ebecc353e3c3473565c82 Mon Sep 17 00:00:00 2001
From: "@s.roertgen"
Date: Tue, 31 Dec 2024 11:19:00 +0100
Subject: [PATCH] Initial commit

---
 .../imports/potemkin/potemkin/config.edn      | 62 ++++++++++++
 .../potemkin/potemkin/potemkin/namespaces.clj | 56 +++++++++++
 .gitignore                                    | 30 ++++++
 README.md                                     |  7 ++
 deps.edn                                      |  4 +
 src/oersi/core.clj                            | 97 +++++++++++++++++++
 6 files changed, 256 insertions(+)
 create mode 100644 .clj-kondo/imports/potemkin/potemkin/config.edn
 create mode 100644 .clj-kondo/imports/potemkin/potemkin/potemkin/namespaces.clj
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 deps.edn
 create mode 100644 src/oersi/core.clj

diff --git a/.clj-kondo/imports/potemkin/potemkin/config.edn b/.clj-kondo/imports/potemkin/potemkin/config.edn
new file mode 100644
index 0000000..3f59f3e
--- /dev/null
+++ b/.clj-kondo/imports/potemkin/potemkin/config.edn
@@ -0,0 +1,62 @@
+{:lint-as {potemkin.collections/compile-if clojure.core/if
+           potemkin.collections/reify-map-type clojure.core/reify
+           potemkin.collections/def-map-type clj-kondo.lint-as/def-catch-all
+           potemkin.collections/def-derived-map clj-kondo.lint-as/def-catch-all
+
+           potemkin.types/reify+ clojure.core/reify
+           potemkin.types/defprotocol+ clojure.core/defprotocol
+           potemkin.types/deftype+ clojure.core/deftype
+           potemkin.types/defrecord+ clojure.core/defrecord
+           potemkin.types/definterface+ clojure.core/defprotocol
+           potemkin.types/extend-protocol+ clojure.core/extend-protocol
+           potemkin.types/def-abstract-type clj-kondo.lint-as/def-catch-all
+
+           potemkin.utils/doit clojure.core/doseq
+           potemkin.utils/doary clojure.core/doseq
+           potemkin.utils/condp-case clojure.core/condp
+           potemkin.utils/fast-bound-fn clojure.core/bound-fn
+
+           potemkin.walk/prewalk clojure.walk/prewalk
+           potemkin.walk/postwalk clojure.walk/postwalk
+           potemkin.walk/walk clojure.walk/walk
+
+           ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+           ;;;; top-level from import-vars
+           ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+           ;; Have hooks
+           ;;potemkin/import-fn potemkin.namespaces/import-fn
+           ;;potemkin/import-macro potemkin.namespaces/import-macro
+           ;;potemkin/import-def potemkin.namespaces/import-def
+
+           ;; Internal, not transitive
+           ;;potemkin/unify-gensyms potemkin.macros/unify-gensyms
+           ;;potemkin/normalize-gensyms potemkin.macros/normalize-gensyms
+           ;;potemkin/equivalent? potemkin.macros/equivalent?
+
+           potemkin/condp-case clojure.core/condp
+           potemkin/doit potemkin.utils/doit
+           potemkin/doary potemkin.utils/doary
+
+           potemkin/def-abstract-type clj-kondo.lint-as/def-catch-all
+           potemkin/reify+ clojure.core/reify
+           potemkin/defprotocol+ clojure.core/defprotocol
+           potemkin/deftype+ clojure.core/deftype
+           potemkin/defrecord+ clojure.core/defrecord
+           potemkin/definterface+ clojure.core/defprotocol
+           potemkin/extend-protocol+ clojure.core/extend-protocol
+
+           potemkin/reify-map-type clojure.core/reify
+           potemkin/def-derived-map clj-kondo.lint-as/def-catch-all
+           potemkin/def-map-type clj-kondo.lint-as/def-catch-all}
+
+ ;; leave import-vars alone, kondo special-cases it
+ :hooks {:macroexpand {#_#_potemkin.namespaces/import-vars potemkin.namespaces/import-vars
+                       potemkin.namespaces/import-fn potemkin.namespaces/import-fn
+                       potemkin.namespaces/import-macro potemkin.namespaces/import-macro
+                       potemkin.namespaces/import-def potemkin.namespaces/import-def
+
+                       #_#_potemkin/import-vars potemkin.namespaces/import-vars
+                       potemkin/import-fn potemkin.namespaces/import-fn
+                       potemkin/import-macro potemkin.namespaces/import-macro
+                       potemkin/import-def potemkin.namespaces/import-def}}}
diff --git a/.clj-kondo/imports/potemkin/potemkin/potemkin/namespaces.clj b/.clj-kondo/imports/potemkin/potemkin/potemkin/namespaces.clj
new file mode 100644
index 0000000..a247af5
--- /dev/null
+++ b/.clj-kondo/imports/potemkin/potemkin/potemkin/namespaces.clj
@@ -0,0 +1,56 @@
+(ns potemkin.namespaces
+  (:require [clj-kondo.hooks-api :as api]))
+
+(defn import-macro*
+  ([sym]
+   `(def ~(-> sym name symbol) ~sym))
+  ([sym name]
+   `(def ~name ~sym)))
+
+(defmacro import-fn
+  ([sym]
+   (import-macro* sym))
+  ([sym name]
+   (import-macro* sym name)))
+
+(defmacro import-macro
+  ([sym]
+   (import-macro* sym))
+  ([sym name]
+   (import-macro* sym name)))
+
+(defmacro import-def
+  ([sym]
+   (import-macro* sym))
+  ([sym name]
+   (import-macro* sym name)))
+
+#_
+(defmacro import-vars
+  "Imports a list of vars from other namespaces."
+  [& syms]
+  (let [unravel (fn unravel [x]
+                  (if (sequential? x)
+                    (->> x
+                         rest
+                         (mapcat unravel)
+                         (map
+                          #(symbol
+                            (str (first x)
+                                 (when-let [n (namespace %)]
+                                   (str "." n)))
+                            (name %))))
+                    [x]))
+        syms (mapcat unravel syms)
+        result `(do
+                  ~@(map
+                     (fn [sym]
+                       (let [vr (resolve sym)
+                             m (meta vr)]
+                         (cond
+                           (nil? vr) `(throw (ex-info (format "`%s` does not exist" '~sym) {}))
+                           (:macro m) `(def ~(-> sym name symbol) ~sym)
+                           (:arglists m) `(def ~(-> sym name symbol) ~sym)
+                           :else `(def ~(-> sym name symbol) ~sym))))
+                     syms))]
+    result))
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0a04143
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,30 @@
+.calva/output-window/
+.calva/repl.calva-repl
+.classpath
+.clj-kondo/.cache
+.cpcache
+.eastwood
+.factorypath
+.hg/
+.hgignore
+.java-version
+.lein-*
+.lsp/.cache
+.lsp/sqlite.db
+.nrepl-history
+.nrepl-port
+.portal/vs-code.edn
+.project
+.rebel_readline_history
+.settings
+.socket-repl-port
+.sw*
+.vscode
+*.class
+*.jar
+*.swp
+*~
+/checkouts
+/classes
+/target
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9561b72
--- /dev/null
+++ b/README.md
@@ -0,0 +1,7 @@
+# OERSI Utils
+
+Some utilities to work with the open endpoints of OERSI, best run in the REPL at the moment.
+
+Based on this documentation of OERSI: https://pad.gwdg.de/s/oersi-faq#Beispiel-Datenabzug
+
+
diff --git a/deps.edn b/deps.edn
new file mode 100644
index 0000000..8e93925
--- /dev/null
+++ b/deps.edn
@@ -0,0 +1,4 @@
+;; NOTE: the :local/root below is an absolute path to a local nostr-clj checkout; adjust it per machine.
+{:deps {clj-http/clj-http {:mvn/version "3.13.0"}
+        cheshire/cheshire {:mvn/version "5.13.0"}
+        nostr/nostr {:local/root "/home/steffen/coding/nostr-clj"}}}
diff --git a/src/oersi/core.clj b/src/oersi/core.clj
new file mode 100644
index 0000000..5020dca
--- /dev/null
+++ b/src/oersi/core.clj
@@ -0,0 +1,97 @@
+(ns oersi.core
+  (:require [clj-http.client :as http]
+            [cheshire.core :as json]
+            [clojure.java.io :as io]
+            [nostr.edufeed :as edufeed]))
+
+(defn fetch-data
+  "Requests one page (at most 1000 hits, sorted by id) of resources whose
+  provider name matches twillo, searching within the point in time `pit-id`.
+  A non-nil `last-sort-value` is sent as `search_after`; pass nil for the
+  first page. Returns the JSON response body parsed with keyword keys."
+  [pit-id last-sort-value]
+  (let [url "https://oersi.org/resources/api-internal/search/_search?pretty"
+        _ (println last-sort-value)
+        query (merge {:size 1000
+                      :query {:match {:mainEntityOfPage.provider.name "twillo"}}
+                      :pit {:id pit-id
+                            :keep_alive "1m"}
+                      :sort [{:id "asc"}]
+                      :track_total_hits true}
+                     (when last-sort-value
+                       {:search_after last-sort-value}))
+        response (http/post url
+                            {:accept :json
+                             :content-type :json
+                             :body (json/generate-string query)})]
+    (json/parse-string (:body response) true)))
+
+(defn save-hits
+  "Prints the number of `hits`; nothing is written anywhere."
+  [hits]
+  (println "Got " (count hits) "results"))
+
+(defn save-to-jsonl
+  "Appends each record found under `:hits` -> `:hits` of `data` to `file-path`
+  as one JSON document per line."
+  [data file-path]
+  (with-open [^java.io.Writer writer (io/writer file-path :append true)]
+    (doseq [record (:hits (:hits data))]
+      (.write writer (str (json/generate-string record) "\n")))))
+
+(defn crawl-oersi
+  "Opens a point in time on the OERSI `oer_data` index, then pages through the
+  results of `fetch-data`, appending every page to oersi_data.jsonl until a
+  page comes back empty. `args` is only printed. Returns nil."
+  [args]
+  (println "Crawl oersi" args)
+  (let [output-file "oersi_data.jsonl"
+        pit (http/post "https://oersi.org/resources/api-internal/search/oer_data/_pit?keep_alive=1m&pretty"
+                       {:accept :json})
+        first-pit-id (-> pit :body (json/parse-string true) :id)]
+    (println "Generated PIT: " first-pit-id)
+    (loop [pit-id first-pit-id
+           last-sort-value nil]
+      (let [body (fetch-data pit-id last-sort-value)
+            hits (-> body :hits :hits)]
+        (save-to-jsonl body output-file)
+        (if (seq hits)
+          ;; Per the Elasticsearch point-in-time API a search response can return
+          ;; a different PIT id (:pit_id); use the latest, else keep the current.
+          (recur (or (:pit_id body) pit-id)
+                 (:sort (last hits)))
+          (println "no more records to fetch"))))))
+
+(defn search-oersi
+  "Posts a one-hit match_all query to the OERSI `oer_data` search endpoint and
+  prints the raw HTTP response. `args` is ignored."
+  [args]
+  (let [url "https://oersi.org/resources/api-internal/search/oer_data/_search?pretty"
+        query-2 {:size 1
+                 :from 0
+                 :query {:match_all {}}}
+        ;; Example full-text query; bound but not sent at the moment.
+        query {:size 20
+               :from 0
+               :query {:multi_match {:query "Klimawandel"
+                                     :fields ["name", "description", "keywords"]}}
+               :sort [{:id "asc"}]}
+        response (http/post url
+                            {:content-type :json
+                             :accept :json
+                             :body (json/generate-string query-2)})]
+    (println response)))
+
+;; FIXME read file, and then process line by line
+;; NOTE(review): the transform is called with no arguments and `file-path` is
+;; only printed - presumably each record should be passed in; confirm.
+(defn export-to-nostr [args]
+  (println args)
+  (let [file-path (:file-path args)
+        _ (println "file path" file-path)]
+    (edufeed/transform-amb-to-30142-event)))
+
+(defn -main
+  "Command-line entry point; accepts and ignores any arguments."
+  [& _args]
+  (println "Hello world"))