From 369bc4b101eb8257dee3a04cc95b39bb68a2430e Mon Sep 17 00:00:00 2001 From: "@s.roertgen" Date: Thu, 16 Jan 2025 22:07:24 +0100 Subject: [PATCH] Adjust to new oersi url --- README.md | 10 +++++++++- deps.edn | 6 ++++-- src/oersi/core.clj | 18 +++++++++++------- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 9561b72..5bbc155 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,14 @@ Some utilities to work with the open endpoints of OERSI, best run in the REPL at the moment. -Based on this documentation of OERSI: https://pad.gwdg.de/s/oersi-faq#Beispiel-Datenabzug +Based on this documentation of OERSI: https://sidre-docs-ba5cfa.gitlab.io/api/ + + +## Usage + +Crawl everything with provider `twillo` from oersi's `oer_data` index. + +`clj -X oersi.core/crawl-oersi :provider twillo` + diff --git a/deps.edn b/deps.edn index 8e93925..78262df 100644 --- a/deps.edn +++ b/deps.edn @@ -1,3 +1,5 @@ -{:deps {clj-http/clj-http {:mvn/version "3.13.0"} +{:deps {org.clojure/clojure {:mvn/version "1.12.0"} + org.clojure/core.async {:mvn/version "1.6.673"} + clj-http/clj-http {:mvn/version "3.13.0"} cheshire/cheshire {:mvn/version "5.13.0"} - nostr/nostr {:local/root "/home/steffen/coding/nostr-clj"}}} + net.clojars.laoc/nostr {:local/root "/home/laoc/coding/nostr-clj"}}} diff --git a/src/oersi/core.clj b/src/oersi/core.clj index 5020dca..f4f2816 100644 --- a/src/oersi/core.clj +++ b/src/oersi/core.clj @@ -4,11 +4,11 @@ [clojure.java.io :as io] [nostr.edufeed :as edufeed])) -(defn fetch-data [pit-id last-sort-value] - (let [url "https://oersi.org/resources/api-internal/search/_search?pretty" +(defn fetch-data [pit-id last-sort-value provider] + (let [url "https://oersi.org/resources/api/search/_search?pretty" _ (println last-sort-value) query (merge {:size 1000 - :query {:match {:mainEntityOfPage.provider.name "twillo"}} + :query {:match {:mainEntityOfPage.provider.name provider}} :pit {:id pit-id :keep_alive "1m"} :sort [{:id "asc"}] @@ -33,12 +33,13 @@ (defn crawl-oersi [args] (println "Crawl oersi" args) (let [output-file "oersi_data.jsonl" - pit (http/post "https://oersi.org/resources/api-internal/search/oer_data/_pit?keep_alive=1m&pretty" - {:accept :json}) + pit (http/post "https://oersi.org/resources/api/search/oer_data/_pit?keep_alive=1m&pretty" + {:accept :json + :user-agent "edufeed, mail@edufeed.org"}) pit-id (-> pit :body (#(json/parse-string % true)) :id)] (println "Generated PIT: " pit-id) (loop [last-sort-value nil] - (let [body (fetch-data pit-id last-sort-value) + (let [body (fetch-data pit-id last-sort-value (:provider args)) hits (-> body :hits :hits)] (save-to-jsonl body output-file) (if-not (empty? hits) @@ -46,7 +47,7 @@ (println "no more records to fetch")))))) (defn search-oersi [args] - (let [url "https://oersi.org/resources/api-internal/search/oer_data/_search?pretty" + (let [url "https://oersi.org/resources/api/search/oer_data/_search?pretty" query-2 {:size 1 :from 0 :query {:match_all {}}} @@ -61,6 +62,9 @@ :body (json/generate-string query-2)})] (println response))) +(comment + (search-oersi [])) + ;; FIXME read file, and then process line by line (defn export-to-nostr [args] (println args)