Adjust to new OERSI URL

This commit is contained in:
@s.roertgen 2025-01-16 22:07:24 +01:00
parent 9cbba9f341
commit 369bc4b101
3 changed files with 24 additions and 10 deletions

View file

@ -2,6 +2,14 @@
Some utilities to work with the open endpoints of OERSI, best run in the REPL at the moment. Some utilities to work with the open endpoints of OERSI, best run in the REPL at the moment.
Based on this documentation of OERSI: https://pad.gwdg.de/s/oersi-faq#Beispiel-Datenabzug Based on this documentation of OERSI: https://sidre-docs-ba5cfa.gitlab.io/api/
## Usage
Crawl every record whose provider is `twillo` from OERSI's `oer_data` index:
`clj -X oersi.core/crawl-oersi :provider twillo`

View file

@ -1,3 +1,5 @@
{:deps {clj-http/clj-http {:mvn/version "3.13.0"} {:deps {org.clojure/clojure {:mvn/version "1.12.0"}
org.clojure/core.async {:mvn/version "1.6.673"}
clj-http/clj-http {:mvn/version "3.13.0"}
cheshire/cheshire {:mvn/version "5.13.0"} cheshire/cheshire {:mvn/version "5.13.0"}
nostr/nostr {:local/root "/home/steffen/coding/nostr-clj"}}} net.clojars.laoc/nostr {:local/root "/home/laoc/coding/nostr-clj"}}}

View file

@ -4,11 +4,11 @@
[clojure.java.io :as io] [clojure.java.io :as io]
[nostr.edufeed :as edufeed])) [nostr.edufeed :as edufeed]))
(defn fetch-data [pit-id last-sort-value] (defn fetch-data [pit-id last-sort-value provider]
(let [url "https://oersi.org/resources/api-internal/search/_search?pretty" (let [url "https://oersi.org/resources/api/search/_search?pretty"
_ (println last-sort-value) _ (println last-sort-value)
query (merge {:size 1000 query (merge {:size 1000
:query {:match {:mainEntityOfPage.provider.name "twillo"}} :query {:match {:mainEntityOfPage.provider.name provider}}
:pit {:id pit-id :pit {:id pit-id
:keep_alive "1m"} :keep_alive "1m"}
:sort [{:id "asc"}] :sort [{:id "asc"}]
@ -33,12 +33,13 @@
(defn crawl-oersi [args] (defn crawl-oersi [args]
(println "Crawl oersi" args) (println "Crawl oersi" args)
(let [output-file "oersi_data.jsonl" (let [output-file "oersi_data.jsonl"
pit (http/post "https://oersi.org/resources/api-internal/search/oer_data/_pit?keep_alive=1m&pretty" pit (http/post "https://oersi.org/resources/api/search/oer_data/_pit?keep_alive=1m&pretty"
{:accept :json}) {:accept :json
:user-agent "edufeed, mail@edufeed.org"})
pit-id (-> pit :body (#(json/parse-string % true)) :id)] pit-id (-> pit :body (#(json/parse-string % true)) :id)]
(println "Generated PIT: " pit-id) (println "Generated PIT: " pit-id)
(loop [last-sort-value nil] (loop [last-sort-value nil]
(let [body (fetch-data pit-id last-sort-value) (let [body (fetch-data pit-id last-sort-value (:provider args))
hits (-> body :hits :hits)] hits (-> body :hits :hits)]
(save-to-jsonl body output-file) (save-to-jsonl body output-file)
(if-not (empty? hits) (if-not (empty? hits)
@ -46,7 +47,7 @@
(println "no more records to fetch")))))) (println "no more records to fetch"))))))
(defn search-oersi [args] (defn search-oersi [args]
(let [url "https://oersi.org/resources/api-internal/search/oer_data/_search?pretty" (let [url "https://oersi.org/resources/api/search/oer_data/_search?pretty"
query-2 {:size 1 query-2 {:size 1
:from 0 :from 0
:query {:match_all {}}} :query {:match_all {}}}
@ -61,6 +62,9 @@
:body (json/generate-string query-2)})] :body (json/generate-string query-2)})]
(println response))) (println response)))
;; REPL scratch form: `comment` ignores its body when the file is loaded, so
;; the call below only runs when evaluated by hand to try `search-oersi`.
(comment
(search-oersi []))
;; FIXME read file, and then process line by line ;; FIXME read file, and then process line by line
(defn export-to-nostr [args] (defn export-to-nostr [args]
(println args) (println args)