Adjust to new OERSI URL

This commit is contained in:
@s.roertgen 2025-01-16 22:07:24 +01:00
parent 9cbba9f341
commit 369bc4b101
3 changed files with 24 additions and 10 deletions

View file

@ -2,6 +2,14 @@
Some utilities to work with the open endpoints of OERSI, best run in the REPL at the moment.
Based on this documentation of OERSI: https://pad.gwdg.de/s/oersi-faq#Beispiel-Datenabzug
Based on this documentation of OERSI: https://sidre-docs-ba5cfa.gitlab.io/api/
## Usage
Crawl everything with provider `twillo` from OERSI's `oer_data` index.
`clj -X oersi.core/crawl-oersi :provider twillo`

View file

@ -1,3 +1,5 @@
{:deps {clj-http/clj-http {:mvn/version "3.13.0"}
{:deps {org.clojure/clojure {:mvn/version "1.12.0"}
org.clojure/core.async {:mvn/version "1.6.673"}
clj-http/clj-http {:mvn/version "3.13.0"}
cheshire/cheshire {:mvn/version "5.13.0"}
nostr/nostr {:local/root "/home/steffen/coding/nostr-clj"}}}
net.clojars.laoc/nostr {:local/root "/home/laoc/coding/nostr-clj"}}}

View file

@ -4,11 +4,11 @@
[clojure.java.io :as io]
[nostr.edufeed :as edufeed]))
(defn fetch-data [pit-id last-sort-value]
(let [url "https://oersi.org/resources/api-internal/search/_search?pretty"
(defn fetch-data [pit-id last-sort-value provider]
(let [url "https://oersi.org/resources/api/search/_search?pretty"
_ (println last-sort-value)
query (merge {:size 1000
:query {:match {:mainEntityOfPage.provider.name "twillo"}}
:query {:match {:mainEntityOfPage.provider.name provider}}
:pit {:id pit-id
:keep_alive "1m"}
:sort [{:id "asc"}]
@ -33,12 +33,13 @@
(defn crawl-oersi [args]
(println "Crawl oersi" args)
(let [output-file "oersi_data.jsonl"
pit (http/post "https://oersi.org/resources/api-internal/search/oer_data/_pit?keep_alive=1m&pretty"
{:accept :json})
pit (http/post "https://oersi.org/resources/api/search/oer_data/_pit?keep_alive=1m&pretty"
{:accept :json
:user-agent "edufeed, mail@edufeed.org"})
pit-id (-> pit :body (#(json/parse-string % true)) :id)]
(println "Generated PIT: " pit-id)
(loop [last-sort-value nil]
(let [body (fetch-data pit-id last-sort-value)
(let [body (fetch-data pit-id last-sort-value (:provider args))
hits (-> body :hits :hits)]
(save-to-jsonl body output-file)
(if-not (empty? hits)
@ -46,7 +47,7 @@
(println "no more records to fetch"))))))
(defn search-oersi [args]
(let [url "https://oersi.org/resources/api-internal/search/oer_data/_search?pretty"
(let [url "https://oersi.org/resources/api/search/oer_data/_search?pretty"
query-2 {:size 1
:from 0
:query {:match_all {}}}
@ -61,6 +62,9 @@
:body (json/generate-string query-2)})]
(println response)))
(comment
(search-oersi []))
;; FIXME read file, and then process line by line
(defn export-to-nostr [args]
(println args)