mirror of
https://github.com/edufeed-org/oersi-utils.git
synced 2025-12-07 23:34:35 +00:00
Adjust to the new OERSI URL
This commit is contained in:
parent
9cbba9f341
commit
369bc4b101
3 changed files with 24 additions and 10 deletions
10
README.md
10
README.md
|
|
@ -2,6 +2,14 @@
|
|||
|
||||
Some utilities to work with the open endpoints of OERSI, best run in the REPL at the moment.
|
||||
|
||||
Based on this documentation of OERSI: https://pad.gwdg.de/s/oersi-faq#Beispiel-Datenabzug
|
||||
Based on this documentation of OERSI: https://sidre-docs-ba5cfa.gitlab.io/api/
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
Crawl everything with provider `twillo` from OERSI's `oer_data` index.
|
||||
|
||||
`clj -X oersi.core/crawl-oersi :provider twillo`
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
6
deps.edn
6
deps.edn
|
|
@ -1,3 +1,5 @@
|
|||
{:deps {clj-http/clj-http {:mvn/version "3.13.0"}
|
||||
{:deps {org.clojure/clojure {:mvn/version "1.12.0"}
|
||||
org.clojure/core.async {:mvn/version "1.6.673"}
|
||||
clj-http/clj-http {:mvn/version "3.13.0"}
|
||||
cheshire/cheshire {:mvn/version "5.13.0"}
|
||||
nostr/nostr {:local/root "/home/steffen/coding/nostr-clj"}}}
|
||||
net.clojars.laoc/nostr {:local/root "/home/laoc/coding/nostr-clj"}}}
|
||||
|
|
|
|||
|
|
@ -4,11 +4,11 @@
|
|||
[clojure.java.io :as io]
|
||||
[nostr.edufeed :as edufeed]))
|
||||
|
||||
(defn fetch-data [pit-id last-sort-value]
|
||||
(let [url "https://oersi.org/resources/api-internal/search/_search?pretty"
|
||||
(defn fetch-data [pit-id last-sort-value provider]
|
||||
(let [url "https://oersi.org/resources/api/search/_search?pretty"
|
||||
_ (println last-sort-value)
|
||||
query (merge {:size 1000
|
||||
:query {:match {:mainEntityOfPage.provider.name "twillo"}}
|
||||
:query {:match {:mainEntityOfPage.provider.name provider}}
|
||||
:pit {:id pit-id
|
||||
:keep_alive "1m"}
|
||||
:sort [{:id "asc"}]
|
||||
|
|
@ -33,12 +33,13 @@
|
|||
(defn crawl-oersi [args]
|
||||
(println "Crawl oersi" args)
|
||||
(let [output-file "oersi_data.jsonl"
|
||||
pit (http/post "https://oersi.org/resources/api-internal/search/oer_data/_pit?keep_alive=1m&pretty"
|
||||
{:accept :json})
|
||||
pit (http/post "https://oersi.org/resources/api/search/oer_data/_pit?keep_alive=1m&pretty"
|
||||
{:accept :json
|
||||
:user-agent "edufeed, mail@edufeed.org"})
|
||||
pit-id (-> pit :body (#(json/parse-string % true)) :id)]
|
||||
(println "Generated PIT: " pit-id)
|
||||
(loop [last-sort-value nil]
|
||||
(let [body (fetch-data pit-id last-sort-value)
|
||||
(let [body (fetch-data pit-id last-sort-value (:provider args))
|
||||
hits (-> body :hits :hits)]
|
||||
(save-to-jsonl body output-file)
|
||||
(if-not (empty? hits)
|
||||
|
|
@ -46,7 +47,7 @@
|
|||
(println "no more records to fetch"))))))
|
||||
|
||||
(defn search-oersi [args]
|
||||
(let [url "https://oersi.org/resources/api-internal/search/oer_data/_search?pretty"
|
||||
(let [url "https://oersi.org/resources/api/search/oer_data/_search?pretty"
|
||||
query-2 {:size 1
|
||||
:from 0
|
||||
:query {:match_all {}}}
|
||||
|
|
@ -61,6 +62,9 @@
|
|||
:body (json/generate-string query-2)})]
|
||||
(println response)))
|
||||
|
||||
(comment
|
||||
(search-oersi []))
|
||||
|
||||
;; FIXME read file, and then process line by line
|
||||
(defn export-to-nostr [args]
|
||||
(println args)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue