1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
(** The base URL of the software heritage instance used, defaults to
    [https://archive.softwareheritage.org]. *)
let instance = ref "https://archive.softwareheritage.org"

(**/**)

let url endpoint = Format.sprintf "%s/api/1%s" !instance endpoint

let field_not_found f =
  Error (Format.sprintf "field `%s` not found in the JSON response" f)

let on_response url f =
  match Ezcurl.get ~url () with
  | Error (code, msg) ->
    Error (Format.sprintf "curl error: code `%s` (%s)" (Curl.strerror code) msg)
  | Ok response -> (
    match Json.json_of_src (`String response.body) with
    | Error (_loc, _e) ->
      Error (Format.sprintf "error while parsing JSON response")
    | Ok response -> f response )

(**/**)

(** Same as [content] but expects an object identifier hash directly. *)
let content_unsafe ~hash_type hash =
  let url =
    url
      (Format.asprintf "/content/%s:%a/" hash_type Swhid_core.Object.Hash.pp
         hash )
  in
  on_response url (fun response ->
      let field = "data_url" in
      match Json.find_string field response with
      | Some data_url -> Ok data_url
      | None -> field_not_found field )

(** For a given content identifier, compute an URL from which the content can be
    downloaded. *)
let content id =
  match Swhid_core.Object.get_kind id with
  | Content hash_type ->
    content_unsafe ~hash_type @@ Swhid_core.Object.get_hash id
  | Directory | Release | Revision | Snapshot ->
    Error "invalid object type (expected Content)"

(** Same as [directory] but expects an object identifier hash directly. *)
let directory_unsafe hash =
  let url =
    url (Format.asprintf "/vault/directory/%a/" Swhid_core.Object.Hash.pp hash)
  in
  match Ezcurl.post ~params:[] ~url () with
  | Error (code, msg) ->
    Error (Format.sprintf "curl error: code `%s` (%s)" (Curl.strerror code) msg)
  | Ok response -> (
    match Json.json_of_src (`String response.body) with
    | Error (_loc, _e) ->
      Error (Format.sprintf "error while parsing JSON response")
    | Ok _response ->
      on_response url (fun response ->
          let field = "fetch_url" in
          match Json.find_string field response with
          | Some data_url -> Ok data_url
          | None -> field_not_found field ) )

(** For a given directory identifier, compute an URL from which the directory
    can be downloaded. *)
let directory id =
  match Swhid_core.Object.get_kind id with
  | Directory -> directory_unsafe @@ Swhid_core.Object.get_hash id
  | Content _ | Release | Revision | Snapshot ->
    Error "invalid object type (expected Directory)"

(** Same as [revision] but expects an object identifier hash directly. *)
let revision_unsafe hash =
  let url =
    url (Format.asprintf "/revision/%a/" Swhid_core.Object.Hash.pp hash)
  in
  on_response url (fun response ->
      let field = "directory" in
      match Json.find_string field response with
      | None -> field_not_found field
      | Some dir ->
        Result.bind (Swhid_core.Object.Hash.of_string dir) directory_unsafe )

(** For a given revision identifier, compute an URL from which the revision can
    be downloaded. *)
let revision id =
  match Swhid_core.Object.get_kind id with
  | Revision -> revision_unsafe @@ Swhid_core.Object.get_hash id
  | Content _ | Release | Directory | Snapshot ->
    Error "invalid object type (expected Revision)"

(** Same as [release] but expects an object identifier hash directly. *)
let rec release_unsafe hash =
  let url =
    url (Format.asprintf "/release/%a/" Swhid_core.Object.Hash.pp hash)
  in

  on_response url (fun response ->
      let field = "target_type" in
      match Json.find_string field response with
      | None -> field_not_found field
      | Some target_type -> (
        let field = "target" in
        match Json.find_string field response with
        | None -> field_not_found field
        | Some target -> begin
          let target = Swhid_core.Object.Hash.of_string target in
          match target_type with
          | "release" -> Result.bind target release_unsafe
          | "revision" -> Result.bind target revision_unsafe
          | "content" ->
            (* TODO: get the correct hash type *)
            Result.bind target (fun target ->
                content_unsafe target ~hash_type:"sha1_git" )
          | "directory" -> Result.bind target directory_unsafe
          | target_type ->
            Error (Format.sprintf "unknown target type: `%s`" target_type)
        end ) )

(** For a given release identifier, compute an URL from which the release can be
    downloaded. *)
let release id =
  match Swhid_core.Object.get_kind id with
  | Release -> release_unsafe @@ Swhid_core.Object.get_hash id
  | Content _ | Revision | Directory | Snapshot ->
    Error "invalid object type (expected Release)"

(** Same as [snapshot] but expects an object identifier hash directly. *)
let snapshot_unsafe =
  let go_through_objs = function
    | Json.Object o ->
      let rec aux target_type target jsonl =
        match (target_type, target) with
        | Some target_type, Some target -> begin
          match target_type with
          | "revision" -> Some (revision_unsafe, target)
          | "release" -> Some (release_unsafe, target)
          | "content" ->
            (* TODO: fetch the correct hash_type *)
            Some (content_unsafe ~hash_type:"sha1", target)
          | "directory" -> Some (directory_unsafe, target)
          | _ -> None
        end
        | _ -> (
          match jsonl with
          | [] -> None
          | ("target_type", Json.String value) :: r -> aux (Some value) target r
          | ("target", Json.String value) :: r -> aux target_type (Some value) r
          | (_, _) :: r -> aux target_type target r )
      in
      aux None None o
    | _ -> None
  in
  fun hash ->
    let url =
      url (Format.asprintf "/snapshot/%a/" Swhid_core.Object.Hash.pp hash)
    in

    on_response url (fun response ->
        let field = "branches" in
        match Json.find_obj field response with
        | None -> field_not_found field
        | Some branch ->
          let requests =
            List.filter_map (fun f -> go_through_objs @@ snd f) branch
          in
          Ok
            (List.map
               (fun (f, x) ->
                 let x = Swhid_core.Object.Hash.of_string x in
                 Result.bind x f )
               requests ) )

(** For a given snapshot identifier, compute a list of URL from which the
    snapshot's branches can be downloaded. *)
let snapshot id =
  match Swhid_core.Object.get_kind id with
  | Snapshot -> snapshot_unsafe @@ Swhid_core.Object.get_hash id
  | Content _ | Revision | Directory | Release ->
    Error "invalid object type (expected Snapshot)"

(** For any object identifier, compute a list of URLs from which the object can
    be downloaded. For all kind of object, the list should contain a single URL
    except for snapshot objects which may lead to a list of many URLs (one URL
    per branch). In the snapshot branch, if a single error is encountered, then
    the result will be an [Error] type with the list of all errors, and no URL
    is returned (even if we succeeded to compute some of them).*)
let any =
  let extract_url = function Error e -> Error [ e ] | Ok url -> Ok [ url ] in
  fun identifier : (string list, string list) Result.t ->
    let object_id = Swhid_core.Object.get_hash identifier in
    match Swhid_core.Object.get_kind identifier with
    | Content hash_type -> extract_url (content_unsafe ~hash_type object_id)
    | Directory -> extract_url (directory_unsafe object_id)
    | Release -> extract_url (release_unsafe object_id)
    | Revision -> extract_url (revision_unsafe object_id)
    | Snapshot -> (
      match snapshot_unsafe object_id with
      | Error e -> Error [ e ]
      | Ok res -> (
        match
          List.fold_left
            (fun acc r ->
              match acc with
              | Ok url_list -> begin
                match r with
                | Ok url -> Ok (url :: url_list)
                | Error e -> Error [ e ]
              end
              | Error error_list -> begin
                match r with
                | Ok _url -> Error error_list
                | Error e -> Error (e :: error_list)
              end )
            (Ok []) res
        with
        | Ok urls -> Ok (List.rev urls)
        | Error errors -> Error (List.rev errors) ) )