Tracking provenance in ODIS#
Introduction#
Provenance tells us where something came from, and what happened to it on the way. Accurately tracking provenance has always been essential to building trust in any object (e.g. Is the antique I have at home genuine? Has the evidence used in a court case been handled in a secure chain of custody?). Provenance in digital (meta)data management is no different, and the more clearly and completely your (meta)data’s provenance is documented, the more third parties will be able to (re)use it and trust it.
Here, we provide some guidance on how to encode provenance information in JSON-LD/schema.org, for discovery across the ODIS Federation and its users.
Gleaner Prov#
The Gleaner application generates a prov graph of the activity of accessing and indexing provider resources. The main goal of this prov is to connect an indexed URL to the digital object stored in the object store. This digital object should be the JSON-LD data graph presented by the provider.
By contrast, the authoritative reference in the various profiles will connect the data graph ID (or, in its absence, the data graph URL or the URL of the resource referenced by Gleaner) to another reference. This may be an organization ID or a PID of the connected resource.
{
  "@context": {
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "prov": "http://www.w3.org/ns/prov#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#"
  },
  "@graph": [
    {
      "@id": "https://www.re3data.org/repository/obis",
      "@type": "prov:Organization",
      "rdf:name": "Ocean Biodiversity Information System",
      "rdfs:seeAlso": "https://obis.org"
    },
    {
      "@id": "https://obis.org/dataset/9381239f-3d64-48b4-80c9-b9ebb674edc2",
      "@type": "prov:Entity",
      "prov:wasAttributedTo": {
        "@id": "https://www.re3data.org/repository/obis"
      },
      "prov:value": "https://obis.org/dataset/9381239f-3d64-48b4-80c9-b9ebb674edc2"
    },
    {
      "@id": "https://gleaner.io/id/collection/7c1eaa1aaed95861330109026c42e57a31ecae55",
      "@type": "prov:Collection",
      "prov:hadMember": {
        "@id": "https://obis.org/dataset/9381239f-3d64-48b4-80c9-b9ebb674edc2"
      }
    },
    {
      "@id": "urn:gleaner:milled:obis:7c1eaa1aaed95861330109026c42e57a31ecae55",
      "@type": "prov:Entity",
      "prov:value": "7c1eaa1aaed95861330109026c42e57a31ecae55.jsonld"
    },
    {
      "@id": "https://gleaner.io/id/run/7c1eaa1aaed95861330109026c42e57a31ecae55",
      "@type": "prov:Activity",
      "prov:endedAtTime": {
        "@value": "2021-04-20",
        "@type": "http://www.w3.org/2001/XMLSchema#dateTime"
      },
      "prov:generated": {
        "@id": "urn:gleaner:milled:obis:7c1eaa1aaed95861330109026c42e57a31ecae55"
      },
      "prov:used": {
        "@id": "https://gleaner.io/id/collection/7c1eaa1aaed95861330109026c42e57a31ecae55"
      }
    }
  ]
}
{
  "@context": {
    "@vocab": "https://schema.org/",
    "prov": "http://www.w3.org/ns/prov#"
  },
  "@id": "https://gleaner.io/id/run/7c1eaa1aaed95861330109026c42e57a31ecae55",
  "@type": "prov:Activity",
  "prov:endedAtTime": {
    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
    "@value": "2021-04-20"
  },
  "prov:generated": {
    "@id": "urn:gleaner:milled:obis:7c1eaa1aaed95861330109026c42e57a31ecae55",
    "@type": "prov:Entity",
    "prov:value": "7c1eaa1aaed95861330109026c42e57a31ecae55.jsonld"
  },
  "prov:used": {
    "@id": "https://gleaner.io/id/collection/7c1eaa1aaed95861330109026c42e57a31ecae55",
    "@type": "prov:Collection",
    "prov:hadMember": {
      "@id": "https://obis.org/dataset/9381239f-3d64-48b4-80c9-b9ebb674edc2",
      "@type": "prov:Entity",
      "prov:value": "https://obis.org/dataset/9381239f-3d64-48b4-80c9-b9ebb674edc2",
      "prov:wasAttributedTo": {
        "@id": "https://www.re3data.org/repository/obis",
        "@type": "prov:Organization",
        "http://www.w3.org/1999/02/22-rdf-syntax-ns#name": "Ocean Biodiversity Information System",
        "http://www.w3.org/2000/01/rdf-schema#seeAlso": "https://obis.org"
      }
    }
  }
}
Nano Prov#
This is a basic nanoprov example. Note, this is a draft and the ID connections and examples have not been made yet.
{
  "@context": {
    "@vocab": "https://schema.org/",
    "gleaner": "https://voc.gleaner.io/id/",
    "np": "http://www.nanopub.org/nschema#",
    "prov": "http://www.w3.org/ns/prov#",
    "schema": "https://schema.org/",
    "xsd": "http://www.w3.org/2001/XMLSchema#"
  },
  "@set": [
    {
      "@id": "gleaner:nanopub/XID",
      "@type": "np:NanoPublication",
      "np:hasAssertion": {
        "@id": "gleaner:nanopub/XID#assertion"
      },
      "np:hasProvenance": {
        "@id": "gleaner:nanopub/XID#provenance"
      },
      "np:hasPublicationInfo": {
        "@id": "gleaner:nanopub/XID#pubInfo"
      }
    },
    {
      "@id": "gleaner:nanopub/XID#assertion",
      "@graph": {
        "@id": "DataSetURI",
        "@type": "schema:Dataset",
        "description": "This is where you would put corrections or annotations",
        "identifier": [
          {
            "@type": "schema:PropertyValue",
            "name": "GraphSHA",
            "description": "A SHA256 sha stamp on the harvested data graph from a URL",
            "value": "{{SHA256 HASH HERE}}"
          },
          {
            "@type": "schema:PropertyValue",
            "name": "ProviderID",
            "description": "The id provided with the data graph by the provider",
            "value": "{{re3 or URL noted in config}}"
          },
          {
            "@type": "schema:PropertyValue",
            "name": "URL",
            "description": "The URL harvested by gleaner",
            "value": "{{The URL the JSON-LD came from}}"
          }
        ]
      }
    },
    {
      "@id": "gleaner:nanopub/XID#provenance",
      "@graph": {
        "@id": "URIforprovondataset",
        "prov:generatedAtTime": {
          "@value": "dateDone",
          "@type": "xsd:dateTime"
        },
        "prov:wasDerivedFrom": {
          "@id": "IDHERE"
        },
        "prov:wasAttributedTo": {
          "@id": "IDHERE"
        }
      }
    },
    {
      "@id": "gleaner:nanopub/XID#pubInfo",
      "@graph": {
        "@id": "IDHERE",
        "prov:wasAttributedTo": {
          "@id": "gleaner:tool/gleaner"
        },
        "prov:generatedAtTime": {
          "@value": "2019-10-23T14:38:00Z",
          "@type": "xsd:dateTime"
        }
      }
    }
  ]
}