ISNI: Difference between revisions
No edit summary |
|||
Line 126: | Line 126: | ||
WHERE wikidata_qid_isni.qid IS NULL | WHERE wikidata_qid_isni.qid IS NULL | ||
AND viaforg_viaf_isni.isni IS NOT NULL | AND viaforg_viaf_isni.isni IS NOT NULL | ||
==About== | |||
*https://wiki.duraspace.org/display/PCCISNI/Getting+started+with+ISNI+and+the+ISNI+web+interface |
Revision as of 2018-09-04T01:30:54
VIAF
wget http://viaf.org/viaf/data/viaf-20170806-links.txt.gz
make smaller
gunzip < viaf-20170806-links.txt.gz | bzip2 > viaf-20170806-links.txt.bz2
extract uncompressed file (keep original compressed file)
gunzip -c viaf-20170806-links.txt.gz > viaf-20170806-links.txt
see sizes
ls -la
-rw-r--r-- 1 root root 4414758007 Sep 21 13:42 viaf-20170806-links.txt
-rw-r--r-- 1 root root 661276773 Sep 21 13:40 viaf-20170806-links.txt.bz2
-rw-r--r-- 1 root root 807433803 Aug 8 00:09 viaf-20170806-links.txt.gz
view data
zcat viaf-20170806-links.txt.gz | head -n 100
extract lines that contain "ISNI"
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' > viaf-20170806-links.isni.txt
remove viaf and isni prefixes and sort by ISNI
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | sort -k2 | gzip > viaf-20170806-links.isni-noprefix.sortbyisni.txt.gz
replace tab with comma - increases file size!
# The sed step turns tabs into commas, so sort needs -t',' for -k2 to address the ISNI column.
# Output goes to a .csv.gz name so the tab-separated sortbyisni.txt.gz is not overwritten.
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g' | sort -t',' -k2 | gzip > viaf-20170806-links.isni-noprefix.sortbyisni.csv.gz
# Comma-separated exports, one command per line.
# Plain `sort` orders by the whole line (VIAF id first); `sort -t',' -k2` orders by the ISNI column.
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort | bzip2 > viaf-20170806-links.isni-noprefix.sortbyviaf.csv.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort | gzip > viaf-20170806-links.isni-noprefix.sortbyviaf.csv.gz
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 > viaf-20170806-links.isni-noprefix.sortbyisni.csv
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 | bzip2 > viaf-20170806-links.isni-noprefix.sortbyisni.csv.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 | gzip > viaf-20170806-links.isni-noprefix.sortbyisni.csv.gz
Change column order
paste <(cut -f2 viaf-20170806-links.isni-noprefix.txt) <(cut -f1 viaf-20170806-links.isni-noprefix.txt) > viaf-20170806-links.isni-noprefix.isni-viaf.txt
Sort (separator ",") by column 2
sort -t',' -k2
viaf-20170806-links.isni-noprefix.viaf-isni
# Tab-separated VIAF<TAB>ISNI exports: unsorted, then sorted by column 2.
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | bzip2 > viaf-20170806-links.isni-noprefix.viaf-isni.txt.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | gzip > viaf-20170806-links.isni-noprefix.viaf-isni.txt.gz
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | sort -k2 > viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.txt
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | sort -k2 | bzip2 > viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.txt.bz2
7z a viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.txt.7z viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.txt
# Comma-separated exports: unsorted, sorted by column 1, sorted by column 2.
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| bzip2 > viaf-20170806-links.isni-noprefix.viaf-isni.csv.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| gzip > viaf-20170806-links.isni-noprefix.viaf-isni.csv.gz
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k1 > viaf-20170806-links.isni-noprefix.viaf-isni.sortk1.csv
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k1 | bzip2 > viaf-20170806-links.isni-noprefix.viaf-isni.sortk1.csv.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k1 | gzip > viaf-20170806-links.isni-noprefix.viaf-isni.sortk1.csv.gz
7z a viaf-20170806-links.isni-noprefix.viaf-isni.sortk1.csv.7z viaf-20170806-links.isni-noprefix.viaf-isni.sortk1.csv
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 | bzip2 > viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.csv.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 | gzip > viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.csv.gz
viaf-20170806-links.isni-noprefix.isni-viaf
7z a viaf-20170806-links.isni-noprefix.isni-viaf.sort.csv.7z viaf-20170806-links.isni-noprefix.isni-viaf.sort.csv
viaf-20170806-links.isni-noprefix.isni
# Keep only the ISNI value of each line.
# The "ISNI|" prefix must be stripped while the separator is still '|': it only becomes ','
# in the next expression, so the earlier 's/ISNI,//' never matched and left "ISNI," in the output.
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e 's/ISNI|//' -e 's/[\t|]/,/g' -e 's/[0-9]*,//' > viaf-20170806-links.isni-noprefix.isni.txt
Wikidata
2017-09-21: 414388 results
http://tinyurl.com/y93zjb9q SELECT ?qid ?isni WHERE { BIND( xsd:integer( STRAFTER( STR( ?item ), STR( wd:Q ) ) ) AS ?qid ) ?item wdt:P213 ?isni } ORDER BY ?qid
2017-09-21: 421444 results
http://tinyurl.com/yd854f2x SELECT ?qid ?isni ?viaf WHERE { BIND( xsd:integer( STRAFTER( STR( ?item ), STR( wd:Q ) ) ) AS ?qid ) ?item wdt:P213 ?isni. OPTIONAL {?item wdt:P214 ?viaf} } ORDER BY ?qid
Timeout
http://tinyurl.com/y8dlto88 SELECT ?qid ?isni ?viaf WHERE { BIND( xsd:integer( STRAFTER( STR( ?item ), STR( wd:Q ) ) ) AS ?qid ) ?item wdt:P214 ?viaf OPTIONAL {?item wdt:P213 ?isni.} } ORDER BY ?qid
Timeout
http://tinyurl.com/yd4bmqgj SELECT ?qid ?viaf WHERE { BIND( xsd:integer( STRAFTER( STR( ?item ), STR( wd:Q ) ) ) AS ?qid ) ?item wdt:P214 ?viaf FILTER NOT EXISTS{ ?item wdt:P213 ?isni } } ORDER BY ?qid
2017-09-21: 1064452 results
http://tinyurl.com/yafxk3cj SELECT ?qid ?viaf WHERE { BIND( xsd:integer( STRAFTER( STR( ?item ), STR( wd:Q ) ) ) AS ?qid ) ?item wdt:P214 ?viaf } ORDER BY ?qid
Wikidata missing
-- Rows of wikidata_qid_viaf whose qid has no match in wikidata_qid_isni,
-- but whose viaf has a non-NULL isni in viaforg_viaf_isni.
SELECT
    wv.qid,
    wv.viaf,
    vi.isni
FROM `wikidata_qid_viaf` AS wv
LEFT JOIN wikidata_qid_isni AS wi
    ON wi.qid = wv.qid
INNER JOIN viaforg_viaf_isni AS vi
    ON vi.viaf = wv.viaf
WHERE wi.qid IS NULL            -- anti-join: keep only qids absent from wikidata_qid_isni
    AND vi.isni IS NOT NULL