ISNI: Difference between revisions

From annawiki
No edit summary
No edit summary
(21 intermediate revisions by the same user not shown)
Line 4: Line 4:


----------
----------
  wget http://viaf.org/viaf/data/viaf-20170906-links.txt.gz
  wget http://viaf.org/viaf/data/viaf-20170806-links.txt.gz
make smaller
make smaller
  gunzip < viaf-20170906-links.txt.gz | bzip2 > viaf-20170906-links.txt.bz2
  gunzip < viaf-20170806-links.txt.gz | bzip2 > viaf-20170806-links.txt.bz2
extract uncompressed file (keep original compressed file)
extract uncompressed file (keep original compressed file)
  gunzip -c viaf-20170906-links.txt.gz > viaf-20170906-links.txt
  gunzip -c viaf-20170806-links.txt.gz > viaf-20170806-links.txt
see sizes
see sizes
  ls -la
  ls -la
  -rw-r--r--  1 root    root    4440512262 Sep 21 11:44 viaf-20170906-links.txt
  -rw-r--r--  1 root    root    4414758007 Sep 21 13:42 viaf-20170806-links.txt
  -rw-r--r--  1 root    root      666148170 Sep 21 11:28 viaf-20170906-links.txt.bz2
  -rw-r--r--  1 root    root      661276773 Sep 21 13:40 viaf-20170806-links.txt.bz2
  -rw-r--r--  1 root    root      814025893 Sep 7 10:36 viaf-20170906-links.txt.gz
  -rw-r--r--  1 root    root      807433803 Aug  8 00:09 viaf-20170806-links.txt.gz
view data
zcat viaf-20170806-links.txt.gz | head -n 100
extract lines that contain "ISNI"
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' > viaf-20170806-links.isni.txt
remove viaf and isni prefixes and sort by ISNI
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | sort -k2 | gzip > viaf-20170806-links.isni-noprefix.sortbyisni.txt.gz
replace tab with comma - increases file size!
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -k2 | gzip > viaf-20170806-links.isni-noprefix.sortbyisni.txt.gz
 
------------------
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort | bzip2 > viaf-20170806-links.isni-noprefix.sortbyviaf.csv.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort | gzip > viaf-20170806-links.isni-noprefix.sortbyviaf.csv.gz
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 > viaf-20170806-links.isni-noprefix.sortbyisni.csv
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 | bzip2 > viaf-20170806-links.isni-noprefix.sortbyisni.csv.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 | gzip > viaf-20170806-links.isni-noprefix.sortbyisni.csv.gz
 
Change column order
paste <(cut -f2 viaf-20170806-links.isni-noprefix.txt) <(cut -f1 viaf-20170806-links.isni-noprefix.txt) > viaf-20170806-links.isni-noprefix.isni-viaf.txt
 
Sort (separator ",") by column 2
sort -t',' -k2
 
===viaf-20170806-links.isni-noprefix.viaf-isni===
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | bzip2 > viaf-20170806-links.isni-noprefix.viaf-isni.txt.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | gzip > viaf-20170806-links.isni-noprefix.viaf-isni.txt.gz
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | sort -k2 > viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.txt
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | sort -k2 | bzip2 > viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.txt.bz2
7z a viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.txt.7z viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.txt
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| bzip2 > viaf-20170806-links.isni-noprefix.viaf-isni.csv.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| gzip > viaf-20170806-links.isni-noprefix.viaf-isni.csv.gz
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k1 > viaf-20170806-links.isni-noprefix.viaf-isni.sortk1.csv
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k1 | bzip2 > viaf-20170806-links.isni-noprefix.viaf-isni.sortk1.csv.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k1 | gzip > viaf-20170806-links.isni-noprefix.viaf-isni.sortk1.csv.gz
7z a viaf-20170806-links.isni-noprefix.viaf-isni.sortk1.csv.7z viaf-20170806-links.isni-noprefix.viaf-isni.sortk1.csv
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 | bzip2 > viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.csv.bz2
  zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 | gzip > viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.csv.gz
 
===viaf-20170806-links.isni-noprefix.isni-viaf===
7z a viaf-20170806-links.isni-noprefix.isni-viaf.sort.csv.7z viaf-20170806-links.isni-noprefix.isni-viaf.sort.csv
 
===viaf-20170806-links.isni-noprefix.isni===
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI,//' -e 's/[\t|]/,/g' -e s'/[0-9]*,//'> viaf-20170806-links.isni-noprefix.isni.txt
 
==Wikidata==
2017-09-21: 414388 results
http://tinyurl.com/y93zjb9q
SELECT ?qid ?isni
WHERE
{
  BIND( xsd:integer( STRAFTER( STR( ?item ), STR( wd:Q ) ) ) AS ?qid )
  ?item wdt:P213 ?isni
}
ORDER BY ?qid
 
2017-09-21: 421444 results
http://tinyurl.com/yd854f2x
SELECT ?qid ?isni ?viaf
WHERE
{
  BIND( xsd:integer( STRAFTER( STR( ?item ), STR( wd:Q ) ) ) AS ?qid )
  ?item wdt:P213 ?isni.
  OPTIONAL {?item wdt:P214 ?viaf}
}
ORDER BY ?qid
Timeout
http://tinyurl.com/y8dlto88
SELECT ?qid ?isni ?viaf
WHERE
{
  BIND( xsd:integer( STRAFTER( STR( ?item ), STR( wd:Q ) ) ) AS ?qid )
  ?item wdt:P214 ?viaf
  OPTIONAL {?item wdt:P213 ?isni.}
}
ORDER BY ?qid
 
Timeout
http://tinyurl.com/yd4bmqgj
SELECT ?qid ?viaf
WHERE
{
  BIND( xsd:integer( STRAFTER( STR( ?item ), STR( wd:Q ) ) ) AS ?qid )
  ?item wdt:P214 ?viaf
  FILTER NOT EXISTS{ ?item wdt:P213 ?isni }
}
ORDER BY ?qid
 
2017-09-21: 1064452 results
http://tinyurl.com/yafxk3cj
SELECT ?qid ?viaf
WHERE
{
  BIND( xsd:integer( STRAFTER( STR( ?item ), STR( wd:Q ) ) ) AS ?qid )
  ?item wdt:P214 ?viaf
}
ORDER BY ?qid
===Wikidata missing===
SELECT wikidata_qid_viaf.qid,wikidata_qid_viaf.viaf,viaforg_viaf_isni.isni
FROM `wikidata_qid_viaf`
LEFT JOIN wikidata_qid_isni
ON wikidata_qid_viaf.qid = wikidata_qid_isni.qid
LEFT JOIN viaforg_viaf_isni
ON wikidata_qid_viaf.viaf = viaforg_viaf_isni.viaf
WHERE wikidata_qid_isni.qid IS NULL
AND viaforg_viaf_isni.isni IS NOT NULL
 
==About==
*https://wiki.duraspace.org/display/PCCISNI/Getting+started+with+ISNI+and+the+ISNI+web+interface

Revision as of 2018-09-04T01:30:54

VIAF


wget http://viaf.org/viaf/data/viaf-20170806-links.txt.gz

make smaller

gunzip < viaf-20170806-links.txt.gz | bzip2 > viaf-20170806-links.txt.bz2

extract uncompressed file (keep original compressed file)

gunzip -c viaf-20170806-links.txt.gz > viaf-20170806-links.txt

see sizes

ls -la
-rw-r--r--  1 root     root     4414758007 Sep 21 13:42 viaf-20170806-links.txt
-rw-r--r--  1 root     root      661276773 Sep 21 13:40 viaf-20170806-links.txt.bz2
-rw-r--r--  1 root     root      807433803 Aug  8 00:09 viaf-20170806-links.txt.gz

view data

zcat viaf-20170806-links.txt.gz | head -n 100

extract lines that contain "ISNI"

zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' > viaf-20170806-links.isni.txt

remove viaf and isni prefixes and sort by ISNI

zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | sort -k2 | gzip > viaf-20170806-links.isni-noprefix.sortbyisni.txt.gz

replace tab with comma - increases file size!

zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -k2 | gzip > viaf-20170806-links.isni-noprefix.sortbyisni.txt.gz

zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort | bzip2 > viaf-20170806-links.isni-noprefix.sortbyviaf.csv.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort | gzip > viaf-20170806-links.isni-noprefix.sortbyviaf.csv.gz
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 > viaf-20170806-links.isni-noprefix.sortbyisni.csv
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 | bzip2 > viaf-20170806-links.isni-noprefix.sortbyisni.csv.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 | gzip > viaf-20170806-links.isni-noprefix.sortbyisni.csv.gz

Change column order

paste <(cut -f2 viaf-20170806-links.isni-noprefix.txt) <(cut -f1 viaf-20170806-links.isni-noprefix.txt) > viaf-20170806-links.isni-noprefix.isni-viaf.txt

Sort (separator ",") by column 2

sort -t',' -k2

viaf-20170806-links.isni-noprefix.viaf-isni

zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | bzip2 > viaf-20170806-links.isni-noprefix.viaf-isni.txt.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | gzip > viaf-20170806-links.isni-noprefix.viaf-isni.txt.gz

zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | sort -k2 > viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.txt
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | sort -k2 | bzip2 > viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.txt.bz2
7z a viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.txt.7z viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.txt

zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| bzip2 > viaf-20170806-links.isni-noprefix.viaf-isni.csv.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| gzip > viaf-20170806-links.isni-noprefix.viaf-isni.csv.gz

zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k1 > viaf-20170806-links.isni-noprefix.viaf-isni.sortk1.csv
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k1 | bzip2 > viaf-20170806-links.isni-noprefix.viaf-isni.sortk1.csv.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k1 | gzip > viaf-20170806-links.isni-noprefix.viaf-isni.sortk1.csv.gz
7z a viaf-20170806-links.isni-noprefix.viaf-isni.sortk1.csv.7z viaf-20170806-links.isni-noprefix.viaf-isni.sortk1.csv
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 | bzip2 > viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.csv.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 | gzip > viaf-20170806-links.isni-noprefix.viaf-isni.sortk2.csv.gz

viaf-20170806-links.isni-noprefix.isni-viaf

7z a viaf-20170806-links.isni-noprefix.isni-viaf.sort.csv.7z viaf-20170806-links.isni-noprefix.isni-viaf.sort.csv

viaf-20170806-links.isni-noprefix.isni

zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI,//' -e 's/[\t|]/,/g' -e s'/[0-9]*,//'> viaf-20170806-links.isni-noprefix.isni.txt

Wikidata

2017-09-21: 414388 results

http://tinyurl.com/y93zjb9q

SELECT ?qid ?isni
WHERE
{
 BIND( xsd:integer( STRAFTER( STR( ?item ), STR( wd:Q ) ) ) AS ?qid )
 ?item wdt:P213 ?isni
}
ORDER BY ?qid

2017-09-21: 421444 results

http://tinyurl.com/yd854f2x
SELECT ?qid ?isni ?viaf
WHERE 
{
 BIND( xsd:integer( STRAFTER( STR( ?item ), STR( wd:Q ) ) ) AS ?qid )
 ?item wdt:P213 ?isni.
 OPTIONAL {?item wdt:P214 ?viaf}
}
ORDER BY ?qid

Timeout

http://tinyurl.com/y8dlto88
SELECT ?qid ?isni ?viaf
WHERE 
{
 BIND( xsd:integer( STRAFTER( STR( ?item ), STR( wd:Q ) ) ) AS ?qid )
 ?item wdt:P214 ?viaf
 OPTIONAL {?item wdt:P213 ?isni.}
}
ORDER BY ?qid

Timeout

http://tinyurl.com/yd4bmqgj
SELECT ?qid ?viaf
WHERE 
{
 BIND( xsd:integer( STRAFTER( STR( ?item ), STR( wd:Q ) ) ) AS ?qid )
 ?item wdt:P214 ?viaf
 FILTER NOT EXISTS{ ?item wdt:P213 ?isni }
}
ORDER BY ?qid

2017-09-21: 1064452 results

http://tinyurl.com/yafxk3cj
SELECT ?qid ?viaf
WHERE 
{
 BIND( xsd:integer( STRAFTER( STR( ?item ), STR( wd:Q ) ) ) AS ?qid )
 ?item wdt:P214 ?viaf
}
ORDER BY ?qid

Wikidata missing

SELECT wikidata_qid_viaf.qid,wikidata_qid_viaf.viaf,viaforg_viaf_isni.isni
FROM `wikidata_qid_viaf`

LEFT JOIN wikidata_qid_isni 
ON wikidata_qid_viaf.qid = wikidata_qid_isni.qid

LEFT JOIN viaforg_viaf_isni 
ON wikidata_qid_viaf.viaf = viaforg_viaf_isni.viaf

WHERE wikidata_qid_isni.qid IS NULL
AND viaforg_viaf_isni.isni IS NOT NULL

About