ISNI: Difference between revisions
No edit summary |
|||
Line 4: | Line 4: | ||
---------- | ---------- | ||
wget http://viaf.org/viaf/data/viaf- | wget http://viaf.org/viaf/data/viaf-20170806-links.txt.gz | ||
make smaller | make smaller | ||
gunzip < viaf- | gunzip < viaf-20170806-links.txt.gz | bzip2 > viaf-20170806-links.txt.bz2 | ||
extract uncompressed file (keep original compressed file) | extract uncompressed file (keep original compressed file) | ||
gunzip -c viaf- | gunzip -c viaf-20170806-links.txt.gz > viaf-20170806-links.txt | ||
see sizes | see sizes | ||
ls -la | ls -la | ||
-rw-r--r-- 1 root root | -rw-r--r-- 1 root root 4414758007 Sep 21 13:42 viaf-20170806-links.txt | ||
-rw-r--r-- 1 root root | -rw-r--r-- 1 root root 661276773 Sep 21 13:40 viaf-20170806-links.txt.bz2 | ||
-rw-r--r-- 1 root root | -rw-r--r-- 1 root root 807433803 Aug 8 00:09 viaf-20170806-links.txt.gz | ||
view data | view data | ||
zcat viaf-20170906-links.txt.gz | head -n 100 | zcat viaf-20170906-links.txt.gz | head -n 100 | ||
Line 20: | Line 20: | ||
remove viaf and isni prefixes and sort by ISNI | remove viaf and isni prefixes and sort by ISNI | ||
zcat viaf-20170906-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | sort -k2 | gzip > viaf-20170906-links.isni-noprefix.sortbyisni.txt.gz | zcat viaf-20170906-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | sort -k2 | gzip > viaf-20170906-links.isni-noprefix.sortbyisni.txt.gz | ||
replace tab with comma - increases file size! | |||
zcat viaf-20170906-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -k2 | gzip > viaf-20170906-links.isni-noprefix.sortbyisni.txt.gz | |||
------------------ | |||
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort | bzip2 > viaf-20170806-links.isni-noprefix.sortbyviaf.csv.bz2 | |||
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort | gzip > viaf-20170806-links.isni-noprefix.sortbyviaf.csv.gz | |||
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 > viaf-20170806-links.isni-noprefix.sortbyisni.csv | |||
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 | bzip2 > viaf-20170806-links.isni-noprefix.sortbyisni.csv.bz2 | |||
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 | gzip > viaf-20170806-links.isni-noprefix.sortbyisni.csv.gz |
Revision as of 2017-09-21T15:52:22
VIAF
wget http://viaf.org/viaf/data/viaf-20170806-links.txt.gz
make smaller
gunzip < viaf-20170806-links.txt.gz | bzip2 > viaf-20170806-links.txt.bz2
extract uncompressed file (keep original compressed file)
gunzip -c viaf-20170806-links.txt.gz > viaf-20170806-links.txt
see sizes
ls -la
-rw-r--r-- 1 root root 4414758007 Sep 21 13:42 viaf-20170806-links.txt
-rw-r--r-- 1 root root 661276773 Sep 21 13:40 viaf-20170806-links.txt.bz2
-rw-r--r-- 1 root root 807433803 Aug 8 00:09 viaf-20170806-links.txt.gz
view data
zcat viaf-20170806-links.txt.gz | head -n 100
extract lines that contain "ISNI"
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' > viaf-20170806-links.isni.txt
remove viaf and isni prefixes and sort by ISNI
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' | sort -k2 | gzip > viaf-20170806-links.isni-noprefix.sortbyisni.txt.gz
replace tab with comma - increases file size!
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -k2 | gzip > viaf-20170806-links.isni-noprefix.sortbyisni.txt.gz
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort | bzip2 > viaf-20170806-links.isni-noprefix.sortbyviaf.csv.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort | gzip > viaf-20170806-links.isni-noprefix.sortbyviaf.csv.gz
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 > viaf-20170806-links.isni-noprefix.sortbyisni.csv
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 | bzip2 > viaf-20170806-links.isni-noprefix.sortbyisni.csv.bz2
zcat viaf-20170806-links.txt.gz | fgrep 'ISNI' | sed -e 's@http://viaf.org/viaf/@@' -e s'/ISNI|//' -e 's/[\t|]/,/g'| sort -t',' -k2 | gzip > viaf-20170806-links.isni-noprefix.sortbyisni.csv.gz