diff options
author | Michał Górny <mgorny@gentoo.org> | 2020-05-04 13:26:10 +0200 |
---|---|---|
committer | Michał Górny <mgorny@gentoo.org> | 2020-05-04 13:26:10 +0200 |
commit | d6d068bfb0046fad9c2b2ddf686c69721ec6f4a5 (patch) | |
tree | 1d2527a8ddf7ff9117917d8c501f763c3bd7d721 /dev-python/nltk-data | |
parent | dev-python/oauthlib: Remove old (diff) | |
download | gentoo-d6d068bfb0046fad9c2b2ddf686c69721ec6f4a5.tar.gz gentoo-d6d068bfb0046fad9c2b2ddf686c69721ec6f4a5.tar.bz2 gentoo-d6d068bfb0046fad9c2b2ddf686c69721ec6f4a5.zip |
dev-python/nltk-data: Remove old
Signed-off-by: Michał Górny <mgorny@gentoo.org>
Diffstat (limited to 'dev-python/nltk-data')
-rw-r--r-- | dev-python/nltk-data/nltk-data-20200312.ebuild | 184 |
1 files changed, 0 insertions, 184 deletions
diff --git a/dev-python/nltk-data/nltk-data-20200312.ebuild b/dev-python/nltk-data/nltk-data-20200312.ebuild deleted file mode 100644 index 1d02afe6be7e..000000000000 --- a/dev-python/nltk-data/nltk-data-20200312.ebuild +++ /dev/null @@ -1,184 +0,0 @@ -# Copyright 2020 Gentoo Authors -# Distributed under the terms of the GNU General Public License v2 - -EAPI=7 - -inherit check-reqs - -DESCRIPTION="Data files for NLTK" -HOMEPAGE="https://www.nltk.org/nltk_data/" - -# at least some of the files have poorly documented licenses -# TODO: create a USE flag for free-ish subset -LICENSE="all-rights-reserved" -SLOT="0" -KEYWORDS="~amd64 ~x86" -IUSE="extra" -RESTRICT="bindist mirror" - -BDEPEND="app-arch/unzip" - -PACKAGES_ZIP=( - # wget -O - https://www.nltk.org/nltk_data/ | xml sel -t -m '//package[@unzip=0]' -v @subdir -o "/" -v @id -n - | sort - corpora/comtrans - corpora/conll2007 - corpora/jeita - corpora/knbc - corpora/machado - corpora/masc_tagged - corpora/nombank.1.0 - corpora/panlex_swadesh - corpora/propbank - corpora/reuters - corpora/semcor - corpora/universal_treebanks_v20 - sentiment/vader_lexicon - stemmers/snowball_data -) - -PACKAGES_UNPACK=( - # wget -O - https://www.nltk.org/nltk_data/ | xml sel -t -m '//package[@unzip=1]' -v @subdir -o "/" -v @id -n - | sort - corpora/abc - corpora/alpino - corpora/brown - corpora/cess_cat - corpora/cess_esp - corpora/chat80 - corpora/city_database - corpora/cmudict - corpora/comparative_sentences - corpora/conll2000 - corpora/conll2002 - corpora/crubadan - corpora/dependency_treebank - corpora/dolch - corpora/europarl_raw - corpora/floresta - corpora/framenet_v15 - corpora/framenet_v17 - corpora/gazetteers - corpora/genesis - corpora/gutenberg - corpora/ieer - corpora/inaugural - corpora/indian - corpora/lin_thesaurus - corpora/mac_morpho - corpora/movie_reviews - corpora/mte_teip5 - corpora/names - corpora/nonbreaking_prefixes - corpora/nps_chat - corpora/omw - corpora/opinion_lexicon - corpora/ppattach - corpora/product_reviews_1 - corpora/product_reviews_2 - corpora/pros_cons - corpora/ptb - corpora/qc - corpora/rte - corpora/senseval - corpora/sentence_polarity - corpora/sentiwordnet - corpora/shakespeare - corpora/sinica_treebank - corpora/state_union - corpora/stopwords - corpora/subjectivity - corpora/swadesh - corpora/switchboard - corpora/timit - corpora/toolbox - corpora/treebank - corpora/twitter_samples - corpora/udhr - corpora/udhr2 - corpora/verbnet - corpora/webtext - corpora/wordnet - corpora/wordnet_ic - corpora/words - grammars/book_grammars - grammars/large_grammars - grammars/sample_grammars - misc/perluniprops - models/bllip_wsj_no_aux - models/moses_sample - models/wmt15_eval - models/word2vec_sample - stemmers/porter_test - stemmers/rslp - taggers/averaged_perceptron_tagger - taggers/averaged_perceptron_tagger_ru - taggers/universal_tagset - tokenizers/punkt -) - -PACKAGES_UNPACK_EXTRA=( - chunkers/maxent_ne_chunker - corpora/biocreative_ppi - corpora/brown_tei - corpora/kimmo - corpora/paradigms - corpora/pe08 - corpora/pil - corpora/pl196x - corpora/problem_reports - corpora/smultron - corpora/unicode_samples - corpora/verbnet3 - corpora/ycoe - grammars/basque_grammars - grammars/spanish_grammars - help/tagsets - misc/mwa_ppdb - taggers/maxent_treebank_pos_tagger -) - -add_data() { - local x - for x; do - SRC_URI+=" - https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/${x}.zip - -> nltk-${x#*/}-${PV}.zip" - done -} - -add_data "${PACKAGES_ZIP[@]}" "${PACKAGES_UNPACK[@]}" -SRC_URI+=" - extra? (" -add_data "${PACKAGES_UNPACK_EXTRA[@]}" -SRC_URI+=" - )" - -CHECKREQS_DISK_USR=3G -CHECKREQS_DISK_BUILD=${CHECKREQS_DISK_USR} - -src_unpack() { - local x - local to_unpack=( "${PACKAGES_UNPACK[@]}" ) - use extra && to_unpack+=( "${PACKAGES_UNPACK_EXTRA[@]}" ) - for x in "${to_unpack[@]}"; do - local cat=${x%/*} - local pkg=${x#*/} - - mkdir -p "${S}/${cat}" || die - cd "${S}/${cat}" || die - unpack "nltk-${pkg}-${PV}.zip" - done -} - -src_install() { - dodir /usr/share/nltk_data - mv * "${ED}/usr/share/nltk_data/" || die - - local x - for x in "${PACKAGES_ZIP[@]}"; do - local cat=${x%/*} - local pkg=${x#*/} - - insinto "/usr/share/nltk_data/${cat}" - newins "${DISTDIR}/nltk-${pkg}-${PV}.zip" "${pkg}.zip" - done -} |