diff options
author | Michał Górny <mgorny@gentoo.org> | 2021-10-23 22:37:15 +0200 |
---|---|---|
committer | Michał Górny <mgorny@gentoo.org> | 2021-10-23 22:46:04 +0200 |
commit | 8fc9ac325105fab5a44261fd4874d87738d31c43 (patch) | |
tree | 7e6b6fe2f84b76a9d4d8860aecf492532656fa7f /dev-python/nltk-data | |
parent | net-misc/lldpd: update homepage (diff) | |
download | gentoo-8fc9ac325105fab5a44261fd4874d87738d31c43.tar.gz gentoo-8fc9ac325105fab5a44261fd4874d87738d31c43.tar.bz2 gentoo-8fc9ac325105fab5a44261fd4874d87738d31c43.zip |
dev-python/nltk-data: Update data files for 20211023
Closes: https://bugs.gentoo.org/819780
Signed-off-by: Michał Górny <mgorny@gentoo.org>
Diffstat (limited to 'dev-python/nltk-data')
-rw-r--r-- | dev-python/nltk-data/Manifest | 3 | ||||
-rw-r--r-- | dev-python/nltk-data/nltk-data-20211023.ebuild (renamed from dev-python/nltk-data/nltk-data-20200312-r1.ebuild) | 61 |
2 files changed, 42 insertions, 22 deletions
diff --git a/dev-python/nltk-data/Manifest b/dev-python/nltk-data/Manifest index fa415ee7f30e..0a6f8624fbaa 100644 --- a/dev-python/nltk-data/Manifest +++ b/dev-python/nltk-data/Manifest @@ -81,7 +81,7 @@ DIST nltk-smultron-20200312.zip 166207 BLAKE2B d0c3e75dd108965e260d913e0c02137da DIST nltk-snowball_data-20200312.zip 6785405 BLAKE2B 44c10439b142540ac7eece967efa1431fd8f45342f0a90875dacf29ad374fe4c7d30af11d42ba45e0f1ec1836d56b2ff684ee352c5e8536cfb5db5eb7632285b SHA512 6c8a9259d88f6f7f499867d83b731de99d7fa4e8827ecedf836f653fc1a810efa9f6c5c6e2720a9e6610bc00978956b6a119bd08b70e3e241c4e9faccddd81d8 DIST nltk-spanish_grammars-20200312.zip 4047 BLAKE2B d8a8dbb558850a6a60f1fe5ab0f617f3a0f3c64bc7d49980cf793d374c6679d1bd42afb7e61776737b5eec162f2520abf2ee3acc92ea9ee0f397c3089b3b5b28 SHA512 4513347156e9351c259c0e2448198d68354bbc95e0a54561c31a88f13f333ebcba3e294c820cb62036665f2904eb6a7137546cd580e361c0423c30a8aed950b2 DIST nltk-state_union-20200312.zip 808757 BLAKE2B 2d352af0ced736d3c11a821eaba0b035b3b5b6b0f20db3bd5d4ac2451f99daf68bffd3ad984bd404ecc4a1e67ca9281c529af2cc9e295a3a7330f36ee9640bc8 SHA512 a6fc83e6230e57ba66a7af62f0d2a5d44a14530ed1e0e914590b3f8b5bf939967c126a5e6f6899ba9134843893f65212e836d311109393c1200191a5c3163485 -DIST nltk-stopwords-20200312.zip 23047 BLAKE2B a0677cc0d4a3d54ee6e27eab8fb7635d6cd29265204896870e57457a54459f1d6cabc0c4e76e749397f5eee299eed0b524fcbc2033ea17d81cdb6cd98c5ed968 SHA512 31774fd3db2e0fba0209db71c08f9b2d971311ea4a59739cfdc0a9ae34f6c6c593f2a991a14ee20b0de8b380215e609f8398ed50c546775322ab8c4c3f8d06dd +DIST nltk-stopwords-20211023.zip 26220 BLAKE2B 50219508c5fb24c1392064ea0546ca9060829f51689c0d626bb99e3fa8f712df98ed475fc0d27d99f934fb156ce65df91a8e7c22e1b4f16833339fb0aea34a9e SHA512 8308623953560281288b64e695638ca3fa28e1b6201b538d01650d6dfd08e821687217c8d012e93adfa2a48afebdda11af1bb86d638358c2931f36754d1e15ef DIST nltk-subjectivity-20200312.zip 521628 BLAKE2B 0a8777a5b91b1b825fbde41cc927d496480129f0a810349bddde2036ed16f37611f2bc3b007e74fe36523612529a92433d32d094be72d247f5faef8220c3c491 SHA512 a3cc4d2d20f26c5eabd86fbced2c013e69d46e607013278eb35831a62e57523a17aec1b580ba62c7a867e61a561e1b222d8430f0c1e2d429a9479e12b008c5b7 DIST nltk-swadesh-20200312.zip 22828 BLAKE2B 1cd9d5355c6b53694ec545ca001b0807b4912a7878ba075b0f81ce8b9b22a5c7a18cf52cf2449483a1c89cd20d8d86986dc3d827fa93a7ef5824fddbc0922025 SHA512 90cb32532a5378d05ce34b84b5f8363dbb32f24afac58b0dcc5cdccba98fd7d37def7f4fbe76b11c8c64059bd19df745562bfbf5f4c721d65ce9f4be1348ab76 DIST nltk-switchboard-20200312.zip 791161 BLAKE2B 211116a751ae246fa31b6aca96b396d3642d89ad112588a09f8d91a5b76dc41c7fb4d36c16c6358cd8e0da8056bc83598ed0dc635cf7b1fd8469a0e80b5f1761 SHA512 690e5392dc082c4ac550bde2848aa65117e7a25cbc4bce0887581c531d03be64e21f044ac0a3286648255f0edd7766b1161f5575ad5fd680c7303b34c3226b8f @@ -102,6 +102,7 @@ DIST nltk-webtext-20200312.zip 646297 BLAKE2B ca072fc38c144b659c76c36c9161641c91 DIST nltk-wmt15_eval-20200312.zip 383096 BLAKE2B 119943db4240171077569b3302c678644c2c9547ba67bfd055751059e0a3ad3ab6a19e4eedf9108d313d46dcd36cf19e11d973981da8c70a01c4cb790a7bd739 SHA512 362395d4c77ebe92f4c19fa8c2000082dc7a2343acc19cccb596ca00db6c40c231b904d807f46e2691cc4c4a0c79d14873b2a1983a494f2ca1485d540d787ceb DIST nltk-word2vec_sample-20200312.zip 49396025 BLAKE2B 0512b9bb7121a528190079f578e82b4e8f8021bfc0062cfa5613d260f3eee17460aecfeaacb65d950e79d27a653c78633a88c3638ec16377e2dbc3006387ebaa SHA512 09c30a4ab8f9fb6a5b36974b5953260d01cb4f285827fb90a374d054ad775ac978602ab56c452f46f4f8601312e232fd739d2f54dafa44ab8b7b01831cf0d9d1 DIST nltk-wordnet-20200312.zip 10775600 BLAKE2B dac56a8fb1fa6882b1871c394ad2acb2d3be739c424570e27c89fb6983df5f896a8f359092ba82752ddfc0531d83563a219e85f80124202f29bda93181efe4dc SHA512 1923a8bcd56fa0b9a9de91f53070dce28c3a7efbab11d2ef55c87134b1bf30de0f40abab59c39eb15dce54aec9491d8a5a259de212ff4cb25cde0ad09317009a +DIST nltk-wordnet31-20211023.zip 11055271 BLAKE2B e41a1951af5a71c9506d1e948b860574c94ab0ef31c1789a7e7bfb29c6dccea5b1d8895007631f7b595e9f90306365b5042e7a80dc6e1364fdbf4a5f0cba3b28 SHA512 a86091bd55e3a706892550b232be8f5199092623f1f8305d8c9be967a8527fe7d4ecb6250c369b229fdf52b6f3008106b758adc355fa2ad08b5b0cf2a458c173 DIST nltk-wordnet_ic-20200312.zip 12056682 BLAKE2B c2dc2a646015b23699a72f636b588ec5718c70e6941d9d56863257e1e0396c8cf59ac1dc6ed74e5d7f0c2ee9129d63221a03967bf66a3d335e99160f295ed44d SHA512 1c94451a13af6c76bff60a0cab2e70402a3d9abd2e8fc62a5473f24ab4229feb0afe4faa8d389734697a6cf86d2c8b1dc700bb3afa3cbc279b75d7e0ec19fc6d DIST nltk-words-20200312.zip 757777 BLAKE2B eaaaaab6c26e206e9b6ce45daf779e3cc6706a06132afeabf013026d0009caee2d678f3c4ea9125b9654f7143bef29ec7a5706b79e5650ea556c6821b7754e6f SHA512 2810f05d3fc7ee6b6f8636fa1ff7b4e8c8cdac12b415cc54d15c69102290122ea138ec4fa36cb483f790c1ac10b0f83ae4c2c3e0e8df7e67e90e962ee5dbb0be DIST nltk-ycoe-20200312.zip 477 BLAKE2B 574835aa011a06a06363e26facd6a6f583a1dc1cac2de39adff59d8ab48eefac030b43d935a2f79af855259f2a9a571193dae2811589483af97406ff05c76c9e SHA512 e39ce165074d10ff63cb84ea52905d7ecb937797c8123ed113c5609afe1f63ac44d04d48a681002c4eac21dc9076ac74164b886c6f9ce42f3a102c38d1e8e756 diff --git a/dev-python/nltk-data/nltk-data-20200312-r1.ebuild b/dev-python/nltk-data/nltk-data-20211023.ebuild index 4a3d58c5db8a..df8437c785be 100644 --- a/dev-python/nltk-data/nltk-data-20200312-r1.ebuild +++ b/dev-python/nltk-data/nltk-data-20211023.ebuild @@ -1,4 +1,4 @@ -# Copyright 2020 Gentoo Authors +# Copyright 2020-2021 Gentoo Authors # Distributed under the terms of the GNU General Public License v2 EAPI=7 @@ -18,7 +18,7 @@ RESTRICT="bindist mirror" BDEPEND="app-arch/unzip" -PACKAGES_ZIP=( +PACKAGES_ZIP_2020=( # wget -O - https://www.nltk.org/nltk_data/ | xml sel -t -m '//package[@unzip=0]' -v @subdir -o "/" -v @id -n - | sort corpora/comtrans corpora/conll2007 @@ -36,7 +36,7 @@ PACKAGES_ZIP=( stemmers/snowball_data ) -PACKAGES_UNPACK=( +PACKAGES_UNPACK_2020=( # wget -O - https://www.nltk.org/nltk_data/ | xml sel -t -m '//package[@unzip=1]' -v @subdir -o "/" -v @id -n - | sort corpora/abc corpora/alpino @@ -85,7 +85,6 @@ PACKAGES_UNPACK=( corpora/shakespeare corpora/sinica_treebank corpora/state_union - corpora/stopwords corpora/subjectivity corpora/swadesh corpora/switchboard @@ -116,7 +115,12 @@ PACKAGES_UNPACK=( tokenizers/punkt ) -PACKAGES_UNPACK_EXTRA=( +PACKAGES_UNPACK_2021=( + corpora/stopwords + corpora/wordnet31 +) + +PACKAGES_UNPACK_EXTRA_2020=( chunkers/maxent_ne_chunker corpora/biocreative_ppi corpora/brown_tei @@ -137,48 +141,63 @@ PACKAGES_UNPACK_EXTRA=( ) add_data() { - local x + local x version=${1} + shift + for x; do SRC_URI+=" https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/${x}.zip - -> nltk-${x#*/}-${PV}.zip" + -> nltk-${x#*/}-${version}.zip" done } -add_data "${PACKAGES_ZIP[@]}" "${PACKAGES_UNPACK[@]}" +add_data 20200312 "${PACKAGES_ZIP_2020[@]}" "${PACKAGES_UNPACK_2020[@]}" +add_data 20211023 "${PACKAGES_UNPACK_2021[@]}" SRC_URI+=" extra? (" -add_data "${PACKAGES_UNPACK_EXTRA[@]}" +add_data 20200312 "${PACKAGES_UNPACK_EXTRA_2020[@]}" SRC_URI+=" )" CHECKREQS_DISK_USR=3G CHECKREQS_DISK_BUILD=${CHECKREQS_DISK_USR} -src_unpack() { - local x - local to_unpack=( "${PACKAGES_UNPACK[@]}" ) - use extra && to_unpack+=( "${PACKAGES_UNPACK_EXTRA[@]}" ) - for x in "${to_unpack[@]}"; do +unpack_data() { + local x version=${1} + shift + + for x; do local cat=${x%/*} local pkg=${x#*/} mkdir -p "${S}/${cat}" || die cd "${S}/${cat}" || die - unpack "nltk-${pkg}-${PV}.zip" + unpack "nltk-${pkg}-${version}.zip" done } -src_install() { - dodir /usr/share/nltk_data - mv * "${ED}/usr/share/nltk_data/" || die +src_unpack() { + unpack_data 20200312 "${PACKAGES_UNPACK_2020[@]}" + unpack_data 20211023 "${PACKAGES_UNPACK_2021[@]}" + use extra && unpack_data 20200312 "${PACKAGES_UNPACK_EXTRA_2020[@]}" +} + +install_zips() { + local x version=${1} + shift - local x - for x in "${PACKAGES_ZIP[@]}"; do + for x; do local cat=${x%/*} local pkg=${x#*/} insinto "/usr/share/nltk_data/${cat}" - newins "${DISTDIR}/nltk-${pkg}-${PV}.zip" "${pkg}.zip" + newins "${DISTDIR}/nltk-${pkg}-${version}.zip" "${pkg}.zip" done } + +src_install() { + dodir /usr/share/nltk_data + mv * "${ED}/usr/share/nltk_data/" || die + + install_zips 20200312 "${PACKAGES_ZIP_2020[@]}" +} |