2023-08-14 18:07:16 +02:00
|
|
|
{ lib, newScope, fetchFromGitHub, unzip, stdenvNoCC }:
|
|
|
|
let
|
|
|
|
# Attributes shared by every NLTK data package defined in this file.
base = {
  version = "unstable-2023-02-02";

  # Packages are assembled entirely in the install phase (which calls unzip),
  # so the build phase is switched off.
  dontBuild = true;
  nativeBuildInputs = [ unzip ];

  meta = {
    homepage = "https://github.com/nltk/nltk_data";
    description = "NLTK Data";
    license = lib.licenses.asl20;
    maintainers = [ lib.maintainers.happysalada ];
    platforms = lib.platforms.all;
  };
};
|
|
|
|
# Build one data package from a single zip archive of the nltk/nltk_data repository.
#   pname    - package name; also the archive's base name and the directory copied out of it
#   location - sub-directory of `packages/` holding the archive, reused as the sub-directory of $out
#   hash     - hash handed to fetchFromGitHub for the sparse checkout
makeNltkDataPackage = { pname, location, hash }:
  let
    # Repository-relative path of the archive; used by both the checkout and the unzip step.
    archive = "packages/${location}/${pname}.zip";

    src = fetchFromGitHub {
      inherit hash;
      owner = "nltk";
      repo = "nltk_data";
      rev = "5db857e6f7df11eabb5e5665836db9ec8df07e28";
      # Restrict the checkout to the one archive this package uses.
      sparseCheckout = [ archive ];
    };

    # Package-specific attributes layered over `base`; `version` is not repeated
    # here because the `//` merge below keeps the one `base` already defines.
    perPackage = {
      inherit pname src;

      installPhase = ''
        runHook preInstall

        mkdir -p $out
        unzip ${src}/${archive}
        mkdir -p $out/${location}
        cp -R ${pname}/ $out/${location}

        runHook postInstall
      '';
    };
  in
  stdenvNoCC.mkDerivation (base // perPackage);
|
|
|
|
in
|
|
|
|
# Package set: one attribute per NLTK data archive, each built with makeNltkDataPackage.
lib.makeScope newScope (_self: {
  averaged_perceptron_tagger = makeNltkDataPackage {
    pname = "averaged_perceptron_tagger";
    location = "taggers";
    hash = "sha256-ilTs4HWPUoHxQb4kWEy3wJ6QsE/98+EQya44gtV2inw=";
  };

  punkt = makeNltkDataPackage {
    pname = "punkt";
    location = "tokenizers";
    hash = "sha256-rMkgn3xzmSJNv8//kqbPF2Xq3Gf16lgA1Wx8FPYbaQo=";
  };

  stopwords = makeNltkDataPackage {
    pname = "stopwords";
    location = "corpora";
    hash = "sha256-Rj1jnt6IDEmBbSIHHueyEvPmdE4EZ6/bJ3qehniebbk=";
  };
})
|