{"package":"datatrove","ecosystem":"pypi","latest_version":"0.9.0","description":"HuggingFace library to process and filter large amounts of webdata","license":"Apache-2.0","license_risk":"permissive","commercial_use_notes":"Permissive: commercial closed-source use OK; preserve the copyright notice.","homepage":"https://pypi.org/project/datatrove/","repository":"https://github.com/huggingface/datatrove","downloads_weekly":9211,"health":{"score":57,"risk":"high","breakdown":{"maintenance":20,"popularity":6,"security":25,"maturity":6,"community":0},"deprecated":false,"max_score":100},"vulnerabilities":{"count":0,"critical":0,"high":0,"medium":0,"low":0,"details":[]},"versions":{"latest":"0.9.0","total_count":10,"recent":["0.0.1.dev0","0.0.1","0.2.0","0.3.0","0.4.0","0.5.0","0.6.0","0.7.0","0.8.0","0.9.0"]},"metadata":{"deprecated":false,"deprecated_message":null,"maintainers_count":0,"first_published":null,"last_published":"2026-03-04T13:44:33.968520Z","dependencies_count":72,"dependencies":["dill>=0.3.0","fsspec>=2023.12.2","huggingface-hub<1.0,>=0.34.0","humanize","loguru>=0.7.0","multiprocess","numpy>=2.0.0","tqdm","rich; extra == \"cli\"","faust-cchardet; extra == \"io\"","pyarrow; extra == \"io\"","python-magic; extra == \"io\"","warcio; extra == \"io\"","datasets>=3.1.0; extra == \"io\"","orjson; extra == \"io\"","zstandard; extra == \"io\"","s3fs>=2023.12.2; extra == \"s3\"","fasttext-numpy2-wheel; extra == \"processing\"","nltk; extra == \"processing\"","inscriptis; extra == \"processing\"","tldextract; extra == \"processing\"","trafilatura<1.12.0,>=1.8.0; extra == \"processing\"","tokenizers; extra == \"processing\"","ftfy; extra == \"processing\"","fasteners; extra == \"processing\"","regex; extra == \"processing\"","xxhash; extra == \"processing\"","pyahocorasick; extra == \"processing\"","lighteval>=0.3.0; extra == \"decont\"","spacy[ja]>=3.8; extra == \"multilingual\"","stanza; extra == \"multilingual\"","pyvi; extra == \"multilingual\"","pythainlp; extra == \"multilingual\"","jieba; extra == \"multilingual\"","indic-nlp-library; extra == \"multilingual\"","kiwipiepy<0.22.0; extra == \"multilingual\"","urduhack; extra == \"multilingual\"","tensorflow>=2.16; extra == \"multilingual\"","khmer-nltk; extra == \"multilingual\"","laonlp; extra == \"multilingual\"","botok; extra == \"multilingual\"","pyidaungsu-numpy2; extra == \"multilingual\"","datatrove[io]; extra == \"inference\"","aiofiles; extra == \"inference\"","httpx; extra == \"inference\"","aiosqlite; extra == \"inference\"","vllm; extra == \"inference\"","sglang; extra == \"inference\"","bitsandbytes; extra == \"inference\"","numpy<2.3,>=2.0.0; extra == \"inference\"","typer; extra == \"inference\"","pyyaml; extra == \"inference\"","pandas; extra == \"inference\"","transformers>=4.57; extra == \"inference\"","ray[default]; extra == \"ray\"","ruff>=0.1.5; extra == \"quality\"","datatrove[cli]; extra == \"testing\"","datatrove[io]; extra == \"testing\"","datatrove[processing]; extra == \"testing\"","datatrove[multilingual]; extra == \"testing\"","datatrove[s3]; extra == \"testing\"","datatrove[ray]; extra == \"testing\"","datatrove[inference]; extra == \"testing\"","flask>=3.1.0; extra == \"testing\"","pytest; extra == \"testing\"","pytest-rerunfailures; extra == \"testing\"","pytest-timeout; extra == \"testing\"","pytest-xdist; extra == \"testing\"","moto[s3,server]; extra == \"testing\"","datatrove[quality]; extra == \"all\"","datatrove[testing]; extra == \"all\"","datatrove[all]; extra == \"dev\""]},"github_stats":null,"bundle":null,"typescript":null,"known_issues":{"bugs_count":0,"bugs_severity":{},"status_breakdown":{},"link":null,"scope":"none"},"historical_compromise":null,"recommendation":{"action":"safe_to_use","issues":[],"use_version":"0.9.0","version_hint":null,"summary":"datatrove@0.9.0 is safe to use (health: 57/100)"},"version_scoped":null,"requested_version":null,"_cache":"miss","_response_ms":834,"_powered_by":"depscope.dev — free package intelligence for AI agents","typosquat":{"is_suspected":false},"maintainer_trust":{"available":false},"malicious":{"is_malicious":false},"scorecard":{"available":false},"quality":{"available":false},"version_history_summary":{"total_versions":10,"first_release_age_days":null,"last_release_days_ago":57,"avg_days_between_releases":null,"release_velocity":"active"}}