{"package":"unstructured","ecosystem":"pypi","latest_version":"0.22.23","description":"A library that prepares raw documents for downstream ML tasks.","license":"Apache-2.0","license_risk":"permissive","commercial_use_notes":"Permissive: commercial closed-source use OK; preserve the copyright notice.","homepage":"https://pypi.org/project/unstructured/","repository":"https://github.com/Unstructured-IO/unstructured","downloads_weekly":1199770,"health":{"score":82,"risk":"low","breakdown":{"maintenance":25,"popularity":17,"security":25,"maturity":15,"community":0},"deprecated":false,"max_score":100},"vulnerabilities":{"count":0,"critical":0,"high":0,"medium":0,"low":0,"details":[]},"versions":{"latest":"0.22.23","total_count":217,"recent":["0.18.26","0.18.27","0.18.31","0.18.32","0.20.2","0.20.6","0.20.8","0.21.0","0.21.1","0.21.2","0.21.5","0.22.6","0.22.10","0.22.12","0.22.16","0.22.18","0.22.20","0.22.21","0.22.22","0.22.23"]},"metadata":{"deprecated":false,"deprecated_message":null,"maintainers_count":0,"first_published":null,"last_published":"2026-04-24T18:43:19.669583Z","dependencies_count":119,"dependencies":["beautifulsoup4<5.0.0,>=4.14.3","charset-normalizer<4.0.0,>=3.4.4","emoji<3.0.0,>=2.15.0","filelock<4.0.0,>=3.12.0","filetype<2.0.0,>=1.2.0","html5lib<2.0.0,>=1.1","installer<1.0.0,>=0.7.0","langdetect<2.0.0,>=1.0.9","lxml<7.0.0,>=5.0.0","numba<1.0.0,>=0.60.0","numpy<3.0.0,>=1.26.0","psutil<8.0.0,>=7.2.2","python-iso639<2027.0.0,>=2026.1.31","python-magic<1.0.0,>=0.4.27","python-oxmsg<1.0.0,>=0.0.2","rapidfuzz<4.0.0,>=3.14.3","regex<2027.0.0,>=2024.0.0","requests<3.0.0,>=2.32.5","spacy<4.0.0,>=3.7.0","tqdm<5.0.0,>=4.67.3","typing-extensions<5.0.0,>=4.15.0","unstructured-client<1.0.0,>=0.25.9","wrapt<3.0.0,>=2.1.1","google-cloud-vision<4.0.0,>=3.12.1; extra == \"all-docs\"","markdown<4.0.0,>=3.10.1; extra == \"all-docs\"","msoffcrypto-tool<7.0.0,>=6.0.0; extra == \"all-docs\"","networkx<4.0.0,>=3.2.0; extra == \"all-docs\"","openai-whisper<20270000,>=20231117; extra == \"all-docs\"","openpyxl<4.0.0,>=3.1.5; extra == \"all-docs\"","pandas<3.0.0,>=2.0.0; extra == \"all-docs\"","pdf2image<2.0.0,>=1.17.0; extra == \"all-docs\"","pdfminer-six<20270000,>=20251230; extra == \"all-docs\"","pi-heif<2.0.0,>=1.2.0; extra == \"all-docs\"","pikepdf<11.0.0,>=10.3.0; extra == \"all-docs\"","pypandoc-binary<2.0.0,>=1.16.2; platform_system != \"Windows\" and extra == \"all-docs\"","pypandoc-binary<2.0.0,>=1.16.2; (platform_system == \"Windows\" and python_version < \"3.13\") and extra == \"all-docs\"","pypdf<7.0.0,>=6.6.2; extra == \"all-docs\"","python-docx<2.0.0,>=1.2.0; extra == \"all-docs\"","python-pptx<2.0.0,>=1.0.2; extra == \"all-docs\"","unstructured-inference<2.0.0,>=1.2.0; (platform_system != \"Windows\" and python_version < \"3.12\") and extra == \"all-docs\"","unstructured-inference<2.0.0,>=1.6.6; (platform_system != \"Windows\" and python_version >= \"3.12\") and extra == \"all-docs\"","unstructured-inference<2.0.0,>=1.6.6; (platform_system == \"Windows\" and python_version >= \"3.12\" and python_version < \"3.13\") and extra == \"all-docs\"","unstructured-pytesseract<1.0.0,>=0.3.15; extra == \"all-docs\"","xlrd<3.0.0,>=2.0.1; extra == \"all-docs\"","openai-whisper<20270000,>=20231117; extra == \"audio\"","tiktoken<1.0.0,>=0.12.0; extra == \"chunking-tokens\"","pandas<3.0.0,>=2.0.0; extra == \"csv\"","python-docx<2.0.0,>=1.2.0; extra == \"doc\"","python-docx<2.0.0,>=1.2.0; extra == \"docx\"","pypandoc-binary<2.0.0,>=1.16.2; platform_system != \"Windows\" and extra == \"epub\"","pypandoc-binary<2.0.0,>=1.16.2; (platform_system == \"Windows\" and python_version < \"3.13\") and extra == \"epub\"","sentencepiece<1.0.0,>=0.2.0; extra == \"huggingface\"","torch<3.0.0,>=2.10.0; platform_system != \"Windows\" and extra == \"huggingface\"","torch<3.0.0,>=2.10.0; (platform_system == \"Windows\" and python_version < \"3.13\") and extra == \"huggingface\"","transformers<6.0.0,>=5.2.0; extra == \"huggingface\"","google-cloud-vision<4.0.0,>=3.12.1; extra == \"image\"","pdf2image<2.0.0,>=1.17.0; extra == \"image\"","pdfminer-six<20270000,>=20251230; extra == \"image\"","pi-heif<2.0.0,>=1.2.0; extra == \"image\"","pikepdf<11.0.0,>=10.3.0; extra == \"image\"","pypdf<7.0.0,>=6.6.2; extra == \"image\"","unstructured-inference<2.0.0,>=1.2.0; (platform_system != \"Windows\" and python_version < \"3.12\") and extra == \"image\"","unstructured-inference<2.0.0,>=1.6.6; (platform_system != \"Windows\" and python_version >= \"3.12\") and extra == \"image\"","unstructured-inference<2.0.0,>=1.6.6; (platform_system == \"Windows\" and python_version >= \"3.12\" and python_version < \"3.13\") and extra == \"image\"","unstructured-pytesseract<1.0.0,>=0.3.15; extra == \"image\"","unstructured-ingest[airtable,astradb,azure,azure-ai-search,bedrock,biomed,box,chroma,confluence,couchbase,databricks-volumes,delta-table,discord,dropbox,elasticsearch,gcs,github,gitlab,google-drive,hubspot,huggingface,jira,kafka,kdbai,milvus,mongodb,notion,octoai,onedrive,openai,opensearch,outlook,pinecone,postgres,qdrant,reddit,remote,s3,salesforce,sftp,sharepoint,singlestore,slack,vectara,vertexai,voyageai,weaviate,wikipedia]<2.0.0,>=1.4.0; platform_system != \"Windows\" and extra == \"ingest\"","unstructured-ingest[airtable,astradb,azure,azure-ai-search,bedrock,biomed,box,chroma,confluence,couchbase,databricks-volumes,delta-table,discord,dropbox,elasticsearch,gcs,github,gitlab,google-drive,hubspot,huggingface,jira,kafka,kdbai,milvus,mongodb,notion,octoai,onedrive,openai,opensearch,outlook,pinecone,postgres,qdrant,reddit,remote,s3,salesforce,sftp,sharepoint,singlestore,slack,vectara,vertexai,voyageai,weaviate,wikipedia]<2.0.0,>=1.4.0; (platform_system == \"Windows\" and python_version < \"3.13\") and extra == \"ingest\"","google-cloud-vision<4.0.0,>=3.12.1; extra == \"local-inference\"","markdown<4.0.0,>=3.10.1; extra == \"local-inference\"","msoffcrypto-tool<7.0.0,>=6.0.0; extra == \"local-inference\"","networkx<4.0.0,>=3.2.0; extra == \"local-inference\"","openai-whisper<20270000,>=20231117; extra == \"local-inference\"","openpyxl<4.0.0,>=3.1.5; extra == \"local-inference\"","pandas<3.0.0,>=2.0.0; extra == \"local-inference\"","pdf2image<2.0.0,>=1.17.0; extra == \"local-inference\"","pdfminer-six<20270000,>=20251230; extra == \"local-inference\"","pi-heif<2.0.0,>=1.2.0; extra == \"local-inference\"","pikepdf<11.0.0,>=10.3.0; extra == \"local-inference\"","pypandoc-binary<2.0.0,>=1.16.2; platform_system != \"Windows\" and extra == \"local-inference\"","pypandoc-binary<2.0.0,>=1.16.2; (platform_system == \"Windows\" and python_version < \"3.13\") and extra == \"local-inference\"","pypdf<7.0.0,>=6.6.2; extra == \"local-inference\"","python-docx<2.0.0,>=1.2.0; extra == \"local-inference\"","python-pptx<2.0.0,>=1.0.2; extra == \"local-inference\"","unstructured-inference<2.0.0,>=1.2.0; (platform_system != \"Windows\" and python_version < \"3.12\") and extra == \"local-inference\"","unstructured-inference<2.0.0,>=1.6.6; (platform_system != \"Windows\" and python_version >= \"3.12\") and extra == \"local-inference\"","unstructured-inference<2.0.0,>=1.6.6; (platform_system == \"Windows\" and python_version >= \"3.12\" and python_version < \"3.13\") and extra == \"local-inference\"","unstructured-pytesseract<1.0.0,>=0.3.15; extra == \"local-inference\"","xlrd<3.0.0,>=2.0.1; extra == \"local-inference\"","markdown<4.0.0,>=3.10.1; extra == \"md\"","pypandoc-binary<2.0.0,>=1.16.2; platform_system != \"Windows\" and extra == \"odt\"","pypandoc-binary<2.0.0,>=1.16.2; (platform_system == \"Windows\" and python_version < \"3.13\") and extra == \"odt\"","python-docx<2.0.0,>=1.2.0; extra == \"odt\"","pypandoc-binary<2.0.0,>=1.16.2; platform_system != \"Windows\" and extra == \"org\"","pypandoc-binary<2.0.0,>=1.16.2; (platform_system == \"Windows\" and python_version < \"3.13\") and extra == \"org\"","paddlepaddle<4.0.0,>=3.3.0; (platform_machine != \"aarch64\" and platform_system != \"Windows\") and extra == \"paddleocr\"","paddlepaddle<4.0.0,>=3.3.0; (platform_system == \"Windows\" and python_version < \"3.13\") and extra == \"paddleocr\"","unstructured-paddleocr==2.10.0; extra == \"paddleocr\"","google-cloud-vision<4.0.0,>=3.12.1; extra == \"pdf\"","pdf2image<2.0.0,>=1.17.0; extra == \"pdf\"","pdfminer-six<20270000,>=20251230; extra == \"pdf\"","pi-heif<2.0.0,>=1.2.0; extra == \"pdf\"","pikepdf<11.0.0,>=10.3.0; extra == \"pdf\"","pypdf<7.0.0,>=6.6.2; extra == \"pdf\"","unstructured-inference<2.0.0,>=1.2.0; (platform_system != \"Windows\" and python_version < \"3.12\") and extra == \"pdf\"","unstructured-inference<2.0.0,>=1.6.6; (platform_system != \"Windows\" and python_version >= \"3.12\") and extra == \"pdf\"","unstructured-inference<2.0.0,>=1.6.6; (platform_system == \"Windows\" and python_version >= \"3.12\" and python_version < \"3.13\") and extra == \"pdf\"","unstructured-pytesseract<1.0.0,>=0.3.15; extra == \"pdf\"","python-pptx<2.0.0,>=1.0.2; extra == \"ppt\"","python-pptx<2.0.0,>=1.0.2; extra == \"pptx\"","pypandoc-binary<2.0.0,>=1.16.2; platform_system != \"Windows\" and extra == \"rst\"","pypandoc-binary<2.0.0,>=1.16.2; (platform_system == \"Windows\" and python_version < \"3.13\") and extra == \"rst\"","pypandoc-binary<2.0.0,>=1.16.2; platform_system != \"Windows\" and extra == \"rtf\"","pypandoc-binary<2.0.0,>=1.16.2; (platform_system == \"Windows\" and python_version < \"3.13\") and extra == \"rtf\"","pandas<3.0.0,>=2.0.0; extra == \"tsv\"","msoffcrypto-tool<7.0.0,>=6.0.0; extra == \"xlsx\"","networkx<4.0.0,>=3.2.0; extra == \"xlsx\"","openpyxl<4.0.0,>=3.1.5; extra == \"xlsx\"","pandas<3.0.0,>=2.0.0; extra == \"xlsx\"","xlrd<3.0.0,>=2.0.1; extra == \"xlsx\""]},"github_stats":null,"bundle":null,"typescript":null,"known_issues":{"bugs_count":0,"bugs_severity":{},"status_breakdown":{},"link":null,"scope":"none"},"historical_compromise":null,"recommendation":{"action":"safe_to_use","issues":[],"use_version":"0.22.23","version_hint":null,"summary":"unstructured@0.22.23 is safe to use (health: 82/100)"},"version_scoped":null,"requested_version":null,"_cache":"hit","_response_ms":0,"_powered_by":"depscope.dev — free package intelligence for AI agents","typosquat":{"is_suspected":false},"maintainer_trust":{"available":false},"malicious":{"is_malicious":false},"scorecard":{"available":false},"quality":{"available":true,"criticality_score":null,"criticality_tier":null,"velocity_pct":null,"velocity_trend":null,"publish_security":"api_token"},"version_history_summary":{"total_versions":20,"first_release_age_days":null,"last_release_days_ago":4,"avg_days_between_releases":null,"release_velocity":"active"}}