| [build-system] |
| # setuptools 77 or newer is required: `license` under [project] is written as a |
| # bare SPDX expression (PEP 639), which older setuptools releases reject. |
| requires = ["setuptools>=77.0.3", "wheel"] |
| build-backend = "setuptools.build_meta" |
|
|
| [project] |
| # Identity and summary. |
| name = "doc_redaction" |
| version = "2.2.3" |
| description = "Redact PDF/image-based documents, Word, or CSV/XLSX files using a Gradio-based GUI interface" |
| readme = "README_PYPI.md" |
| # SPDX license expression. |
| license = "AGPL-3.0-only" |
| requires-python = ">=3.10" |
| authors = [{ name = "Sean Pedrick-Case", email = "spedrickcase@lambeth.gov.uk" }] |
| maintainers = [{ name = "Sean Pedrick-Case", email = "spedrickcase@lambeth.gov.uk" }] |
| keywords = [ |
| "redaction", |
| "pdf", |
| "nlp", |
| "documents", |
| "document-processing", |
| "gradio", |
| "pii", |
| "pii-detection", |
| ] |
| # Trove classifiers; the Python versions listed run from 3.10 to 3.13. |
| classifiers = [ |
| "Development Status :: 5 - Production/Stable", |
| "Intended Audience :: Developers", |
| "Intended Audience :: Legal Industry", |
| "Topic :: Text Processing :: General", |
| "Topic :: Security :: Cryptography", |
| "Programming Language :: Python :: 3", |
| "Programming Language :: Python :: 3.10", |
| "Programming Language :: Python :: 3.11", |
| "Programming Language :: Python :: 3.12", |
| "Programming Language :: Python :: 3.13", |
| ] |
| # Runtime requirements. Every entry carries an upper cap (`<=`) except |
| # gradio_image_annotation_redaction, which is pinned to an exact version. |
| dependencies = [ |
| "pdfminer.six<=20260107", |
| "pdf2image<=1.17.0", |
| "pymupdf<=1.27.1", |
| "bleach<=6.3.0", |
| "opencv-python<=4.13.0.92", |
| "presidio_analyzer<=2.2.362", |
| "presidio_anonymizer<=2.2.362", |
| "presidio-image-redactor<=0.0.58", |
| "pikepdf<=10.3.0", |
| "pandas<=2.3.3", |
| "scikit-learn<=1.8.0", |
| "spacy<=3.8.14", |
| "gradio<=6.10.0", |
| "boto3<=1.42.91", |
| "pyarrow<=23.0.1", |
| "openpyxl<=3.1.5", |
| "Faker<=40.8.0", |
| "python-levenshtein<=0.27.3", |
| "spaczz<=0.6.1", |
| "gradio_image_annotation_redaction==0.5.5", |
| "rapidfuzz<=3.14.5", |
| "python-dotenv<=1.2.2", |
| "awslambdaric<=3.1.1", |
| "python-docx<=1.2.0", |
| "polars<=1.38.1", |
| "defusedxml<=0.7.1", |
| "numpy<=2.4.4", |
| "spaces<=0.48.3", |
| "google-genai<=1.73.0", |
| "openai<=2.31.0", |
| "markdown<=3.10.2", |
| "tabulate<=0.10.0", |
| ] |
|
|
| [project.optional-dependencies] |
| # Extras are opt-in, e.g. `pip install "doc_redaction[paddle]"`. |
| dev = ["pytest"] |
| test = [ |
| "pytest", |
| "pytest-cov", |
| ] |
|
| # PaddlePaddle / PaddleOCR stack. |
| paddle = [ |
| "protobuf<=7.34.0", |
| "paddlepaddle>=3.0.0,<=3.2.1", |
| "paddleocr<=3.3.0", |
| "pycocotools<=2.0.10", |
| ] |
|
| # PyTorch / Transformers stack. |
| vlm = [ |
| "torch<=2.9.1", |
| "torchvision<=0.24.1", |
| "transformers<=5.5.4", |
| "accelerate<=1.13.0", |
| "bitsandbytes<=0.49.2", |
| "sentencepiece<=0.2.1", |
| ] |
|
| # Gradio with its `mcp` extra; same version cap as the core gradio requirement. |
| mcp = ["gradio[mcp]<=6.10.0"] |
|
|
| [project.urls] |
| # Project links published with the package metadata. |
| Homepage = "https://seanpedrick-case.github.io/doc_redaction/" |
| Repository = "https://github.com/seanpedrick-case/doc_redaction" |
|
|
| [project.scripts] |
| # Console commands created on install; each value is a `module:function` entry point. |
| # NOTE(review): a top-level `cli_redact` module is also listed under |
| # [tool.setuptools] py-modules; confirm `doc_redaction.cli_redact` exists as well. |
| cli_redact = "doc_redaction.cli_redact:main" |
| mcp_doc_redaction = "mcp_doc_redaction.server:main" |
| doc_redaction_install_deps = "doc_redaction.install_deps:main" |
|
|
| [tool.setuptools] |
| # Include package data files in the built distributions. |
| include-package-data = true |
| # Single-file top-level modules shipped in addition to the packages selected |
| # under [tool.setuptools.packages.find]. |
| py-modules = [ |
| "app", |
| "agent_routes", |
| "cli_redact", |
| "lambda_entrypoint", |
| "load_dynamo_logs", |
| "load_s3_logs", |
| ] |
|
|
| [tool.setuptools.packages.find] |
| # Package discovery starts at the project root (`where`), keeps names matching |
| # `include`, and drops names matching `exclude`. |
| where = ["."] |
| include = ["doc_redaction*", "tools*", "mcp_doc_redaction*"] |
| exclude = [ |
| "test*", |
| "skills*", |
| "cdk*", |
| "src*", |
| "example_data*", |
| ] |
|
|
| |
| [tool.ruff] |
| line-length = 88 |
|
| [tool.ruff.lint] |
| # Enabled rule families: E (pycodestyle errors), F (Pyflakes), I (isort). |
| select = ["E", "F", "I"] |
| # E501: line too long. E402: module-level import not at top of file. |
| ignore = ["E501", "E402"] |
|
| [tool.ruff.lint.per-file-ignores] |
| # F401: imported but unused. |
| "__init__.py" = ["F401"] |
|
|
| |
| [tool.black] |
| # Same line length as [tool.ruff]; py310 matches the `requires-python` floor. |
| line-length = 88 |
| target-version = ["py310"] |
|
|
| |
| [tool.pytest.ini_options] |
| markers = [ |
| "integration: optional slow tests (CLI PDF smoke; set PYTEST_CLI_INTEGRATION=1 where needed)", |
| ] |
| # Filter syntax is `action:message:category:module`. |
| filterwarnings = [ |
| "ignore::DeprecationWarning:click.parser", |
| "ignore::DeprecationWarning:weasel.util.config", |
| # Matched on message text, so "builtin type" sits in the message field; in the |
| # module field it would be compared against module names and never match. |
| "ignore:builtin type:DeprecationWarning", |
| "ignore::DeprecationWarning:websockets.legacy", |
| "ignore::DeprecationWarning:websockets.server", |
| "ignore::DeprecationWarning:spacy.cli._util", |
| "ignore::DeprecationWarning:importlib._bootstrap", |
| ] |
| testpaths = ["test"] |
| python_files = ["test_*.py", "*_test.py"] |
| python_classes = ["Test*"] |
| python_functions = ["test_*"] |
| # `-m "not integration"` deselects tests carrying the `integration` marker by default. |
| addopts = [ |
| "-v", |
| "--tb=short", |
| "--strict-markers", |
| "--disable-warnings", |
| "-m", |
| "not integration", |
| ] |