commit acdf6431d6c5251d97925761af127a6e3abfd90b Author: Mark Bailey Date: Fri Oct 25 12:38:24 2024 -0400 chore: initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1bccb74 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.venv/ +__pycache__/ +data/ +r.http diff --git a/README.md b/README.md new file mode 100644 index 0000000..ca2fc88 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# DocTR Python API w/ FastAPI diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..977a153 --- /dev/null +++ b/src/main.py @@ -0,0 +1,93 @@ +import os +import pathlib +from fastapi.responses import JSONResponse +import numpy as np + +from doctr.io import DocumentFile +from doctr.models import ocr_predictor +from fastapi import FastAPI, HTTPException, Response +from jinja2 import Environment, FileSystemLoader, select_autoescape +from urllib.parse import unquote + +app = FastAPI() + +env = Environment(loader=FileSystemLoader("templates"), autoescape=select_autoescape()) + + +@app.get("/") +def root(): + base_dir = "/home/m/Projects/sandbox/data/app/upload/classifier/deal/389/" + files = os.listdir(base_dir) + + predictor = ocr_predictor( + "db_resnet50", + pretrained=True, + assume_straight_pages=False, + preserve_aspect_ratio=True, + ) + + result = {} + i = 0 + for f in files: + if i >= 1: + break + + if pathlib.Path(f).suffix not in [".jpg", ".png"]: + continue + + print("Working on: " + f) + doc = DocumentFile.from_images(base_dir + f) + + pred_res = predictor(doc) + result[f] = pred_res.render() + + i += 1 + + return Response(renderTemplate("main.html", {"results": result})) + + +@app.get("/detect/") +def file(file_name: str): + file_name = os.path.expanduser("~/Projects/sandbox/")+unquote(file_name).lstrip("/") + + print("Working on: " + file_name) + + try: + predictor = ocr_predictor( + "db_resnet50", + "vitstr_base", + pretrained=True, + assume_straight_pages=False, + preserve_aspect_ratio=True, + ) + doc = DocumentFile.from_images(file_name) + pred_res = predictor(doc) + json_res = pred_res.export() + converted = convert_dict_items_to_list(json_res) + return JSONResponse(content=converted) + except Exception as e: + raise HTTPException(status_code=422, detail=str(e)) + + +def convert_to_list(value): + if isinstance(value, dict): + return {k:convert_to_list(v) for k,v in value.items()} + elif isinstance(value, list): + return [convert_to_list(item) if isinstance(item, (dict, np.ndarray)) else item.tolist() if isinstance(item, np.ndarray) else item for item in value] + elif isinstance(value, np.ndarray): + return value.tolist() + else: + return value + +def convert_dict_items_to_list(d: dict): + converted = {} + + for k, v in d.items(): + converted[k] = convert_to_list(v) + + return converted + + +def renderTemplate(template, context=None): + template = env.get_template(template) + return template.render(context) diff --git a/src/notes.md b/src/notes.md new file mode 100644 index 0000000..14f401c --- /dev/null +++ b/src/notes.md @@ -0,0 +1,30 @@ +# Image Pre-processing + +1. Invert image - Tesseract 3.0 only? +2. Rescale +3. Binarize +4. Remove noise +5. Dilation and erosion +6. Rotation and deskewing +7. Remove borders +8. Missing borders +9. Transparency and alpha channel + +## Invert Image + +```python +inverted_image = cv2.bitwise_not(image) +cv2.imwrite('tmp/inverted_image.jpg', inverted_image) +``` + +## Rescale + +## Binarize + +1. Grayscale image first. +2. Convert to black and white. + * Adjust threshold values, may require testing. + +## Remove Noise + + diff --git a/src/requirements.txt b/src/requirements.txt new file mode 100644 index 0000000..0ded8c0 --- /dev/null +++ b/src/requirements.txt @@ -0,0 +1,76 @@ +annotated-types==0.7.0 +anyascii==0.3.2 +anyio==4.6.2.post1 +certifi==2024.8.30 +charset-normalizer==3.4.0 +click==8.1.7 +defusedxml==0.7.1 +dnspython==2.7.0 +email_validator==2.2.0 +fastapi==0.115.2 +fastapi-cli==0.0.5 +filelock==3.16.1 +fsspec==2024.10.0 +h11==0.14.0 +h5py==3.12.1 +httpcore==1.0.6 +httptools==0.6.4 +httpx==0.27.2 +huggingface-hub==0.26.1 +idna==3.10 +Jinja2==3.1.4 +langdetect==1.0.9 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +mdurl==0.1.2 +mpmath==1.3.0 +networkx==3.4.2 +numpy==2.1.2 +nvidia-cublas-cu12==12.4.5.8 +nvidia-cuda-cupti-cu12==12.4.127 +nvidia-cuda-nvrtc-cu12==12.4.127 +nvidia-cuda-runtime-cu12==12.4.127 +nvidia-cudnn-cu12==9.1.0.70 +nvidia-cufft-cu12==11.2.1.3 +nvidia-curand-cu12==10.3.5.147 +nvidia-cusolver-cu12==11.6.1.9 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-nccl-cu12==2.21.5 +nvidia-nvjitlink-cu12==12.4.127 +nvidia-nvtx-cu12==12.4.127 +onnx==1.17.0 +opencv-python==4.10.0.84 +packaging==24.1 +pillow==11.0.0 +protobuf==5.28.2 +pyclipper==1.3.0.post6 +pydantic==2.9.2 +pydantic_core==2.23.4 +Pygments==2.18.0 +pypdfium2==4.30.0 +python-doctr==0.10.0 +python-dotenv==1.0.1 +python-multipart==0.0.12 +PyYAML==6.0.2 +RapidFuzz==3.10.0 +requests==2.32.3 +rich==13.9.2 +scipy==1.14.1 +setuptools==75.2.0 +shapely==2.0.6 +shellingham==1.5.4 +six==1.16.0 +sniffio==1.3.1 +starlette==0.40.0 +sympy==1.13.1 +torch==2.5.0 +torchvision==0.20.0 +tqdm==4.66.5 +triton==3.1.0 +typer==0.12.5 +typing_extensions==4.12.2 +urllib3==2.2.3 +uvicorn==0.32.0 +uvloop==0.21.0 +watchfiles==0.24.0 +websockets==13.1 diff --git a/src/templates/main.html b/src/templates/main.html new file mode 100644 index 0000000..e888bdb --- /dev/null +++ b/src/templates/main.html @@ -0,0 +1,11 @@ + + + + + + {% for fname, result in results.items() %} +

{{ fname }}

+

{{ result }}

+ {% endfor %} + +