chore: initial commit

This commit is contained in:
Mark Bailey 2024-10-25 12:38:24 -04:00
commit acdf6431d6
6 changed files with 215 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
.venv/
__pycache__/
data/
r.http

1
README.md Normal file
View File

@ -0,0 +1 @@
# DocTR Python API w/ FastAPI

93
src/main.py Normal file
View File

@ -0,0 +1,93 @@
import os
import pathlib
from fastapi.responses import JSONResponse
import numpy as np
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from fastapi import FastAPI, HTTPException, Response
from jinja2 import Environment, FileSystemLoader, select_autoescape
from urllib.parse import unquote
# ASGI application object; the route decorators in this module register
# their handlers on it.
app = FastAPI()

# Jinja2 environment used by renderTemplate(). The template directory is
# anchored to this file's location instead of the process's working
# directory, so template lookups work no matter where the server is started
# from. select_autoescape() is called with its defaults, which (per the
# Jinja2 docs) enable HTML escaping for .html/.htm/.xml templates.
env = Environment(
    loader=FileSystemLoader(
        str(pathlib.Path(__file__).resolve().parent / "templates")
    ),
    autoescape=select_autoescape(),
)
@app.get("/")
def root():
    """Run OCR on sample images and return the results as an HTML page.

    Lists the hard-coded sample directory, keeps the ``.jpg``/``.png``
    entries, runs the docTR predictor on at most ``max_files`` of them and
    renders ``main.html`` with a ``{file name: rendered text}`` mapping.

    Returns:
        Response: the rendered template, served as ``text/html``.

    Raises:
        HTTPException: 404 when the sample directory cannot be listed.
    """
    base_dir = pathlib.Path(
        "/home/m/Projects/sandbox/data/app/upload/classifier/deal/389/"
    )
    allowed_suffixes = (".jpg", ".png")
    # Only the first matching image is processed per request.
    max_files = 1

    try:
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # choice of "first" image deterministic between requests.
        names = sorted(os.listdir(base_dir))
    except OSError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e

    image_names = [
        name for name in names if pathlib.Path(name).suffix in allowed_suffixes
    ]

    # NOTE(review): the predictor is rebuilt on every request, which is
    # presumably expensive; consider creating it once at startup.
    predictor = ocr_predictor(
        "db_resnet50",
        pretrained=True,
        assume_straight_pages=False,
        preserve_aspect_ratio=True,
    )

    result = {}
    for name in image_names[:max_files]:
        print("Working on: " + name)
        doc = DocumentFile.from_images(str(base_dir / name))
        result[name] = predictor(doc).render()

    # A bare Response has no media type, so without this the page would be
    # sent without a Content-Type header.
    return Response(
        renderTemplate("main.html", {"results": result}),
        media_type="text/html",
    )
@app.get("/detect/")
def file(file_name: str):
    """Run OCR on one image inside the sandbox directory and return JSON.

    Args:
        file_name: Path of the image relative to ``~/Projects/sandbox/``.
            It is URL-unquoted and leading slashes are stripped before it
            is joined to the sandbox directory.

    Returns:
        JSONResponse: the predictor's exported result with NumPy arrays
        converted to plain lists.

    Raises:
        HTTPException: 422 when the path is invalid, points outside the
            sandbox directory, or OCR processing fails.
    """
    base_dir = pathlib.Path(os.path.expanduser("~/Projects/sandbox/")).resolve()
    relative_name = unquote(file_name).lstrip("/")

    # file_name comes from the request, so make sure ".." segments or
    # symlinks cannot escape the sandbox directory.
    try:
        target = (base_dir / relative_name).resolve()
    except (OSError, ValueError) as e:
        raise HTTPException(status_code=422, detail=str(e)) from e
    if not target.is_relative_to(base_dir):
        raise HTTPException(
            status_code=422,
            detail="file_name must point inside the sandbox directory",
        )

    print("Working on: " + str(target))
    try:
        # NOTE(review): the predictor is rebuilt on every request, which is
        # presumably expensive; consider creating it once at startup.
        predictor = ocr_predictor(
            "db_resnet50",
            "vitstr_base",
            pretrained=True,
            assume_straight_pages=False,
            preserve_aspect_ratio=True,
        )
        doc = DocumentFile.from_images(str(target))
        pred_res = predictor(doc)
        json_res = pred_res.export()
        converted = convert_dict_items_to_list(json_res)
        return JSONResponse(content=converted)
    except Exception as e:
        # Any failure while loading or processing the image is reported to
        # the client as 422, with the original error kept as the cause.
        raise HTTPException(status_code=422, detail=str(e)) from e
def convert_to_list(value):
    """Recursively replace NumPy values with plain Python equivalents.

    Dicts, lists and tuples are traversed at any nesting depth. NumPy arrays
    become (nested) lists via ``tolist()`` and NumPy scalars become the
    matching Python scalar via ``item()``, so the result can be serialized
    as JSON. Any other value is returned unchanged.

    Args:
        value: Arbitrary object, possibly containing NumPy arrays/scalars.

    Returns:
        A structure of the same shape with NumPy values converted. Dicts
        and lists are rebuilt as new objects; tuples stay tuples.
    """
    if isinstance(value, dict):
        return {k: convert_to_list(v) for k, v in value.items()}
    if isinstance(value, list):
        # Recurse into every item so arrays inside nested lists are
        # converted too, not only those directly inside this list.
        return [convert_to_list(item) for item in value]
    if isinstance(value, tuple):
        return tuple(convert_to_list(item) for item in value)
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, np.generic):
        # NumPy scalars (e.g. np.float32) are not JSON-serializable.
        return value.item()
    return value
def convert_dict_items_to_list(d: dict):
    """Return a new dict whose values are the values of *d* after conversion.

    Each value is passed through ``convert_to_list``; keys are kept as they
    are and *d* itself is not modified.
    """
    return {key: convert_to_list(item) for key, item in d.items()}
def renderTemplate(template, context=None):
    """Render a template from the module-level Jinja2 environment.

    Args:
        template: Name of the template to load through ``env``.
        context: Optional mapping of variables made available to the
            template. ``None`` (the default) renders with no variables.

    Returns:
        str: the rendered template text.
    """
    compiled = env.get_template(template)
    # Template.render() builds a dict from its argument, so passing None
    # through would raise TypeError; use an empty context instead.
    return compiled.render({} if context is None else context)

30
src/notes.md Normal file
View File

@ -0,0 +1,30 @@
# Image Pre-processing
1. Invert image - Tesseract 3.0 only?
2. Rescale
3. Binarize
4. Remove noise
5. Dilation and erosion
6. Rotation and deskewing
7. Remove borders
8. Missing borders
9. Transparency and alpha channel
## Invert Image
```python
inverted_image = cv2.bitwise_not(image)
cv2.imwrite('tmp/inverted_image.jpg', inverted_image)
```
## Rescale
## Binarize
1. Grayscale image first.
2. Convert to black and white.
* Adjust the threshold values; finding good ones may require testing.
## Remove Noise

76
src/requirements.txt Normal file
View File

@ -0,0 +1,76 @@
annotated-types==0.7.0
anyascii==0.3.2
anyio==4.6.2.post1
certifi==2024.8.30
charset-normalizer==3.4.0
click==8.1.7
defusedxml==0.7.1
dnspython==2.7.0
email_validator==2.2.0
fastapi==0.115.2
fastapi-cli==0.0.5
filelock==3.16.1
fsspec==2024.10.0
h11==0.14.0
h5py==3.12.1
httpcore==1.0.6
httptools==0.6.4
httpx==0.27.2
huggingface-hub==0.26.1
idna==3.10
Jinja2==3.1.4
langdetect==1.0.9
markdown-it-py==3.0.0
MarkupSafe==3.0.2
mdurl==0.1.2
mpmath==1.3.0
networkx==3.4.2
numpy==2.1.2
nvidia-cublas-cu12==12.4.5.8
nvidia-cuda-cupti-cu12==12.4.127
nvidia-cuda-nvrtc-cu12==12.4.127
nvidia-cuda-runtime-cu12==12.4.127
nvidia-cudnn-cu12==9.1.0.70
nvidia-cufft-cu12==11.2.1.3
nvidia-curand-cu12==10.3.5.147
nvidia-cusolver-cu12==11.6.1.9
nvidia-cusparse-cu12==12.3.1.170
nvidia-nccl-cu12==2.21.5
nvidia-nvjitlink-cu12==12.4.127
nvidia-nvtx-cu12==12.4.127
onnx==1.17.0
opencv-python==4.10.0.84
packaging==24.1
pillow==11.0.0
protobuf==5.28.2
pyclipper==1.3.0.post6
pydantic==2.9.2
pydantic_core==2.23.4
Pygments==2.18.0
pypdfium2==4.30.0
python-doctr==0.10.0
python-dotenv==1.0.1
python-multipart==0.0.12
PyYAML==6.0.2
RapidFuzz==3.10.0
requests==2.32.3
rich==13.9.2
scipy==1.14.1
setuptools==75.2.0
shapely==2.0.6
shellingham==1.5.4
six==1.16.0
sniffio==1.3.1
starlette==0.40.0
sympy==1.13.1
torch==2.5.0
torchvision==0.20.0
tqdm==4.66.5
triton==3.1.0
typer==0.12.5
typing_extensions==4.12.2
urllib3==2.2.3
uvicorn==0.32.0
uvloop==0.21.0
watchfiles==0.24.0
websockets==13.1

11
src/templates/main.html Normal file
View File

@ -0,0 +1,11 @@
<!DOCTYPE html>
<html lang="en">
<head>
</head>
<body>
{% for fname, result in results.items() %}
<h1>{{ fname }}</h1>
<p>{{ result }}</p>
{% endfor %}
</body>
</html>