# One requirement specifier per line; lines starting with '#' are comments.

# OCR
ocrmypdf>=16.0.0
pytesseract>=0.3.10

# Table extraction
camelot-py[cv]>=0.11.0
pdfplumber>=0.10.3

# PDF handling / marking
PyMuPDF>=1.23.0

# Data handling
pandas>=2.0.0
numpy>=1.24.0

# Image processing (Camelot dependency)
opencv-python>=4.8.0

# PDF utilities
ghostscript>=0.7

# pip install pymupdf pdf2image pillow pytesseract transformers torch sentencepiece tqdm reportlab opencv-python-headless