From bd0794bd28bba29e2b961249971f09c3fbbd5e31 Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Thu, 18 Apr 2024 14:07:48 +0100 Subject: [PATCH] Organise things --- main.py | 31 +++++++++++++++++++++---------- poetry.lock | 16 +++++++++++++++- pyproject.toml | 1 + 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/main.py b/main.py index 2a81341..0eef020 100644 --- a/main.py +++ b/main.py @@ -1,10 +1,13 @@ import fitz from flask import Flask, request +from flask_cors import CORS from werkzeug.utils import secure_filename from waitress import serve -from os import getenv +from os import getenv, path +from tempfile import gettempdir app = Flask(__name__) +CORS(app) bullets = ["▶"] @@ -15,7 +18,8 @@ def process_file(): return "file not found", 400 assert file.filename is not None - filename = secure_filename(file.filename) + filename = path.join(gettempdir(), secure_filename(file.filename)) + file.save(filename) doc = fitz.open(filename) @@ -36,14 +40,15 @@ def process_file(): h1_size = sorted_sizes[-1] h2_size = sorted_sizes[-2] - output = [] + pass1 = [] for p in textpages: - outputBlocks = [] + pass1_blocks = [] for block in p["blocks"]: - outputLines = [] + pass1_lines = [] for line in block["lines"]: is_bullet = False largest_size = 0 + smallest_left = 10000 line_text = "" for span_index, span in enumerate(line["spans"]): t = span["text"] @@ -54,6 +59,9 @@ def process_file(): span_size = round(span["size"]) if span_size > largest_size: largest_size = span_size + span_left = round(span["origin"][0]) + if span_left < smallest_left: + smallest_left = span_left line_text += t line_type = "p" @@ -64,16 +72,19 @@ def process_file(): elif largest_size == h2_size: line_type = "h2" - outputLines.append({ + pass1_lines.append({ "type": line_type, - "value": line_text + "value": line_text, + "left": smallest_left, }) - outputBlocks.append(outputLines) - output.append(outputBlocks) - return output + pass1_blocks.append(pass1_lines) + pass1.append(pass1_blocks) + + return pass1 if __name__ == "__main__": + port = getenv("PORT") portNum = 8080 if port is not None and port.isnumeric(): diff --git a/poetry.lock b/poetry.lock index ee64790..100e7ca 100644 --- a/poetry.lock +++ b/poetry.lock @@ -275,6 +275,20 @@ Werkzeug = ">=3.0.0" async = ["asgiref (>=3.2)"] dotenv = ["python-dotenv"] +[[package]] +name = "flask-cors" +version = "4.0.0" +description = "A Flask extension adding a decorator for CORS support" +optional = false +python-versions = "*" +files = [ + {file = "Flask-Cors-4.0.0.tar.gz", hash = "sha256:f268522fcb2f73e2ecdde1ef45e2fd5c71cc48fe03cffb4b441c6d1b40684eb0"}, + {file = "Flask_Cors-4.0.0-py2.py3-none-any.whl", hash = "sha256:bc3492bfd6368d27cfe79c7821df5a8a319e1a6d5eab277a3794be19bdc51783"}, +] + +[package.dependencies] +Flask = ">=0.9" + [[package]] name = "itsdangerous" version = "2.2.0" @@ -641,4 +655,4 @@ watchdog = ["watchdog (>=2.3)"] [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "9c774824418bfd2f7302e97a86bb3d181f409f363b37fb9420d5c089cff666a1" +content-hash = "e400e043a9c4e03128fa9d04a0ff979ca717fbc08e6c8fcaee8448b2ca643a20" diff --git a/pyproject.toml b/pyproject.toml index 5a06499..2e5850b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,7 @@ pymupdf = "^1.24.2" ruff = "^0.3.7" flask = "^3.0.3" waitress = "^3.0.0" +flask-cors = "^4.0.0" [build-system]