This commit is contained in:
parent
323be2a03b
commit
bd0794bd28
31
main.py
31
main.py
@ -1,10 +1,13 @@
|
|||||||
import fitz
|
import fitz
|
||||||
from flask import Flask, request
|
from flask import Flask, request
|
||||||
|
from flask_cors import CORS
|
||||||
from werkzeug.utils import secure_filename
|
from werkzeug.utils import secure_filename
|
||||||
from waitress import serve
|
from waitress import serve
|
||||||
from os import getenv
|
from os import getenv, path
|
||||||
|
from tempfile import gettempdir
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
CORS(app)
|
||||||
|
|
||||||
bullets = ["▶"]
|
bullets = ["▶"]
|
||||||
|
|
||||||
@ -15,7 +18,8 @@ def process_file():
|
|||||||
return "file not found", 400
|
return "file not found", 400
|
||||||
|
|
||||||
assert file.filename is not None
|
assert file.filename is not None
|
||||||
filename = secure_filename(file.filename)
|
filename = path.join(gettempdir(), secure_filename(file.filename))
|
||||||
|
file.save(filename)
|
||||||
|
|
||||||
doc = fitz.open(filename)
|
doc = fitz.open(filename)
|
||||||
|
|
||||||
@ -36,14 +40,15 @@ def process_file():
|
|||||||
h1_size = sorted_sizes[-1]
|
h1_size = sorted_sizes[-1]
|
||||||
h2_size = sorted_sizes[-2]
|
h2_size = sorted_sizes[-2]
|
||||||
|
|
||||||
output = []
|
pass1 = []
|
||||||
for p in textpages:
|
for p in textpages:
|
||||||
outputBlocks = []
|
pass1_blocks = []
|
||||||
for block in p["blocks"]:
|
for block in p["blocks"]:
|
||||||
outputLines = []
|
pass1_lines = []
|
||||||
for line in block["lines"]:
|
for line in block["lines"]:
|
||||||
is_bullet = False
|
is_bullet = False
|
||||||
largest_size = 0
|
largest_size = 0
|
||||||
|
smallest_left = 10000
|
||||||
line_text = ""
|
line_text = ""
|
||||||
for span_index, span in enumerate(line["spans"]):
|
for span_index, span in enumerate(line["spans"]):
|
||||||
t = span["text"]
|
t = span["text"]
|
||||||
@ -54,6 +59,9 @@ def process_file():
|
|||||||
span_size = round(span["size"])
|
span_size = round(span["size"])
|
||||||
if span_size > largest_size:
|
if span_size > largest_size:
|
||||||
largest_size = span_size
|
largest_size = span_size
|
||||||
|
span_left = round(span["origin"][0])
|
||||||
|
if span_left < smallest_left:
|
||||||
|
smallest_left = span_left
|
||||||
line_text += t
|
line_text += t
|
||||||
|
|
||||||
line_type = "p"
|
line_type = "p"
|
||||||
@ -64,16 +72,19 @@ def process_file():
|
|||||||
elif largest_size == h2_size:
|
elif largest_size == h2_size:
|
||||||
line_type = "h2"
|
line_type = "h2"
|
||||||
|
|
||||||
outputLines.append({
|
pass1_lines.append({
|
||||||
"type": line_type,
|
"type": line_type,
|
||||||
"value": line_text
|
"value": line_text,
|
||||||
|
"left": smallest_left,
|
||||||
})
|
})
|
||||||
outputBlocks.append(outputLines)
|
|
||||||
output.append(outputBlocks)
|
|
||||||
|
|
||||||
return output
|
pass1_blocks.append(pass1_lines)
|
||||||
|
pass1.append(pass1_blocks)
|
||||||
|
|
||||||
|
return pass1
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
port = getenv("PORT")
|
port = getenv("PORT")
|
||||||
portNum = 8080
|
portNum = 8080
|
||||||
if port is not None and port.isnumeric():
|
if port is not None and port.isnumeric():
|
||||||
|
16
poetry.lock
generated
16
poetry.lock
generated
@ -275,6 +275,20 @@ Werkzeug = ">=3.0.0"
|
|||||||
async = ["asgiref (>=3.2)"]
|
async = ["asgiref (>=3.2)"]
|
||||||
dotenv = ["python-dotenv"]
|
dotenv = ["python-dotenv"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "flask-cors"
|
||||||
|
version = "4.0.0"
|
||||||
|
description = "A Flask extension adding a decorator for CORS support"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
files = [
|
||||||
|
{file = "Flask-Cors-4.0.0.tar.gz", hash = "sha256:f268522fcb2f73e2ecdde1ef45e2fd5c71cc48fe03cffb4b441c6d1b40684eb0"},
|
||||||
|
{file = "Flask_Cors-4.0.0-py2.py3-none-any.whl", hash = "sha256:bc3492bfd6368d27cfe79c7821df5a8a319e1a6d5eab277a3794be19bdc51783"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
Flask = ">=0.9"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "itsdangerous"
|
name = "itsdangerous"
|
||||||
version = "2.2.0"
|
version = "2.2.0"
|
||||||
@ -641,4 +655,4 @@ watchdog = ["watchdog (>=2.3)"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.12"
|
python-versions = "^3.12"
|
||||||
content-hash = "9c774824418bfd2f7302e97a86bb3d181f409f363b37fb9420d5c089cff666a1"
|
content-hash = "e400e043a9c4e03128fa9d04a0ff979ca717fbc08e6c8fcaee8448b2ca643a20"
|
||||||
|
@ -12,6 +12,7 @@ pymupdf = "^1.24.2"
|
|||||||
ruff = "^0.3.7"
|
ruff = "^0.3.7"
|
||||||
flask = "^3.0.3"
|
flask = "^3.0.3"
|
||||||
waitress = "^3.0.0"
|
waitress = "^3.0.0"
|
||||||
|
flask-cors = "^4.0.0"
|
||||||
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user