Commit 7e7b841d authored by Anton Akhmerov
Browse files

Merge branch '456-different-file-types-for-uploading' into 'master'

Support different file types for scan uploading

Closes #456

See merge request !295
parents a53aaf43 2074257e
Pipeline #35743 passed with stages
in 4 minutes and 42 seconds
......@@ -75,40 +75,22 @@ class Scans extends React.Component {
})
}
onDropPDF = (accepted, rejected) => {
onDropFile = (accepted, rejected, type) => {
if (rejected.length > 0) {
Notification.error('Please upload a scan PDF.')
return
}
accepted.map(file => {
const data = new window.FormData()
data.append('pdf', file)
api.post('scans/' + this.props.examID, data)
.then(() => {
this.updateScans()
})
.catch(resp => {
Notification.error('failed to upload pdf (see javascript console for details)')
console.error('failed to upload PDF:', resp)
})
})
}
onDropZIP = (accepted, rejected) => {
if (rejected.length > 0) {
Notification.error('Please upload a ZIP file.')
Notification.error('Please upload a PDF, ZIP or image.')
return
}
accepted.map(file => {
const data = new window.FormData()
data.append('file', file)
api.post('scans/raw/' + this.props.exam.id, data)
data.append('scan_type', type)
api.post('scans/' + this.props.examID, data)
.then(() => {
this.updateScans()
})
.catch(resp => {
Notification.error('failed to upload ZIP (see javascript console for details)')
console.error('failed to upload ZIP:', resp)
Notification.error('Failed to upload file (see javascript console for details)')
console.error('Failed to upload file:', resp)
})
})
}
......@@ -156,6 +138,8 @@ class Scans extends React.Component {
: null
)
const acceptedTypes = 'application/pdf,application/zip,application/octet-stream,application/x-zip-compressed,multipart/x-zip,image/*'
return <div>
<Hero title='Scans' subtitle='Upload scans and check missing pages' />
......@@ -163,14 +147,14 @@ class Scans extends React.Component {
<section className='section'>
<div className='container'>
<div className='columns'>
<div className='column has-text-centered'>
<Dropzone accept={'application/pdf'} style={{}}
<div className='columns is-multiline is-centered'>
<div className='column is-full has-text-centered'>
<Dropzone accept={acceptedTypes} style={{}}
activeStyle={{ borderStyle: 'dashed', width: 'fit-content', margin: 'auto' }}
onDrop={this.onDropPDF}
onDrop={(accepted, rejected) => this.onDropFile(accepted, rejected, 'normal')}
disablePreview
multiple>
<DropzoneContent text='Choose a PDF file…' />
<DropzoneContent text='Choose a scan file…' />
</Dropzone>
</div>
<div className='column is-half'>
......@@ -195,13 +179,13 @@ class Scans extends React.Component {
more information please refer to <a href='/#image-based-exam'>Home#image-based-exam</a>.
</div>
<Dropzone
accept={'application/zip,application/octet-stream,application/x-zip-compressed,multipart/x-zip'}
accept={acceptedTypes}
style={{}}
activeStyle={{ borderStyle: 'dashed', width: 'fit-content', margin: 'auto' }}
onDrop={this.onDropZIP}
onDrop={(accepted, rejected) => this.onDropFile(accepted, rejected, 'raw')}
disablePreview
multiple>
<DropzoneContent text='Choose a ZIP file…' />
<DropzoneContent text='Choose a scan file…' />
</Dropzone>
</div>
</div>
......
......@@ -24,6 +24,15 @@ dependencies:
- celery
- redis
#oauth library
- requests_oauthlib
#flask session management
- flask_login
#flask role management
- flask_principal
# General utilities
- numpy
- scipy
......
......@@ -48,7 +48,7 @@ from pathlib import Path
from lorem.text import TextLorem
from zesje.database import db, Exam, Scan, Submission, Solution, Page, Copy
from zesje.scans import _process_pdf
from zesje.scans import _process_scan, process_page
from zesje.factory import create_app
import zesje.mysql as mysql
......@@ -165,7 +165,7 @@ def handle_pdf_processing(app, exam_id, pdf, pages, student_ids, copies_per_stud
if skip_processing:
_fake_process_pdf(scan, pages, student_ids, copies_per_student)
else:
_process_pdf(scan_id=scan.id)
_process_scan(scan_id=scan.id, process_page_function=process_page)
return {
'id': scan.id,
......
......@@ -4,7 +4,7 @@ import zipfile
from io import BytesIO
from zesje.database import db, Exam, Scan
from zesje.raw_scans import process_zipped_images
from zesje.scans import process_scan
@pytest.fixture
......@@ -28,10 +28,11 @@ def zip_file():
def test_no_zip(test_client, app_with_data):
app, exam = app_with_data
data = {
'file': (b'abc', 'image.jpg')
'file': (b'abc', 'image.jpg'),
'scan_type': 'raw'
}
response = test_client.post(
f'api/scans/raw/{exam.id}', data=data,
f'api/scans/{exam.id}', data=data,
content_type='multipart/form-data'
)
assert response.status_code == 400
......@@ -39,10 +40,11 @@ def test_no_zip(test_client, app_with_data):
def test_no_exam(test_client, zip_file):
data = {
'file': (zip_file, 'file.zip')
'file': (zip_file, 'file.zip'),
'scan_type': 'raw'
}
response = test_client.post(
'api/scans/raw/1', data=data,
'api/scans/1', data=data,
content_type='multipart/form-data'
)
assert response.status_code == 404
......@@ -54,10 +56,11 @@ def test_not_finalized_exam(test_client, zip_file, app_with_data):
db.session.commit()
data = {
'file': (zip_file, 'file.zip')
'file': (zip_file, 'file.zip'),
'scan_type': 'raw'
}
response = test_client.post(
f'api/scans/raw/{exam.id}', data=data,
f'api/scans/{exam.id}', data=data,
content_type='multipart/form-data'
)
assert response.status_code == 403
......@@ -71,28 +74,43 @@ def test_saving_zip_failed(test_client, zip_file, app_with_data, monkeypatch):
monkeypatch.setattr(app, 'config', app_config_mock)
data = {
'file': (zip_file, 'file.zip')
'file': (zip_file, 'file.zip'),
'scan_type': 'raw'
}
response = test_client.post(
f'api/scans/raw/{exam.id}', data=data,
f'api/scans/{exam.id}', data=data,
content_type='multipart/form-data'
)
assert response.status_code == 500
assert len(Scan.query.all()) == 0
def test_invalid_scan_type(test_client, zip_file, app_with_data):
    """Posting a scan with ``scan_type='invalid'`` must be answered with HTTP 400."""
    _, exam = app_with_data
    payload = {
        'file': (zip_file, 'file.zip'),
        'scan_type': 'invalid'
    }
    upload_url = f'api/scans/{exam.id}'
    response = test_client.post(upload_url, data=payload, content_type='multipart/form-data')
    assert response.status_code == 400
def test_processing_started(test_client, zip_file, app_with_data, monkeypatch):
app, exam = app_with_data
data = {
'file': (zip_file, 'file.zip')
'file': (zip_file, 'file.zip'),
'scan_type': 'raw'
}
scan_ids = []
monkeypatch.setattr(process_zipped_images, 'delay', lambda scan_id: scan_ids.append(scan_id))
monkeypatch.setattr(process_scan, 'delay', lambda scan_id, scan_type: scan_ids.append(scan_id))
response = test_client.post(
f'api/scans/raw/{exam.id}', data=data,
f'api/scans/{exam.id}', data=data,
content_type='multipart/form-data'
)
assert response.status_code == 200
......
import pytest
import zipfile
from pathlib import Path
from io import BytesIO
from PIL import Image
from zesje.image_extraction import convert_to_rgb
from zesje.image_extraction import convert_to_rgb, extract_pages_from_file, guess_page_info, guess_missing_page_info
from zesje.database import Student
image_modes = ['RGB', 'RGBA', 'L', 'P', 'CMYK', 'HSV']
......@@ -15,3 +19,181 @@ def test_convert_to_rgb(image_mode):
assert converted.mode == 'RGB'
assert converted.size == image.size
# Parametrisation table for ``test_guess_image_info``.
#
# Each entry is ``(file_info, expected, test_id)``:
#   * ``file_info`` is the list handed to ``guess_page_info``: the uploaded
#     file name, optionally followed by the name of a member inside an
#     archive and/or an integer (presumably the page number within a PDF;
#     confirm against ``guess_page_info``).
#   * ``expected`` is the 3-tuple the call must return; other tests in this
#     module unpack such tuples as ``(student, page, copy)``.
#   * ``test_id`` is the human-readable pytest id.
guess_image_info_arguments = [
    (['1234567-02.png'], (1234567, 1, None), 'Valid student page'),
    (['1234567-1-4.jpeg'], (1234567, 0, 4), 'Valid student page copy'),
    (['1234567.png'], (1234567, None, None), 'Valid student'),
    (['ABCDEFG.jpeg'], (None, None, None), 'Invalid letter'),
    (['1234567.zip', '1.png'], (1234567, 0, None), 'Valid zip student page'),
    (['1234567.zip', '1-2.jpg'], (1234567, 0, 2), 'Valid zip student page copy'),
    (['1234567.pdf', 1], (1234567, 0, None), 'Valid pdf student page'),
    (['some.zip', '1234567.pdf', 2], (1234567, 1, None), 'Valid scan zip student page'),
    (['some.zip', '1234567/2.pdf', 2], (1234567, 1, 2), 'Valid scan zip student page copy'),
    (['some.zip', '1234567-1.jpg'], (1234567, 0, None), 'Valid scan zip student page'),
    (['some.zip', '1234567/1.png'], (1234567, 0, None), 'Valid scan zip folder student page'),
    (['some.zip', 'Random First Last 99 Januari 99/submission.pdf', 3], (1000001, 2, None),
     'Valid folder name student page'),
    (['some.zip', 'Random First Last 99 Januari 99/1000001.pdf', 3], (1000001, 2, None),
     'Valid folder name id student page'),
    (['tn1234.zip', '1234567/final tn1234.pdf', 5], (1234567, 4, None), 'Valid scan zip pdf number student page'),
    (['tn1234.zip', '1234567/tn1234 page 1.pdf', 4], (1234567, 0, 4), 'Valid scan zip pdf number student page copy'),
    (['tn1234.zip', '1234567/tn1234 page 1 copy 2.png'], (1234567, 0, 2), 'Valid scan zip img number student page')
]
@pytest.mark.parametrize(
    'file_info, info',
    [case[:2] for case in guess_image_info_arguments],
    ids=[case[2] for case in guess_image_info_arguments])
def test_guess_image_info(file_info, info):
    """Check ``guess_page_info`` against every row of ``guess_image_info_arguments``.

    A single known student (id 1000001, 'First Last') is supplied as the
    student list. Any exception raised by ``guess_page_info`` is turned into
    a ``None`` result, which then fails the final comparison.
    """
    known_students = [Student(id=1000001, first_name='First', last_name='Last')]
    try:
        guessed = guess_page_info(file_info, known_students)
    except Exception:
        guessed = None
    assert guessed == info
def test_guess_missing_page_info():
    """Feed ``guess_missing_page_info`` a list of partly filled 3-tuples and compare the output.

    The input and the expected output are aligned entry by entry; entries that
    share the same first element (the student id) are listed on one line.
    """
    incomplete = [
        (None, None, None),
        (1000000, 0, None), (1000000, 1, None),
        (1000001, 0, None), (1000001, 1, 1),
        (1000002, None, None), (1000002, 1, 1),
        (1000003, 1, None), (1000003, 1, None),
        (1000004, 1, 1), (1000004, 1, 2),
        (1000005, None, None),
        (1000006, None, None), (1000006, None, None),
        (1000007, 0, None), (1000007, 0, 2),
        (1000008, 0, None), (1000008, 0, 1)
    ]
    expected = [
        (None, None, None),
        (1000000, 0, 1), (1000000, 1, 1),
        (1000001, 0, 1), (1000001, 1, 1),
        (1000002, 0, 1), (1000002, 1, 1),
        (1000003, None, None), (1000003, None, None),
        (1000004, 1, 1), (1000004, 1, 2),
        (1000005, 0, 1),
        (1000006, None, None), (1000006, None, None),
        (1000007, 0, 1), (1000007, 0, 2),
        (1000008, None, None), (1000008, None, None)
    ]
    assert guess_missing_page_info(incomplete) == expected
def test_extract_pages_no_image(app):
    """Bytes that are not an image must come back untouched as the only result.

    ``extract_pages_from_file`` is expected to yield exactly one 5-tuple whose
    first element is the original bytes object (not a PIL image) and whose
    ``number`` and ``total`` are both 1.
    """
    payload = b'1701'
    extracted = list(extract_pages_from_file(payload, 'notanimage.nopng'))
    assert len(extracted) == 1
    image, page_info, file_info, number, total = extracted[0]
    assert not isinstance(image, Image.Image)
    assert image == payload
    assert number == 1
    assert total == 1
def test_extract_pages_from_zip(app):
    """Extract two PNG members from an in-memory ZIP and check every yielded tuple.

    The archive holds the same 10x10 RGB image under the names
    ``1000000-1.png`` and ``1000000/2.png``. Each yielded item must be a PIL
    image of that size, numbered consecutively from 1, with a total that
    never decreases and ends at 2, and with page info
    ``(1000000, <0 or 1, each once>, 1)``.
    """
    with BytesIO() as zip_bytes:
        with zipfile.ZipFile(zip_bytes, 'w') as archive:
            with BytesIO() as image_bytes, Image.new('RGB', (10, 10)) as image:
                image.save(image_bytes, format='png')
                png_data = image_bytes.getvalue()
                archive.writestr('1000000-1.png', png_data)
                archive.writestr('1000000/2.png', png_data)

        # The archive is closed at this point; rewind so it can be read back.
        zip_bytes.seek(0)

        remaining_pages = [0, 1]
        previous_total = 0
        extracted = extract_pages_from_file(zip_bytes, 'scan.zip')
        for position, (image, page_info, file_info, number, total) in enumerate(extracted, start=1):
            assert isinstance(image, Image.Image)
            assert image.size == (10, 10)

            assert number == position
            assert number <= total
            assert total >= previous_total
            previous_total = total

            student, page, copy = page_info
            assert student == 1000000
            assert page in remaining_pages
            remaining_pages.remove(page)
            assert copy == 1

        assert previous_total == 2
def test_extract_images_from_pdf(app, datadir):
    """Extract the pages of ``flattened-a4-2pages.pdf`` at 72 dpi and check each result.

    Every yielded item must be an 827x1169 PIL image, numbered consecutively
    from 1, with a non-decreasing total that ends at 2 and with page info
    ``(None, None, None)``.
    """
    pdf_path = Path(datadir) / 'flattened-a4-2pages.pdf'
    extracted = extract_pages_from_file(pdf_path, pdf_path, dpi=72)

    previous_total = 0
    for position, (image, page_info, file_info, number, total) in enumerate(extracted, start=1):
        assert isinstance(image, Image.Image)
        assert image.size == (827, 1169)

        assert number == position
        assert number <= total
        assert total >= previous_total
        previous_total = total

        assert page_info == (None, None, None)

    assert previous_total == 2
def test_extract_images_from_mixed_zip(app, datadir):
    """Extract a ZIP that mixes PNG images, a PDF and a non-image text file.

    The archive contains ``1000000-4.png``, ``1000000/3.png``,
    ``1000000.pdf`` (a copy of ``flattened-a4-2pages.pdf``) and
    ``1000000/4.txt``. Five items are expected in total. Image items must
    have one of the two known sizes; a non-image item is a file-like object
    whose content is the raw text bytes. Items with a page number must
    together cover pages 0-3 exactly once, each with copy number 1.
    """
    flat_pdf = Path(datadir) / 'flattened-a4-2pages.pdf'
    with BytesIO() as zip_bytes:
        with zipfile.ZipFile(zip_bytes, 'w') as z, \
                BytesIO() as image_bytes, \
                Image.new('RGB', (10, 10)) as image:
            image.save(image_bytes, format='png')
            image_bytes.seek(0)
            z.writestr('1000000-4.png', image_bytes.read())
            image_bytes.seek(0)
            z.writestr('1000000/3.png', image_bytes.read())
            z.writestr('1000000.pdf', flat_pdf.read_bytes())
            z.writestr('1000000/4.txt', b'1701')

        # The archive is closed at this point; rewind so it can be read back.
        zip_bytes.seek(0)

        last_total = 0
        pages = [0, 1, 2, 3]
        for expected_number, (image, page_info, file_info, number, total) in enumerate(
            extract_pages_from_file(zip_bytes, 'scan.zip'), start=1
        ):
            if isinstance(image, Image.Image):
                assert image.size in [(10, 10), (827, 1169)]
            else:
                assert image.read() == b'1701'

            assert number == expected_number
            assert number <= total
            assert total >= last_total
            last_total = total

            student, page, copy = page_info
            # Branch on the student first. Testing ``page is None`` in both the
            # outer and the inner condition made the inner
            # ``(1000000, None, None)`` case unreachable.
            if student is None:
                assert page_info == (None, None, None)
            else:
                assert student == 1000000
                if page is None:
                    assert page_info == (1000000, None, None)
                else:
                    assert page in pages
                    pages.pop(pages.index(page))
                    assert copy == 1

        assert pages == []
        assert last_total == 5
......@@ -3,18 +3,20 @@ import zipfile
from io import BytesIO
from PIL import Image
from pathlib import Path
from zesje.raw_scans import extract_image_info, create_copy, _process_zipped_images
from zesje.database import db, Exam, Student, Submission, Scan, Problem
from zesje.raw_scans import create_copy, process_page
from zesje.scans import _process_scan
from zesje.database import db, Exam, Student, Submission, Scan, Problem, ExamWidget
@pytest.fixture
def app_with_data(app):
exam = Exam(name='')
widget = ExamWidget(name='barcode_widget', exam=exam, x=0, y=0)
problem = Problem(exam=exam, name='Problem')
students = [Student(id=i+1000000, first_name='', last_name='') for i in range(2)]
db.session.add(exam)
db.session.add(widget)
db.session.add(problem)
for student in students:
db.session.add(student)
......@@ -22,25 +24,6 @@ def app_with_data(app):
yield app, exam, students
@pytest.mark.parametrize('file_name, info', [
('1234567-02.png', (1234567, 1, 1)),
('1234567-1-4.jpeg', (1234567, 0, 4)),
('1234567.png', None),
('ABCDEFG.jpeg', None)],
ids=[
'Valid name (no copy)',
'Valid name (with copy)',
'Invalid name (no page)',
'Invalid name (no student)'])
def test_extract_image_info(file_name, info):
try:
ext_info = extract_image_info(file_name)
except Exception:
ext_info = None
assert ext_info == info
def test_create_copy(app_with_data):
app, exam, students = app_with_data
submission = Submission(exam=exam, student=students[0])
......@@ -74,12 +57,10 @@ def test_zip_process(app_with_data, zip_file):
db.session.add(scan)
db.session.commit()
path = Path(app.config['SCAN_DIRECTORY']) / f'{scan.id}.zip'
print(path)
with open(str(path), 'wb') as file:
with open(str(scan.path), 'wb') as file:
file.write(zip_file.getvalue())
_process_zipped_images(scan.id)
_process_scan(scan.id, process_page)
for student in students:
sub = Submission.query.filter(Submission.student == student,
......
......@@ -10,7 +10,7 @@ from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from zesje.scans import decode_barcode, ExamMetadata, ExtractedBarcode, exam_metadata, guess_dpi
from zesje.image_extraction import extract_image_pikepdf, extract_images
from zesje.image_extraction import extract_image_pikepdf, extract_images_from_pdf
from zesje.database import db
from zesje.api.exams import generate_exam_token, _exam_generate_data
from zesje.pdf_generation import exam_dir, exam_pdf_path, write_finalized_exam, generate_single_pdf
......@@ -68,7 +68,7 @@ def generate_flat_scan_data(copy_number=145):
generate_single_pdf(exam, copy_number, copy_number, scan_pdf)
scan_pdf.seek(0)
for image, _ in extract_images(scan_pdf.name, dpi=150):
for image, _ in extract_images_from_pdf(scan_pdf.name, dpi=150):
yield image, exam_config, examdir
......@@ -145,7 +145,7 @@ def apply_scan(img, rotation=0, scale=1, skew=(0, 0)):
def test_pipeline(full_app):
for image, exam_config, examdir in generate_flat_scan_data():
success, reason = scans.process_page(image, exam_config, examdir)
success, reason = scans.process_page(image, [], [], exam_config, examdir)
assert success is True, reason
......@@ -157,7 +157,7 @@ def test_pipeline(full_app):
def test_noise(full_app, threshold, expected):
for image, exam_config, _ in generate_flat_scan_data():
image = apply_whitenoise(image, threshold)
success, reason = scans.process_page(image, exam_config)
success, reason = scans.process_page(image, [], [], exam_config)
assert success is expected, reason
......@@ -170,7 +170,7 @@ def test_noise(full_app, threshold, expected):
def test_rotate(full_app, rotation, expected):
for image, exam_config, _ in generate_flat_scan_data():
image = apply_scan(img=image, rotation=rotation)
success, reason = scans.process_page(image, exam_config)
success, reason = scans.process_page(image, [], [], exam_config)
assert success is expected, reason
......@@ -181,7 +181,7 @@ def test_rotate(full_app, rotation, expected):
def test_scale(full_app, scale, expected):
for image, exam_config, _ in generate_flat_scan_data():
image = apply_scan(img=image, scale=scale)
success, reason = scans.process_page(image, exam_config)
success, reason = scans.process_page(image, [], [], exam_config)
assert success is expected, reason
......@@ -192,7 +192,7 @@ def test_scale(full_app, scale, expected):
def test_skew(full_app, skew, expected):
for image, exam_config, _ in generate_flat_scan_data():
image = apply_scan(img=image, skew=skew)
success, reason = scans.process_page(image, exam_config)
success, reason = scans.process_page(image, [], [], exam_config)
assert success is expected, reason
......@@ -204,7 +204,7 @@ def test_all_effects(full_app, rotation, scale, skew, expected):
for image, exam_config, _ in generate_flat_scan_data():
image = apply_scan(
img=image, rotation=rotation, scale=scale, skew=skew)
success, reason = scans.process_page(image, exam_config)
success, reason = scans.process_page(image, [], [], exam_config)
assert success is expected, reason
......@@ -233,9 +233,8 @@ def test_image_extraction_pike(datadir, filename, expected):
def test_image_extraction(datadir, filename):
file = os.path.join(datadir, filename)
page = 0
for img, pagenr in scans.extract_images(file):
for img, _ in extract_images_from_pdf(file, only_info=False):
page += 1
assert pagenr == page
assert img is not None
assert np.average(np.array(img)) == 255
assert page == 2
......
......@@ -3,7 +3,7 @@ from flask_restful import Api
from .graders import Graders
from .exams import Exams, ExamSource, ExamGeneratedPdfs, ExamPreview
from .scans import Scans, RawScans
from .scans import Scans
from .students import Students
from .copies import Copies, MissingPages
from .submissions import Submissions
......@@ -29,7 +29,6 @@ api.add_resource(ExamSource, '/exams/<int:exam_id>/source_pdf')
api.add_resource(ExamGeneratedPdfs, '/exams/<int:exam_id>/generated_pdfs')
api.add_resource(ExamPreview, '/exams/<int:exam_id>/preview')
api.add_resource(Scans, '/scans/<int:exam_id>')
api.add_resource(RawScans, '/scans/raw/<int:exam_id>')
api.add_resource(Students, '/students', '/students/<int:student_id>')
api.add_resource(Copies,
'/copies/<int:exam_id>',
......
import os
from flask import current_app
from flask_restful import Resource, reqparse
from werkzeug.datastructures import FileStorage
from ..scans import process_pdf
from ..raw_scans import process_zipped_images
from ..scans import process_scan
from ..database import db, Exam, Scan
ZIP_MIME_TYPES = ['application/zip', 'application/octet-stream', 'application/x-zip-compressed', 'multipart/x-zip']
class Scans(Resource):
"""Getting a list of uploaded scans, and uploading new ones."""