diff --git a/.gitignore b/.gitignore index 1f4b930a51d78a3c067a69972966be218992e340..18f8c65f4b93c71256d19e9f97026f399ce7c644 100644 --- a/.gitignore +++ b/.gitignore @@ -98,3 +98,4 @@ stats.json # pytest coverage reports .coverage cov.xml +cov.html/ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e25ee3783078527bf178ac949e0366baf913e37a..eaa1841ceed59037d21fed908bd155f4f5b971df 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,3 +1,4 @@ + # This base image can be found in 'Dockerfile' image: zesje/base @@ -35,11 +36,6 @@ test_js: stage: test script: yarn test:js -test_py: - <<: *python_packages - stage: test - script: yarn test:py - lint_js: <<: *node_modules stage: test @@ -53,9 +49,13 @@ lint_py: allow_failure: true script: - yarn lint:py - -cov: + +test_py: <<: *python_packages stage: test script: - - python -m pytest --cov=zesje + - yarn test:py:cov + artifacts: + paths: + - cov.html/ + expire_in: 1 week diff --git a/AUTHORS.md b/AUTHORS.md index 1b470d040e35f336136f9efa9f1fde981c77d570..2cf605693e74665cac0720f82b93d3320f3c65ec 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -6,3 +6,15 @@ * Justin van der Krieken * Jamy Mahabier * Nick Cleintuar +* Hugo Kerstens +* Stefan Hugtenburg +* Hidde Leistra +* Pim Otte +* Luc Enthoven + +<!-- +Execute +git shortlog -s | sed -e "s/^ *[0-9\t ]*//"| xargs -i sh -c 'grep -q "{}" AUTHORS.md || echo "{}"' + +To check if any authors are missing from this list. + --> diff --git a/README.md b/README.md index e927866a316c2a6186370dbb59756beea6edbf72..47181611e5f7205d619462e0d67273e0d94e54f9 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[](https://gitlab.kwant-project.org/works-on-my-machine/zesje/commits/pytest-cov) +[](https://gitlab.kwant-project.org/zesje/zesje/commits/master) # Welcome to Zesje @@ -68,13 +68,30 @@ You can run the tests by running #### Viewing test coverage -As a test coverage tool, `pytest-cov` is used. +As a test coverage tool for Python tests, `pytest-cov` is used. To view test coverage, run - yarn cov + yarn test:py:cov -Or use Conda in the Zesje repo and run `python -m pytest --cov=zesje tests/` +A coverage report is now generated in the terminal, as an XML file, and in HTML format. +The HTML file shows an overview of untested code in red. + +##### Viewing coverage in Visual Studio Code + +There is a plugin called Coverage Gutter that will highlight which lines of code are covered. +Simply install Coverage Gutter, after which a watch button appears in the colored box at the bottom of your IDE. +When you click watch, green and red lines appear next to the line numbers indicating if the code is covered. + +Coverage Gutter uses the XML which is produced by `yarn test:py:cov`, called `cov.xml`. This file should be located in the main folder. + +##### Viewing coverage in PyCharm +To view test coverage in PyCharm, run `yarn test:py:cov` to generate the coverage report XML file `cov.xml` if it is not present already. + +Next, open up PyCharm and in the top bar go to **Run -> Show Code Coverage Data** (Ctrl + Alt + F6). + +Press **+** and add the file `cov.xml` that is in the main project directory. +A code coverage report should now appear in the side bar on the right. #### Policy errors diff --git a/client/components/Hero.jsx b/client/components/Hero.jsx index 0b60fba63dbff092c83e8457777f5a8be31a9dac..62e6b696beda295eade8e932eacfed03b7358fff 100644 --- a/client/components/Hero.jsx +++ b/client/components/Hero.jsx @@ -2,7 +2,7 @@ import React from 'react' const Hero = (props) => { return ( - <section className='hero is-primary is-info'> + <section className='hero is-primary is-info is-small'> <div className='hero-body'> <div className='container'> <h1 className='title'> diff --git a/client/views/Exam.jsx b/client/views/Exam.jsx index f70b4a402a5693079c0f3fb4b832b4888a7e6677..3ccbc0087586e63a07fe50a09b7b2900dded0730 100644 --- a/client/views/Exam.jsx +++ b/client/views/Exam.jsx @@ -89,7 +89,9 @@ class Exams extends React.Component { // This might try to save the name unnecessary, but better twice than never. this.saveProblemName() // Force an update of the upper exam state, since this component does not update and use that correctly - this.props.updateExam(this.props.examID) + if (!this.state.deletingExam) { + this.props.updateExam(this.props.examID) + } } saveProblemName = () => { diff --git a/client/views/Grade.jsx b/client/views/Grade.jsx index dc2b81f453787d5436edec0ddc394e99c5813473..e3bafba95d19e35134a4208cb2528bc885bdf4fc 100644 --- a/client/views/Grade.jsx +++ b/client/views/Grade.jsx @@ -12,6 +12,7 @@ import withShortcuts from '../components/ShortcutBinder.jsx' import * as api from '../api.jsx' import 'bulma-tooltip/dist/css/bulma-tooltip.min.css' +import './grade/Grade.css' class Grade extends React.Component { state = { @@ -294,7 +295,7 @@ class Grade extends React.Component { </article> : null } - <p className='box'> + <p className={'box' + (solution.graded_at ? ' is-graded' : '')}> <img src={exam.id ? ('api/images/solutions/' + exam.id + '/' + problem.id + '/' + submission.id + '/' + (this.state.fullPage ? '1' : '0')) + '?' + this.getLocationHash(problem) : ''} alt='' /> diff --git a/client/views/grade/EditPanel.jsx b/client/views/grade/EditPanel.jsx index 9683af57f759431c7d8fbf1554dd374348c57e0f..f1f574ac6f383042b186a8ac1107caae34f00389 100644 --- a/client/views/grade/EditPanel.jsx +++ b/client/views/grade/EditPanel.jsx @@ -154,7 +154,7 @@ class EditPanel extends React.Component { <div className='panel-block'> <BackButton onClick={this.props.goBack} /> <SaveButton onClick={this.saveFeedback} exists={this.props.feedback} - disabled={!this.state.name || !this.state.score || isNaN(parseInt(this.state.score))} /> + disabled={!this.state.name || (!this.state.score && this.state.score !== 0) || isNaN(parseInt(this.state.score))} /> <DeleteButton onClick={() => { this.setState({deleting: true}) }} exists={this.props.feedback} /> <ConfirmationModal headerText={`Do you want to irreversibly delete feedback option "${this.state.name}"?`} diff --git a/client/views/grade/Grade.css b/client/views/grade/Grade.css new file mode 100644 index 0000000000000000000000000000000000000000..90e5dab9e507f7bcb2185c4541ce7632a4564d55 --- /dev/null +++ b/client/views/grade/Grade.css @@ -0,0 +1,3 @@ +.box.is-graded { + box-shadow: 0px 0px 6px #23d160, 0 0 0 1px rgba(10, 10, 10, 0.1); +} diff --git a/package.json b/package.json index 4f449b71d04d16812e1a19089b980f096118bdc4..1e7457b6650f43a3ab0eb773b7865ffdfbae16c9 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "main": "index.js", "license": "AGPL-3.0", "scripts": { - "dev": "concurrently --kill-others --names \"WEBPACK,PYTHON,CELERY\" --prefix-colors \"bgBlue.bold,bgGreen.bold,bgRed.bold\" \"webpack-dev-server --hot --inline --progress --config webpack.dev.js\" \"ZESJE_SETTINGS=$(pwd)/zesje.dev.cfg python3 zesje\" \"ZESJE_SETTINGS=$(pwd)/zesje.dev.cfg celery -A zesje.celery worker\"", + "dev": "concurrently --kill-others --names \"WEBPACK,PYTHON,CELERY\" --prefix-colors \"bgBlue.bold,bgGreen.bold,bgRed.bold\" \"webpack-dev-server --hot --inline --progress --config webpack.dev.js\" \"ZESJE_SETTINGS=$(pwd)/zesje.dev.cfg python3 zesje\" \"ZESJE_SETTINGS=$(pwd)/zesje.dev.cfg celery -A zesje.celery worker -l info --autoscale=4,1 --max-tasks-per-child=16\"", "build": "webpack --config webpack.prod.js", "ci": "yarn lint; yarn test", "lint": "yarn lint:js; yarn lint:py", @@ -17,7 +17,7 @@ "migrate:dev": "ZESJE_SETTINGS=$(pwd)/zesje.dev.cfg FLASK_APP=zesje/__init__.py flask db upgrade", "migrate": "FLASK_APP=zesje/__init__.py flask db upgrade", "prepare-migration": "ZESJE_SETTINGS=$(pwd)/zesje.dev.cfg FLASK_APP=zesje/__init__.py flask db migrate", - "cov": "python -m pytest --cov=zesje --cov-report=xml:cov.xml tests/", + "test:py:cov": "python3 -m pytest -v -W error::RuntimeWarning --cov=zesje --cov-report=xml:cov.xml --cov-report=html:cov.html --cov-report=term tests/", "migrate-down": "FLASK_APP=zesje/__init__.py flask db downgrade" }, "standard": { diff --git a/tests/data/flattened-a4-2pages.pdf b/tests/data/flattened-a4-2pages.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f3fd0b348aae99c435ed85f1192d51dccac4c15c Binary files /dev/null and b/tests/data/flattened-a4-2pages.pdf differ diff --git a/tests/test_scans.py b/tests/test_scans.py index 115d2c5b06dfdb3499cfb104585febddd0fe2995..9eadb2a97fba2a09e59aac9f5e26c045c2f9d1d2 100644 --- a/tests/test_scans.py +++ b/tests/test_scans.py @@ -265,3 +265,18 @@ def test_all_effects( # image.show() success, reason = scans.process_page(image, new_exam, datadir) assert success is expected, reason + + +@pytest.mark.parametrize('filename', [ + 'blank-a4-2pages.pdf', + 'flattened-a4-2pages.pdf'], + ids=['blank pdf', 'flattened pdf']) +def test_image_extraction(datadir, filename): + file = os.path.join(datadir, filename) + page = 0 + for img, pagenr in scans.extract_images(file): + page += 1 + assert pagenr == page + assert img is not None + assert np.average(np.array(img)) == 255 + assert page == 2 diff --git a/zesje/api/exams.py b/zesje/api/exams.py index 4f77a35e0cde58d7d170f56f29a57b89eabeabbf..a1c449a05bd1ad5d5a1f50a75c7e3698d66114e5 100644 --- a/zesje/api/exams.py +++ b/zesje/api/exams.py @@ -66,8 +66,26 @@ class Exams(Resource): return dict(status=404, message='Exam does not exist.'), 404 elif exam.finalized: return dict(status=409, message='Cannot delete a finalized exam.'), 409 + elif Submission.query.filter(Submission.exam_id == exam.id).count(): + return dict(status=500, message='Exam is not finalized but already has submissions.'), 500 else: - exam.delete() + # Delete any scans that were wrongly uploaded to this exam + for scan in exam.scans: + db.session.delete(scan) + + for widget in exam.widgets: + db.session.delete(widget) + + for problem in exam.problems: + for fb_option in problem.feedback_options: + db.session.delete(fb_option) + db.session.delete(problem.widget) + db.session.delete(problem) + + db.session.delete(exam) + db.session.commit() + + return dict(status=200, message="ok"), 200 def _get_all(self): """get list of uploaded exams. @@ -311,6 +329,7 @@ class ExamSource(Resource): return send_file( os.path.join(exam_dir, 'exam.pdf'), + cache_timeout=0, mimetype='application/pdf') diff --git a/zesje/api/scans.py b/zesje/api/scans.py index 88c42c553430f1bdc2a685277211d457e17a2829..d0eecd400d6da8522737f6337a864e37dc265cfe 100644 --- a/zesje/api/scans.py +++ b/zesje/api/scans.py @@ -67,7 +67,7 @@ class Scans(Resource): return dict(status=404, message='Exam does not exist.'), 404 scan = Scan(exam=exam, name=args['pdf'].filename, - status='processing', message='importing PDF') + status='processing', message='Waiting...') db.session.add(scan) db.session.commit() diff --git a/zesje/scans.py b/zesje/scans.py index 8e7efceff0edaa6cd57e1f31eaf9cf615d3546ff..a7436eaec63060a289d9354974548a6b6f6b5c49 100644 --- a/zesje/scans.py +++ b/zesje/scans.py @@ -11,6 +11,7 @@ import cv2 import numpy as np import PyPDF2 from PIL import Image +from wand.image import Image as WandImage from pylibdmtx import pylibdmtx from .database import db, Scan, Exam, Page, Student, Submission, Solution, ExamWidget @@ -67,6 +68,8 @@ def _process_pdf(scan_id, app_config): # Raises exception if zero or more than one scans found scan = Scan.query.filter(Scan.id == scan_id).one() + report_progress('Importing PDF') + pdf_path = os.path.join(data_directory, 'scans', f'{scan.id}.pdf') output_directory = os.path.join(data_directory, f'{scan.exam.id}_data') @@ -127,39 +130,133 @@ def exam_metadata(exam_id): def extract_images(filename): """Yield all images from a PDF file. + Tries to use PyPDF2 to extract the images from the given PDF. + If PyPDF2 fails to open the PDF or PyPDF2 is not able to extract + a page, it continues to use Wand for the rest of the pages. + """ + + with open(filename, "rb") as file: + use_wand = False + pypdf_reader = None + wand_image = None + total = 0 + + try: + pypdf_reader = PyPDF2.PdfFileReader(file) + total = pypdf_reader.getNumPages() + except Exception: + # Fallback to Wand if opening the PDF with PyPDF2 failed + use_wand = True + + if use_wand: + # If PyPDF2 failed we need Wand to count the number of pages + wand_image = WandImage(filename=filename, resolution=300) + total = len(wand_image.sequence) + + for pagenr in range(total): + if not use_wand: + try: + # Try to use PyPDF2, but catch any error it raises + img = extract_image_pypdf(pagenr, pypdf_reader) + + except Exception: + # Fallback to Wand if extracting with PyPDF2 failed + use_wand = True + + if use_wand: + if wand_image is None: + wand_image = WandImage(filename=filename, resolution=300) + img = extract_image_wand(pagenr, wand_image) + + if img.mode == 'L': + img = img.convert('RGB') + + yield img, pagenr+1 + + if wand_image is not None: + wand_image.close() + + +def extract_image_pypdf(pagenr, reader): + """Extracts an image as an array from the designated page + + This method uses PyPDF2 to extract the image and only works + when there is a single image present on the page. + + Raises an error if not exactly one image is found on the page + or the image filter is not `FlateDecode`. + Adapted from https://stackoverflow.com/a/34116472/2217463 - We raise if there are > 1 images / page + Parameters + ---------- + pagenr : int + Page number to extract + reader : PyPDF2.PdfFileReader instance + The reader to read the page from + + Returns + ------- + img_array : PIL Image + The extracted image data + + Raises + ------ + ValueError if not exactly one image is found on the page + + NotImplementedError if the image filter is not `FlateDecode` """ - reader = PyPDF2.PdfFileReader(open(filename, "rb")) - total = reader.getNumPages() - for pagenr in range(total): - page = reader.getPage(pagenr) - xObject = page['/Resources']['/XObject'].getObject() - - if sum((xObject[obj]['/Subtype'] == '/Image') - for obj in xObject) > 1: - raise RuntimeError(f'Page {pagenr + 1} contains more than 1 image,' - 'likely not a scan') - - for obj in xObject: - if xObject[obj]['/Subtype'] == '/Image': - data = xObject[obj].getData() - filter = xObject[obj]['/Filter'] - - if filter == '/FlateDecode': - size = (xObject[obj]['/Width'], xObject[obj]['/Height']) - if xObject[obj]['/ColorSpace'] == '/DeviceRGB': - mode = "RGB" - else: - mode = "P" - img = Image.frombytes(mode, size, data) + + page = reader.getPage(pagenr) + xObject = page['/Resources']['/XObject'].getObject() + + if sum((xObject[obj]['/Subtype'] == '/Image') + for obj in xObject) != 1: + raise ValueError + + for obj in xObject: + if xObject[obj]['/Subtype'] == '/Image': + data = xObject[obj].getData() + filter = xObject[obj]['/Filter'] + + if filter == '/FlateDecode': + size = (xObject[obj]['/Width'], xObject[obj]['/Height']) + if xObject[obj]['/ColorSpace'] == '/DeviceRGB': + mode = "RGB" else: - img = Image.open(BytesIO(data)) + mode = "P" + img = Image.frombytes(mode, size, data) + else: + raise NotImplementedError + + return img + + +def extract_image_wand(pagenr, wand_image): + """Flattens a page from a PDF to an image array + + This method uses Wand to flatten the page and extract the image. + + Parameters + ---------- + pagenr : int + Page number to extract, starting at 0 + wand_image : Wand Image instance + The Wand Image to read from + + Returns + ------- + img_array : PIL Image + The extracted image data + """ - if img.mode == 'L': - img = img.convert('RGB') - yield img, pagenr+1 + single_page = WandImage(wand_image.sequence[pagenr]) + single_page.format = 'jpg' + img_array = np.asarray(bytearray(single_page.make_blob(format="jpg")), dtype=np.uint8) + img = Image.open(BytesIO(img_array)) + img.load() # Load the data into the PIL image from the Wand image + single_page.close() # Then close the Wand image + return img def write_pdf_status(scan_id, status, message):