Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • zesje/zesje
  • jbweston/grader_app
  • dj2k/zesje
  • MrHug/zesje
  • okaaij/zesje
  • tsoud/zesje
  • pimotte/zesje
  • works-on-my-machine/zesje
  • labay11/zesje
  • reouvenassouly/zesje
  • t.v.aerts/zesje
  • giuseppe.deininger/zesje
12 results
Show changes
......@@ -63,10 +63,11 @@ class Exam(db.Model):
id = Column(Integer, primary_key=True, autoincrement=True)
name = Column(Text, nullable=False)
token = Column(String(token_length), unique=True, default=_generate_exam_token)
submissions = db.relationship('Submission', backref='exam', lazy=True)
problems = db.relationship('Problem', backref='exam', order_by='Problem.id', lazy=True)
scans = db.relationship('Scan', backref='exam', lazy=True)
widgets = db.relationship('ExamWidget', backref='exam', order_by='ExamWidget.id', lazy=True)
submissions = db.relationship('Submission', backref='exam', cascade='all', lazy=True)
problems = db.relationship('Problem', backref='exam', cascade='all', order_by='Problem.id', lazy=True)
scans = db.relationship('Scan', backref='exam', cascade='all', lazy=True)
widgets = db.relationship('ExamWidget', backref='exam', cascade='all',
order_by='ExamWidget.id', lazy=True)
finalized = Column(Boolean, default=False, server_default='f')
......@@ -76,8 +77,9 @@ class Submission(db.Model):
id = Column(Integer, primary_key=True, autoincrement=True)
copy_number = Column(Integer, nullable=False)
exam_id = Column(Integer, ForeignKey('exam.id'), nullable=False)
solutions = db.relationship('Solution', backref='submission', order_by='Solution.problem_id', lazy=True)
pages = db.relationship('Page', backref='submission', lazy=True)
solutions = db.relationship('Solution', backref='submission', cascade='all',
order_by='Solution.problem_id', lazy=True)
pages = db.relationship('Page', backref='submission', cascade='all', lazy=True)
student_id = Column(Integer, ForeignKey('student.id'), nullable=True)
signature_validated = Column(Boolean, default=False, server_default='f', nullable=False)
......@@ -97,9 +99,10 @@ class Problem(db.Model):
id = Column(Integer, primary_key=True, autoincrement=True)
name = Column(Text, nullable=False)
exam_id = Column(Integer, ForeignKey('exam.id'), nullable=False)
feedback_options = db.relationship('FeedbackOption', backref='problem', order_by='FeedbackOption.id', lazy=True)
solutions = db.relationship('Solution', backref='problem', lazy=True)
widget = db.relationship('ProblemWidget', backref='problem', uselist=False, lazy=True)
feedback_options = db.relationship('FeedbackOption', backref='problem', cascade='all',
order_by='FeedbackOption.id', lazy=True)
solutions = db.relationship('Solution', backref='problem', cascade='all', lazy=True)
widget = db.relationship('ProblemWidget', backref='problem', cascade='all', uselist=False, lazy=True)
@hybrid_property
def mc_options(self):
......
......@@ -8,10 +8,12 @@ from email.mime.base import MIMEBase
from email import encoders
import jinja2
from wand.image import Image
from reportlab.pdfgen import canvas
from .database import Submission
from . import statistics
from .api.exams import PAGE_FORMATS
def solution_pdf(exam_id, student_id):
......@@ -20,17 +22,17 @@ def solution_pdf(exam_id, student_id):
pages = sorted((p for s in subs for p in s.pages), key=(lambda p: p.number))
pages = [p.path for p in pages]
with Image() as output_pdf:
for filepath in pages:
with Image(filename=filepath) as page:
output_pdf.sequence.append(page)
output_pdf.format = 'pdf'
result = BytesIO()
from flask import current_app
page_format = current_app.config.get('PAGE_FORMAT', 'A4') # TODO Remove default value
page_size = PAGE_FORMATS[page_format]
output_pdf.save(file=result)
result = BytesIO()
pdf = canvas.Canvas(result, pagesize=page_size)
for page in pages:
pdf.drawImage(page, 0, 0, width=page_size[0], height=page_size[1])
pdf.showPage()
pdf.save()
result.seek(0)
return result
......
......@@ -33,8 +33,8 @@ def create_app():
)
app.config.update(
CELERY_BROKER_URL='redis://localhost:6379',
CELERY_RESULT_BACKEND='redis://localhost:6379'
CELERY_BROKER_URL='redis://localhost:6479',
CELERY_RESULT_BACKEND='redis://localhost:6479'
)
db.init_app(app)
......
......@@ -2,7 +2,42 @@
import numpy as np
from operator import sub, add
def add_tup(tup1, tup2):
    """Add two points component-wise.

    Only the elements at index 0 and index 1 of each argument are used.

    Parameters
    ----------
    tup1 : tuple
        First operand.
    tup2 : tuple
        Second operand.

    Returns
    -------
    tup : tuple
        Two-element tuple ``(tup1[0] + tup2[0], tup1[1] + tup2[1])``.
    """
    first = tup1[0] + tup2[0]
    second = tup1[1] + tup2[1]
    return first, second
def sub_tup(tup1, tup2):
    """Subtract one point from another component-wise.

    Only the elements at index 0 and index 1 of each argument are used.

    Parameters
    ----------
    tup1 : tuple
        The minuend.
    tup2 : tuple
        The subtrahend.

    Returns
    -------
    tup : tuple
        Two-element tuple ``(tup1[0] - tup2[0], tup1[1] - tup2[1])``.
    """
    first = tup1[0] - tup2[0]
    second = tup1[1] - tup2[1]
    return first, second
def guess_dpi(image_array):
......@@ -38,66 +73,112 @@ def get_box(image_array, box, padding=0.3):
return image_array[top:bottom, left:right]
def fix_corner_markers(corner_keypoints, shape):
def get_corner_marker_sides(corner_markers, shape):
    """Sort corner markers into the four quadrants of an image.

    The image is split at its centre: ``shape[1] / 2`` separates left from
    right and ``shape[0] / 2`` separates top from bottom. A marker lying
    exactly on one of the two separators belongs to no quadrant. When
    several markers fall into the same quadrant, the first one in
    ``corner_markers`` is kept.

    Parameters
    ----------
    corner_markers : list of tuples
        The corner marker points as (x, y) tuples
    shape : tuple
        The shape of an image; index 0 is the vertical extent and index 1
        the horizontal extent

    Returns
    -------
    tuples : tuple
        ``(top_left, top_right, bottom_left, bottom_right)``, where each
        entry is an (x, y) tuple or None if no marker lies in that quadrant
    """
    x_mid = shape[1] / 2
    y_mid = shape[0] / 2

    top_left = top_right = bottom_left = bottom_right = None

    for x, y in corner_markers:
        point = (x, y)
        # Only the first marker seen in a quadrant is recorded.
        if x < x_mid and y < y_mid:
            if top_left is None:
                top_left = point
        elif x > x_mid and y < y_mid:
            if top_right is None:
                top_right = point
        elif x < x_mid and y > y_mid:
            if bottom_left is None:
                bottom_left = point
        elif x > x_mid and y > y_mid:
            if bottom_right is None:
                bottom_right = point

    return top_left, top_right, bottom_left, bottom_right
def get_delta(top_left, top_right, bottom_left, bottom_right):
"""Returns the absolute difference between the left or right points
Parameters
top_left : tuple
Top left point
top_right : tuple
Top right point
bottom_left : tuple
Bottom left point
bottom_right : tuple
Bottom right point
Returns
-------
delta : tuple
The absolute difference as an (x, y) tuple
"""
Corrects the list of corner markers if only three corner markers are found.
This function raises if less than three corner markers are detected.
if not top_left or not bottom_left:
return sub_tup(top_right, bottom_right)
return sub_tup(top_left, bottom_left)
def fix_corner_markers(corner_keypoints, shape):
"""Corrects the list of corner markers if three corner markers are found.
This function raises if less than three corner markers are found.
Parameters
----------
corner_keypoints :
corner_keypoints : list of tuples
List of corner marker locations as tuples
shape :
shape : (float, float, int)
Shape of the image in (y, x, dim)
Returns
-------
corner_keypoints :
fixed_corners : list of tuples
A list of four corner markers.
"""
if len(corner_keypoints) == 4:
return corner_keypoints
if len(corner_keypoints) < 3:
raise RuntimeError("Fewer than 3 corner markers found while trying to fix corners")
x_sep = shape[1] / 2
y_sep = shape[0] / 2
top_left = [(x, y) for x, y in corner_keypoints if x < x_sep and y < y_sep]
bottom_left = [(x, y) for x, y in corner_keypoints if x < x_sep and y > y_sep]
top_right = [(x, y) for x, y in corner_keypoints if x > x_sep and y < y_sep]
bottom_right = [(x, y) for x, y in corner_keypoints if x > x_sep and y > y_sep]
top_left, top_right, bottom_left, bottom_right = get_corner_marker_sides(corner_keypoints, shape)
delta = get_delta(top_left, top_right, bottom_left, bottom_right)
missing_point = ()
# index = 0
if not top_left:
# Top left point is missing
(dx, dy) = tuple(map(sub, top_right[0], bottom_right[0]))
missing_point = tuple(map(add, bottom_left[0], (dx, dy)))
index = 0
elif not bottom_left:
# Bottom left point is missing
(dx, dy) = tuple(map(sub, top_right[0], bottom_right[0]))
missing_point = tuple(map(sub, top_left[0], (dx, dy)))
index = 2
elif not top_right:
# Top right point is missing
(dx, dy) = tuple(map(sub, top_left[0], bottom_left[0]))
missing_point = tuple(map(add, bottom_right[0], (dx, dy)))
index = 1
elif not bottom_right:
# bottom right
(dx, dy) = tuple(map(sub, top_left[0], bottom_left[0]))
missing_point = tuple(map(sub, top_right[0], (dx, dy)))
index = 3
corner_keypoints.insert(index, missing_point)
return corner_keypoints
top_left = add_tup(bottom_left, delta)
if not top_right:
top_right = add_tup(bottom_right, delta)
if not bottom_left:
bottom_left = sub_tup(top_left, delta)
if not bottom_right:
bottom_right = sub_tup(top_right, delta)
return [top_left, top_right, bottom_left, bottom_right]
def box_is_filled(image_array, box_coords, padding=0.3, threshold=150, pixels=False):
......
from io import BytesIO
from tempfile import NamedTemporaryFile
import PIL
from pdfrw import PdfReader, PdfWriter, PageMerge
from pystrich.datamatrix import DataMatrixEncoder
from pylibdmtx.pylibdmtx import encode
from reportlab.lib.units import mm
from reportlab.pdfgen import canvas
......@@ -35,9 +34,9 @@ def generate_pdfs(exam_pdf_file, exam_id, copy_nums, output_paths, id_grid_x,
"""
Generate the final PDFs from the original exam PDF.
To maintain a consistent size of the DataMatrix codes, adhere to (# of
letters in exam ID) + 2 * (# of digits in exam ID) = C for a certain
constant C. The reason for this is that pyStrich encodes two digits in as
To ensure the page information fits into the datamatrix grid, adhere to
(# of letters in exam ID) + 2 * (# of digits in exam ID) = C for a certain
constant C. The reason for this is that libdmtx encodes two digits in as
much space as one letter.
If maximum interchangeability with version 1 QR codes is desired (error
......@@ -205,9 +204,9 @@ def generate_datamatrix(exam_id, page_num, copy_num):
"""
Generates a DataMatrix code to be used on a page.
To maintain a consistent size of the DataMatrix codes, adhere to (# of
letters in exam ID) + 2 * (# of digits in exam ID) = C for a certain
constant C. The reason for this is that pyStrich encodes two digits in as
To ensure the page information fits into the datamatrix grid, adhere to
(# of letters in exam ID) + 2 * (# of digits in exam ID) = C for a certain
constant C. The reason for this is that pylibdmtx encodes two digits in as
much space as one letter.
If maximum interchangeability with version 1 QR codes is desired (error
......@@ -232,8 +231,10 @@ def generate_datamatrix(exam_id, page_num, copy_num):
data = f'{exam_id}/{copy_num:04d}/{page_num:02d}'
image_bytes = DataMatrixEncoder(data).get_imagedata(cellsize=2)
return PIL.Image.open(BytesIO(image_bytes))
encoded = encode(data.encode('utf-8'), size='18x18')
datamatrix = PIL.Image.frombytes('RGB', (encoded.width, encoded.height), encoded.pixels)
datamatrix = datamatrix.resize((44, 44)).convert('L')
return datamatrix
def _generate_overlay(canv, pagesize, exam_id, copy_num, num_pages, id_grid_x,
......@@ -242,9 +243,9 @@ def _generate_overlay(canv, pagesize, exam_id, copy_num, num_pages, id_grid_x,
Generates an overlay ('watermark') PDF, which can then be overlaid onto
the exam PDF.
To maintain a consistent size of the DataMatrix codes in the overlay,
To ensure the page information fits into the datamatrix grid in the overlay,
adhere to (# of letters in exam ID) + 2 * (# of digits in exam ID) = C for
a certain constant C. The reason for this is that pyStrich encodes two
a certain constant C. The reason for this is that pylibdmtx encodes two
digits in as much space as one letter.
If maximum interchangeability with version 1 QR codes is desired (error
......@@ -276,10 +277,6 @@ def _generate_overlay(canv, pagesize, exam_id, copy_num, num_pages, id_grid_x,
"""
# Font settings for the copy number (printed under the datamatrix)
fontsize = 8
canv.setFont('Helvetica', fontsize)
# transform y-coordinate to different origin location
id_grid_y = pagesize[1] - id_grid_y
......@@ -296,6 +293,9 @@ def _generate_overlay(canv, pagesize, exam_id, copy_num, num_pages, id_grid_x,
else:
index = 0
max_index = 0
# Font settings for the copy number (printed under the datamatrix)
fontsize = 12
canv.setFont('Helvetica', fontsize)
for page_num in range(num_pages):
_add_corner_markers_and_bottom_bar(canv, pagesize)
......@@ -307,7 +307,7 @@ def _generate_overlay(canv, pagesize, exam_id, copy_num, num_pages, id_grid_x,
canv.drawInlineImage(datamatrix, datamatrix_x, datamatrix_y_adjusted)
canv.drawString(
datamatrix_x, datamatrix_y_adjusted - fontsize,
datamatrix_x, datamatrix_y_adjusted - (fontsize * 0.66),
f" # {copy_num}"
)
......
import os
from pdfminer3.converter import PDFPageAggregator
from pdfminer3.layout import LAParams
from pdfminer3.layout import LTFigure
from pdfminer3.layout import LTTextBoxHorizontal
from pdfminer3.pdfdocument import PDFDocument
from pdfminer3.pdfinterp import PDFResourceManager
from pdfminer3.pdfinterp import PDFPageInterpreter
from pdfminer3.pdfpage import PDFPage
from pdfminer3.pdfparser import PDFParser
from .api.exams import PAGE_FORMATS
def get_problem_title(problem, data_dir, page_format):
    """
    Returns the title of a problem

    The title is taken to be the first line of the first piece of text found
    in the exam PDF between the bottom of the previous problem widget on the
    same page (or the top of the page if there is none) and the bottom of the
    widget of the given problem.

    Parameters
    ----------
    problem : Problem
        The currently selected problem
    data_dir : str
        Location of the data folder
    page_format : str
        Format of the current page

    Returns
    -------
    title: str
        The title of the problem, or an empty string if no text is found
    """
    pdf_path = os.path.join(data_dir, f'{problem.exam_id}_data', 'exam.pdf')

    # The handle must stay open while pages are being processed, so the whole
    # extraction lives inside the `with` block, which closes the file on every
    # exit path (early return, fall-through, or exception).
    with open(pdf_path, 'rb') as fp:
        parser = PDFParser(fp)
        document = PDFDocument(parser)
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        # Get the other problems on the same page
        problems_on_page = [p for p in problem.exam.problems if p.widget.page == problem.widget.page]
        problems_on_page.sort(key=lambda prob: prob.widget.y)

        idx = problems_on_page.index(problem)

        # Determine y coordinates to search for text
        if idx == 0:
            y_above = 0
        else:
            problem_above = problems_on_page[idx - 1]
            y_above = problem_above.widget.y + problem_above.widget.height

        y_current = problem.widget.y + problem.widget.height

        for page in PDFPage.create_pages(document):
            interpreter.process_page(page)
            layout = device.get_result()

            # NOTE(review): presumably widget.page is 0-based while pageid
            # is 1-based, hence the + 1; confirm against the widget model.
            if layout.pageid == problem.widget.page + 1:
                filtered_words = get_words(layout._objs, y_above, y_current, page_format)

                if not filtered_words:
                    return ''

                lines = filtered_words[0].split('\n')
                return lines[0]

    return ''
def get_words(layout_objs, y_top, y_bottom, page_format):
    """
    Returns the text from a pdf page within a specified height.

    Pdfminer orients the coordinates of a layout object from
    the bottom left, whereas y_top and y_bottom are subtracted from the page
    height before being compared, i.e. they are treated as distances from the
    top of the page.

    Adapted from https://github.com/euske/pdfminer/issues/171

    obj.bbox returns the following values: (x0, y0, x1, y1)
    With
    x0: the distance from the left of the page to the left edge of the box.
    y0: the distance from the bottom of the page to the lower edge of the box.
    x1: the distance from the left of the page to the right edge of the box.
    y1: the distance from the bottom of the page to the upper edge of the box.

    A horizontal text box is selected when its lower edge (y0) lies strictly
    between the two limits. Figures are searched recursively and their text
    is merged into the same flat result list.

    Parameters
    ----------
    layout_objs : list of layout objects
        The list of objects in the page.
    y_top : double
        Upper limit of the search area, measured from the top of the page
    y_bottom : double
        Lower limit of the search area, measured from the top of the page
    page_format : str
        Format of the current page, used as a key into PAGE_FORMATS

    Returns
    -------
    words : list of str
        The text of every matching text box, in the order encountered.
    """
    page_height = PAGE_FORMATS[page_format][1]

    # Convert the top-origin limits to pdfminer's bottom-origin coordinates
    # once, instead of on every iteration.
    upper_limit = page_height - y_top
    lower_limit = page_height - y_bottom

    words = []
    for obj in layout_objs:
        if isinstance(obj, LTTextBoxHorizontal):
            if upper_limit > obj.bbox[1] > lower_limit:
                words.append(obj.get_text())
        elif isinstance(obj, LTFigure):
            # extend (not append) keeps the result a flat list of strings;
            # appending the nested list would leave a list where callers
            # expect text, even when the figure contains no text at all.
            words.extend(get_words(obj._objs, y_top, y_bottom, page_format))
    return words
......@@ -4,20 +4,22 @@ import math
import os
from collections import namedtuple, Counter
from io import BytesIO
from tempfile import SpooledTemporaryFile
import signal
import cv2
import numpy as np
import PyPDF2
from pikepdf import Pdf, PdfImage
from PIL import Image
from wand.image import Image as WandImage
from pylibdmtx import pylibdmtx
from .database import db, Scan, Exam, Page, Student, Submission, Solution, ExamWidget
from .datamatrix import decode_raw_datamatrix
from .images import guess_dpi, get_box, fix_corner_markers
from .images import guess_dpi, get_box
from .factory import make_celery
from .pregrader import add_feedback_to_solution
from .images import fix_corner_markers
from .pdf_generation import MARKER_FORMAT, PAGE_FORMATS
......@@ -80,7 +82,9 @@ def _process_pdf(scan_id, app_config):
report_error(f'Error while reading Exam metadata: {e}')
raise
total = PyPDF2.PdfFileReader(open(pdf_path, "rb")).getNumPages()
with Pdf.open(pdf_path) as pdf_reader:
total = len(pdf_reader.pages)
failures = []
try:
for image, page in extract_images(pdf_path):
......@@ -131,70 +135,55 @@ def exam_metadata(exam_id):
def extract_images(filename):
"""Yield all images from a PDF file.
Tries to use PyPDF2 to extract the images from the given PDF.
If PyPDF2 fails to open the PDF or PyPDF2 is not able to extract
a page, it continues to use Wand for the rest of the pages.
Tries to use PikePDF to extract the images from the given PDF.
If PikePDF is not able to extract the image from a page,
it continues to use Wand to flatten the rest of the pages.
"""
with open(filename, "rb") as file:
with Pdf.open(filename) as pdf_reader:
use_wand = False
pypdf_reader = None
wand_image = None
total = 0
try:
pypdf_reader = PyPDF2.PdfFileReader(file)
total = pypdf_reader.getNumPages()
except Exception:
# Fallback to Wand if opening the PDF with PyPDF2 failed
use_wand = True
if use_wand:
# If PyPDF2 failed we need Wand to count the number of pages
wand_image = WandImage(filename=filename, resolution=300)
total = len(wand_image.sequence)
total = len(pdf_reader.pages)
for pagenr in range(total):
if not use_wand:
try:
# Try to use PyPDF2, but catch any error it raises
img = extract_image_pypdf(pagenr, pypdf_reader)
# Try to use PikePDF, but catch any error it raises
img = extract_image_pikepdf(pagenr, pdf_reader)
except Exception:
# Fallback to Wand if extracting with PyPDF2 failed
# Fallback to Wand if extracting with PikePDF failed
use_wand = True
if use_wand:
if wand_image is None:
wand_image = WandImage(filename=filename, resolution=300)
img = extract_image_wand(pagenr, wand_image)
img = extract_image_wand(pagenr, pdf_reader)
if img.mode == 'L':
img = img.convert('RGB')
yield img, pagenr+1
if wand_image is not None:
wand_image.close()
def extract_image_pypdf(pagenr, reader):
def extract_image_pikepdf(pagenr, reader):
"""Extracts an image as an array from the designated page
This method uses PyPDF2 to extract the image and only works
when there is a single image present on the page.
This method uses PikePDF to extract the image and only works
when there is a single image present on the page with the
same aspect ratio as the page.
Raises an error if not exactly one image is found on the page
or the image filter is not `FlateDecode`.
We do not check for the actual size of the image on the page,
since this size depends on the draw instruction rather than
the embedded image object available to pikepdf.
Adapted from https://stackoverflow.com/a/34116472/2217463
Raises an error if not exactly one image is present or the image
does not have the same aspect ratio as the page.
Parameters
----------
pagenr : int
Page number to extract
reader : PyPDF2.PdfFileReader instance
The reader to read the page from
reader : pikepdf.Pdf instance
The pdf reader to read the page from
Returns
-------
......@@ -203,60 +192,71 @@ def extract_image_pypdf(pagenr, reader):
Raises
------
ValueError if not exactly one image is found on the page
NotImplementedError if the image filter is not `FlateDecode`
ValueError
if not exactly one image is found on the page or the image
does not have the same aspect ratio as the page
AttributeError
if no XObject or MediaBox is present on the page
"""
page = reader.getPage(pagenr)
xObject = page['/Resources']['/XObject'].getObject()
page = reader.pages[pagenr]
xObject = page.Resources.XObject
if sum((xObject[obj]['/Subtype'] == '/Image')
if sum((xObject[obj].Subtype == '/Image')
for obj in xObject) != 1:
raise ValueError
raise ValueError('Not exactly 1 image present on the page')
for obj in xObject:
if xObject[obj]['/Subtype'] == '/Image':
data = xObject[obj].getData()
filter = xObject[obj]['/Filter']
if xObject[obj].Subtype == '/Image':
pdfimage = PdfImage(xObject[obj])
pdf_width = float(page.MediaBox[2] - page.MediaBox[0])
pdf_height = float(page.MediaBox[3] - page.MediaBox[1])
if filter == '/FlateDecode':
size = (xObject[obj]['/Width'], xObject[obj]['/Height'])
if xObject[obj]['/ColorSpace'] == '/DeviceRGB':
mode = "RGB"
else:
mode = "P"
img = Image.frombytes(mode, size, data)
else:
raise NotImplementedError
ratio_width = pdfimage.width / pdf_width
ratio_height = pdfimage.height / pdf_height
return img
# Check if the aspect ratio of the image is the same as the
# aspect ratio of the page up to a 3% relative error
if abs(ratio_width - ratio_height) > 0.03 * ratio_width:
raise ValueError('Image has incorrect dimensions')
return pdfimage.as_pil_image()
def extract_image_wand(pagenr, wand_image):
def extract_image_wand(pagenr, reader):
"""Flattens a page from a PDF to an image array
This method uses Wand to flatten the page and extract the image.
This method uses Wand to flatten the page and creates an image.
Parameters
----------
pagenr : int
Page number to extract, starting at 0
wand_image : Wand Image instance
The Wand Image to read from
reader : pikepdf.Pdf instance
The pdf reader to read the page from
Returns
-------
img_array : PIL Image
The extracted image data
"""
page = reader.pages[pagenr]
page_pdf = Pdf.new()
page_pdf.pages.append(page)
with SpooledTemporaryFile() as page_file:
page_pdf.save(page_file)
with WandImage(blob=page_file._file.getvalue(), format='pdf', resolution=300) as page_image:
page_image.format = 'jpg'
img_array = np.asarray(bytearray(page_image.make_blob(format="jpg")), dtype=np.uint8)
img = Image.open(BytesIO(img_array))
img.load() # Load the data into the PIL image from the Wand image
single_page = WandImage(wand_image.sequence[pagenr])
single_page.format = 'jpg'
img_array = np.asarray(bytearray(single_page.make_blob(format="jpg")), dtype=np.uint8)
img = Image.open(BytesIO(img_array))
img.load() # Load the data into the PIL image from the Wand image
single_page.close() # Then close the Wand image
return img
......