Compare revisions

5dc67747 · 5dc67747 · 5dc67747 · 5dc67747 · 5dc67747 · 5dc67747
--- a/zesje/database.py
+++ b/zesje/database.py
@@ -63,10 +63,11 @@ class Exam(db.Model):
    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(Text, nullable=False)
    token = Column(String(token_length), unique=True, default=_generate_exam_token)
-    submissions = db.relationship('Submission', backref='exam', lazy=True)
-    problems = db.relationship('Problem', backref='exam', order_by='Problem.id', lazy=True)
-    scans = db.relationship('Scan', backref='exam', lazy=True)
-    widgets = db.relationship('ExamWidget', backref='exam', order_by='ExamWidget.id', lazy=True)
+    submissions = db.relationship('Submission', backref='exam', cascade='all', lazy=True)
+    problems = db.relationship('Problem', backref='exam', cascade='all', order_by='Problem.id', lazy=True)
+    scans = db.relationship('Scan', backref='exam', cascade='all', lazy=True)
+    widgets = db.relationship('ExamWidget', backref='exam', cascade='all',
+                              order_by='ExamWidget.id', lazy=True)
    finalized = Column(Boolean, default=False, server_default='f')


@@ -76,8 +77,9 @@ class Submission(db.Model):
    id = Column(Integer, primary_key=True, autoincrement=True)
    copy_number = Column(Integer, nullable=False)
    exam_id = Column(Integer, ForeignKey('exam.id'), nullable=False)
-    solutions = db.relationship('Solution', backref='submission', order_by='Solution.problem_id', lazy=True)
-    pages = db.relationship('Page', backref='submission', lazy=True)
+    solutions = db.relationship('Solution', backref='submission', cascade='all',
+                                order_by='Solution.problem_id', lazy=True)
+    pages = db.relationship('Page', backref='submission', cascade='all', lazy=True)
    student_id = Column(Integer, ForeignKey('student.id'), nullable=True)
    signature_validated = Column(Boolean, default=False, server_default='f', nullable=False)

@@ -97,9 +99,10 @@ class Problem(db.Model):
    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(Text, nullable=False)
    exam_id = Column(Integer, ForeignKey('exam.id'), nullable=False)
-    feedback_options = db.relationship('FeedbackOption', backref='problem', order_by='FeedbackOption.id', lazy=True)
-    solutions = db.relationship('Solution', backref='problem', lazy=True)
-    widget = db.relationship('ProblemWidget', backref='problem', uselist=False, lazy=True)
+    feedback_options = db.relationship('FeedbackOption', backref='problem', cascade='all',
+                                       order_by='FeedbackOption.id', lazy=True)
+    solutions = db.relationship('Solution', backref='problem', cascade='all', lazy=True)
+    widget = db.relationship('ProblemWidget', backref='problem', cascade='all', uselist=False, lazy=True)

    @hybrid_property
    def mc_options(self):

--- a/zesje/emails.py
+++ b/zesje/emails.py
@@ -8,10 +8,12 @@ from email.mime.base import MIMEBase
 from email import encoders

 import jinja2
-from wand.image import Image
+
+from reportlab.pdfgen import canvas

 from .database import Submission
 from . import statistics
+from .api.exams import PAGE_FORMATS


 def solution_pdf(exam_id, student_id):
@@ -20,17 +22,17 @@ def solution_pdf(exam_id, student_id):
    pages = sorted((p for s in subs for p in s.pages), key=(lambda p: p.number))
    pages = [p.path for p in pages]

-    with Image() as output_pdf:
-        for filepath in pages:
-            with Image(filename=filepath) as page:
-                output_pdf.sequence.append(page)
-
-        output_pdf.format = 'pdf'
-
-        result = BytesIO()
+    from flask import current_app
+    page_format = current_app.config.get('PAGE_FORMAT', 'A4')  # TODO Remove default value
+    page_size = PAGE_FORMATS[page_format]

-        output_pdf.save(file=result)
+    result = BytesIO()
+    pdf = canvas.Canvas(result, pagesize=page_size)
+    for page in pages:
+        pdf.drawImage(page, 0, 0, width=page_size[0], height=page_size[1])
+        pdf.showPage()

+    pdf.save()
    result.seek(0)

    return result

--- a/zesje/factory.py
+++ b/zesje/factory.py
@@ -33,8 +33,8 @@ def create_app():
    )

    app.config.update(
-        CELERY_BROKER_URL='redis://localhost:6379',
-        CELERY_RESULT_BACKEND='redis://localhost:6379'
+        CELERY_BROKER_URL='redis://localhost:6479',
+        CELERY_RESULT_BACKEND='redis://localhost:6479'
    )

    db.init_app(app)

--- a/zesje/images.py
+++ b/zesje/images.py
@@ -2,7 +2,42 @@

 import numpy as np

-from operator import sub, add
+
+def add_tup(tup1, tup2):
+    """Add two points component-wise.
+
+    Only the first two entries of each argument are used.
+
+    Parameters
+    ----------
+    tup1 : tuple
+        First operand
+    tup2 : tuple
+        Second operand
+
+    Returns
+    -------
+    tup : tuple
+        ``(tup1[0] + tup2[0], tup1[1] + tup2[1])``
+    """
+    first = tup1[0] + tup2[0]
+    second = tup1[1] + tup2[1]
+    return first, second
+
+
+def sub_tup(tup1, tup2):
+    """Subtract two points component-wise.
+
+    Only the first two entries of each argument are used.
+
+    Parameters
+    ----------
+    tup1 : tuple
+        The point to subtract from
+    tup2 : tuple
+        The point that is subtracted
+
+    Returns
+    -------
+    tup : tuple
+        ``(tup1[0] - tup2[0], tup1[1] - tup2[1])``
+    """
+    first = tup1[0] - tup2[0]
+    second = tup1[1] - tup2[1]
+    return first, second


 def guess_dpi(image_array):
@@ -38,66 +73,112 @@ def get_box(image_array, box, padding=0.3):
    return image_array[top:bottom, left:right]


-def fix_corner_markers(corner_keypoints, shape):
+def get_corner_marker_sides(corner_markers, shape):
+    """Assigns corner markers to the four quadrants of an image.
+
+    The image is split at its horizontal and vertical midlines. For each
+    quadrant the first marker (in list order) lying strictly inside it is
+    selected; markers lying exactly on a midline are ignored.
+
+    Parameters
+    ----------
+    corner_markers : list of tuples
+        The corner marker locations as (x, y) points
+    shape : tuple
+        The shape of an image; ``shape[0]`` is the extent along y and
+        ``shape[1]`` the extent along x
+
+    Returns
+    -------
+    tuples : tuple
+        ``(top_left, top_right, bottom_left, bottom_right)``, where each
+        entry is an (x, y) tuple, or None if no marker lies in that quadrant
+    """
+    x_sep = shape[1] / 2
+    y_sep = shape[0] / 2
+
+    # First marker seen for every (is_bottom, is_right) quadrant
+    found = {}
+    for x, y in corner_markers:
+        if x < x_sep:
+            is_right = False
+        elif x > x_sep:
+            is_right = True
+        else:
+            continue  # Not strictly left or right of the vertical midline
+
+        if y < y_sep:
+            is_bottom = False
+        elif y > y_sep:
+            is_bottom = True
+        else:
+            continue  # Not strictly above or below the horizontal midline
+
+        found.setdefault((is_bottom, is_right), (x, y))
+
+    return (found.get((False, False)), found.get((False, True)),
+            found.get((True, False)), found.get((True, True)))
+
+
+def get_delta(top_left, top_right, bottom_left, bottom_right):
+    """Returns the difference between the top and bottom point of one side
+
+    The left side is used when both left points are present,
+    otherwise the right side is used.
+
+    Parameters
+    ----------
+    top_left : tuple
+        Top left point, falsy (e.g. None) if missing
+    top_right : tuple
+        Top right point, falsy (e.g. None) if missing
+    bottom_left : tuple
+        Bottom left point, falsy (e.g. None) if missing
+    bottom_right : tuple
+        Bottom right point, falsy (e.g. None) if missing
+
+    Returns
+    -------
+    delta : tuple
+        The signed difference (top minus bottom) as an (x, y) tuple
    """
-    Corrects the list of corner markers if only three corner markers are found.
-    This function raises if less than three corner markers are detected.
+
+    # A left point is missing, so fall back to the right side.
+    # Both right points are indexed by sub_tup and must be present here.
+    if not top_left or not bottom_left:
+        return sub_tup(top_right, bottom_right)
+
+    return sub_tup(top_left, bottom_left)
+
+
+def fix_corner_markers(corner_keypoints, shape):
+    """Completes a list of three corner markers with the missing fourth one.
+
+    A list of exactly four markers is returned as is. Otherwise the markers
+    are divided over the quadrants of the image and every missing marker is
+    estimated from its vertical neighbour, using the offset between the top
+    and bottom marker of a complete side.

    Parameters
    ----------
-    corner_keypoints :
+    corner_keypoints : list of tuples
        List of corner marker locations as tuples
-    shape :
+    shape : (float, float, int)
        Shape of the image in (x, y, dim)

    Returns
    -------
-    corner_keypoints :
+    fixed_corners : list of tuples
        A list of four corner markers.
+        When a marker was reconstructed the order is
+        top left, top right, bottom left, bottom right.
+
+    Raises
+    ------
+    RuntimeError
+        If fewer than three corner markers are given
    """
-
    if len(corner_keypoints) == 4:
        return corner_keypoints

    if len(corner_keypoints) < 3:
        raise RuntimeError("Fewer than 3 corner markers found while trying to fix corners")

-    x_sep = shape[1] / 2
-    y_sep = shape[0] / 2
-
-    top_left = [(x, y) for x, y in corner_keypoints if x < x_sep and y < y_sep]
-    bottom_left = [(x, y) for x, y in corner_keypoints if x < x_sep and y > y_sep]
-    top_right = [(x, y) for x, y in corner_keypoints if x > x_sep and y < y_sep]
-    bottom_right = [(x, y) for x, y in corner_keypoints if x > x_sep and y > y_sep]
+    top_left, top_right, bottom_left, bottom_right = get_corner_marker_sides(corner_keypoints, shape)
+    delta = get_delta(top_left, top_right, bottom_left, bottom_right)

-    missing_point = ()
-    # index = 0
    if not top_left:
-        # Top left point is missing
-        (dx, dy) = tuple(map(sub, top_right[0], bottom_right[0]))
-        missing_point = tuple(map(add, bottom_left[0], (dx, dy)))
-        index = 0
-
-    elif not bottom_left:
-        # Bottom left point is missing
-        (dx, dy) = tuple(map(sub, top_right[0], bottom_right[0]))
-        missing_point = tuple(map(sub, top_left[0], (dx, dy)))
-        index = 2
-
-    elif not top_right:
-        # Top right point is missing
-        (dx, dy) = tuple(map(sub, top_left[0], bottom_left[0]))
-        missing_point = tuple(map(add, bottom_right[0], (dx, dy)))
-        index = 1
-
-    elif not bottom_right:
-        # bottom right
-        (dx, dy) = tuple(map(sub, top_left[0], bottom_left[0]))
-        missing_point = tuple(map(sub, top_right[0], (dx, dy)))
-        index = 3
-
-    corner_keypoints.insert(index, missing_point)
-    return corner_keypoints
+        top_left = add_tup(bottom_left, delta)
+
+    if not top_right:
+        top_right = add_tup(bottom_right, delta)
+
+    if not bottom_left:
+        bottom_left = sub_tup(top_left, delta)
+
+    if not bottom_right:
+        bottom_right = sub_tup(top_right, delta)
+
+    return [top_left, top_right, bottom_left, bottom_right]


 def box_is_filled(image_array, box_coords, padding=0.3, threshold=150, pixels=False):

--- a/zesje/pdf_generation.py
+++ b/zesje/pdf_generation.py
-from io import BytesIO
 from tempfile import NamedTemporaryFile

 import PIL
 from pdfrw import PdfReader, PdfWriter, PageMerge
-from pystrich.datamatrix import DataMatrixEncoder
+from pylibdmtx.pylibdmtx import encode
 from reportlab.lib.units import mm
 from reportlab.pdfgen import canvas

@@ -35,9 +34,9 @@ def generate_pdfs(exam_pdf_file, exam_id, copy_nums, output_paths, id_grid_x,
    """
    Generate the final PDFs from the original exam PDF.

-    To maintain a consistent size of the DataMatrix codes, adhere to (# of
-    letters in exam ID) + 2 * (# of digits in exam ID) = C for a certain
-    constant C. The reason for this is that pyStrich encodes two digits in as
+    To ensure the page information fits into the datamatrix grid, adhere to
+    (# of letters in exam ID) + 2 * (# of digits in exam ID) = C for a certain
+    constant C. The reason for this is that libdmtx encodes two digits in as
    much space as one letter.

    If maximum interchangeability with version 1 QR codes is desired (error
@@ -205,9 +204,9 @@ def generate_datamatrix(exam_id, page_num, copy_num):
    """
    Generates a DataMatrix code to be used on a page.

-    To maintain a consistent size of the DataMatrix codes, adhere to (# of
-    letters in exam ID) + 2 * (# of digits in exam ID) = C for a certain
-    constant C. The reason for this is that pyStrich encodes two digits in as
+    To ensure the page information fits into the datamatrix grid, adhere to
+    (# of letters in exam ID) + 2 * (# of digits in exam ID) = C for a certain
+    constant C. The reason for this is that pylibdmtx encodes two digits in as
    much space as one letter.

    If maximum interchangeability with version 1 QR codes is desired (error
@@ -232,8 +231,10 @@ def generate_datamatrix(exam_id, page_num, copy_num):

    data = f'{exam_id}/{copy_num:04d}/{page_num:02d}'

-    image_bytes = DataMatrixEncoder(data).get_imagedata(cellsize=2)
-    return PIL.Image.open(BytesIO(image_bytes))
+    encoded = encode(data.encode('utf-8'), size='18x18')
+    datamatrix = PIL.Image.frombytes('RGB', (encoded.width, encoded.height), encoded.pixels)
+    datamatrix = datamatrix.resize((44, 44)).convert('L')
+    return datamatrix


 def _generate_overlay(canv, pagesize, exam_id, copy_num, num_pages, id_grid_x,
@@ -242,9 +243,9 @@ def _generate_overlay(canv, pagesize, exam_id, copy_num, num_pages, id_grid_x,
    Generates an overlay ('watermark') PDF, which can then be overlaid onto
    the exam PDF.

-    To maintain a consistent size of the DataMatrix codes in the overlay,
+    To ensure the page information fits into the datamatrix grid in the overlay,
    adhere to (# of letters in exam ID) + 2 * (# of digits in exam ID) = C for
-    a certain constant C. The reason for this is that pyStrich encodes two
+    a certain constant C. The reason for this is that pylibdmtx encodes two
    digits in as much space as one letter.

    If maximum interchangeability with version 1 QR codes is desired (error
@@ -276,10 +277,6 @@ def _generate_overlay(canv, pagesize, exam_id, copy_num, num_pages, id_grid_x,

    """

-    # Font settings for the copy number (printed under the datamatrix)
-    fontsize = 8
-    canv.setFont('Helvetica', fontsize)
-
    # transform y-cooridate to different origin location
    id_grid_y = pagesize[1] - id_grid_y

@@ -296,6 +293,9 @@ def _generate_overlay(canv, pagesize, exam_id, copy_num, num_pages, id_grid_x,
    else:
        index = 0
        max_index = 0
+    # Font settings for the copy number (printed under the datamatrix)
+    fontsize = 12
+    canv.setFont('Helvetica', fontsize)

    for page_num in range(num_pages):
        _add_corner_markers_and_bottom_bar(canv, pagesize)
@@ -307,7 +307,7 @@ def _generate_overlay(canv, pagesize, exam_id, copy_num, num_pages, id_grid_x,

        canv.drawInlineImage(datamatrix, datamatrix_x, datamatrix_y_adjusted)
        canv.drawString(
-            datamatrix_x, datamatrix_y_adjusted - fontsize,
+            datamatrix_x, datamatrix_y_adjusted - (fontsize * 0.66),
            f" # {copy_num}"
        )


--- a/zesje/pdf_reader.py
+++ b/zesje/pdf_reader.py
+import os
+
+from pdfminer3.converter import PDFPageAggregator
+from pdfminer3.layout import LAParams
+from pdfminer3.layout import LTFigure
+from pdfminer3.layout import LTTextBoxHorizontal
+from pdfminer3.pdfdocument import PDFDocument
+from pdfminer3.pdfinterp import PDFResourceManager
+from pdfminer3.pdfinterp import PDFPageInterpreter
+from pdfminer3.pdfpage import PDFPage
+from pdfminer3.pdfparser import PDFParser
+
+from .api.exams import PAGE_FORMATS
+
+
+def get_problem_title(problem, data_dir, page_format):
+    """
+    Returns the title of a problem
+
+    The title is the first line of the first text box that get_words
+    selects between the bottom of the problem above (or the top of the
+    page) and the bottom of the widget of this problem, on the page of
+    the exam PDF that holds the problem.
+
+    Parameters
+    ----------
+    problem : Problem
+        The currently selected problem
+    data_dir : str
+        Location of the data folder
+    page_format : str
+        Format of the current page
+
+    Returns
+    -------
+    title: str
+        The title of the problem, or an empty string if no text is found
+    """
+
+    pdf_path = os.path.join(data_dir, f'{problem.exam_id}_data', 'exam.pdf')
+
+    # Get the other problems on the same page
+    problems_on_page = [p for p in problem.exam.problems if p.widget.page == problem.widget.page]
+    problems_on_page.sort(key=lambda prob: prob.widget.y)
+
+    idx = problems_on_page.index(problem)
+
+    # Determine y coordinates to search for text
+    if idx == 0:
+        y_above = 0
+    else:
+        problem_above = problems_on_page[idx - 1]
+        y_above = problem_above.widget.y + problem_above.widget.height
+
+    y_current = problem.widget.y + problem.widget.height
+
+    # The parser reads lazily from the file, so the file has to stay open
+    # while the pages are processed; the context manager closes it on
+    # every exit path (return or exception).
+    with open(pdf_path, 'rb') as fp:
+        parser = PDFParser(fp)
+        document = PDFDocument(parser)
+        rsrcmgr = PDFResourceManager()
+        laparams = LAParams()
+        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
+        interpreter = PDFPageInterpreter(rsrcmgr, device)
+
+        for page in PDFPage.create_pages(document):
+            interpreter.process_page(page)
+            layout = device.get_result()
+
+            # widget.page is compared against pageid - 1
+            if layout.pageid == problem.widget.page + 1:
+                filtered_words = get_words(layout._objs, y_above, y_current, page_format)
+
+                if not filtered_words:
+                    return ''
+
+                lines = filtered_words[0].split('\n')
+                return lines[0]
+
+    return ''
+
+
+def get_words(layout_objs, y_top, y_bottom, page_format):
+    """
+    Returns the text from a pdf page within a specified height.
+    Pdfminer orients the coordinates of a layout object from
+    the bottom left.
+
+    Adapted from https://github.com/euske/pdfminer/issues/171
+    obj.bbox returns the following values: (x0, y0, x1, y1)
+
+    With
+    x0: the distance from the left of the page to the left edge of the box.
+    y0: the distance from the bottom of the page to the lower edge of the box.
+    x1: the distance from the left of the page to the right edge of the box.
+    y1: the distance from the bottom of the page to the upper edge of the box.
+
+    Parameters
+    ----------
+    layout_objs : list of layout objects
+        The list of objects in the page.
+    y_top : double
+        Top of the search area, measured from the top of the page
+    y_bottom : double
+        Bottom of the search area, measured from the top of the page
+    page_format : str
+        Format of the current page
+
+    Returns
+    -------
+    words : list of str
+        The text of every horizontal text box whose lower edge (y0) lies
+        strictly inside the search area, including text boxes nested
+        inside figures, as one flat list.
+    """
+    page_height = PAGE_FORMATS[page_format][1]
+
+    # Convert the search area to pdfminer coordinates (origin bottom left)
+    upper_bound = page_height - y_top
+    lower_bound = page_height - y_bottom
+
+    words = []
+
+    for obj in layout_objs:
+        if isinstance(obj, LTTextBoxHorizontal):
+            if upper_bound > obj.bbox[1] > lower_bound:
+                words.append(obj.get_text())
+
+        elif isinstance(obj, LTFigure):
+            # Extend instead of append: appending would nest the list of
+            # the figure (possibly an empty one) inside the result, while
+            # callers expect every entry to be a string.
+            words.extend(get_words(obj._objs, y_top, y_bottom, page_format))
+
+    return words
--- a/zesje/scans.py
+++ b/zesje/scans.py
@@ -4,20 +4,22 @@ import math
 import os
 from collections import namedtuple, Counter
 from io import BytesIO
+from tempfile import SpooledTemporaryFile
 import signal

 import cv2
 import numpy as np
-import PyPDF2
+from pikepdf import Pdf, PdfImage
 from PIL import Image
 from wand.image import Image as WandImage
 from pylibdmtx import pylibdmtx

 from .database import db, Scan, Exam, Page, Student, Submission, Solution, ExamWidget
 from .datamatrix import decode_raw_datamatrix
-from .images import guess_dpi, get_box, fix_corner_markers
+from .images import guess_dpi, get_box
 from .factory import make_celery
 from .pregrader import add_feedback_to_solution
+from .images import fix_corner_markers

 from .pdf_generation import MARKER_FORMAT, PAGE_FORMATS

@@ -80,7 +82,9 @@ def _process_pdf(scan_id, app_config):
        report_error(f'Error while reading Exam metadata: {e}')
        raise

-    total = PyPDF2.PdfFileReader(open(pdf_path, "rb")).getNumPages()
+    with Pdf.open(pdf_path) as pdf_reader:
+        total = len(pdf_reader.pages)
+
    failures = []
    try:
        for image, page in extract_images(pdf_path):
@@ -131,70 +135,55 @@ def exam_metadata(exam_id):
 def extract_images(filename):
    """Yield all images from a PDF file.

-    Tries to use PyPDF2 to extract the images from the given PDF.
-    If PyPDF2 fails to open the PDF or PyPDF2 is not able to extract
-    a page, it continues to use Wand for the rest of the pages.
+    Tries to use PikePDF to extract the images from the given PDF.
+    If PikePDF is not able to extract the image from a page,
+    it continues to use Wand to flatten the rest of the pages.
+
+    Parameters
+    ----------
+    filename : str
+        Location of the PDF file, as accepted by ``Pdf.open``
+
+    Yields
+    ------
+    img : PIL Image
+        The image of a page, images in mode 'L' are converted to 'RGB'
+    pagenr : int
+        The number of the page, starting at 1
    """

-    with open(filename, "rb") as file:
+    with Pdf.open(filename) as pdf_reader:
        use_wand = False
-        pypdf_reader = None
-        wand_image = None
-        total = 0
-
-        try:
-            pypdf_reader = PyPDF2.PdfFileReader(file)
-            total = pypdf_reader.getNumPages()
-        except Exception:
-            # Fallback to Wand if opening the PDF with PyPDF2 failed
-            use_wand = True
-
-        if use_wand:
-            # If PyPDF2 failed we need Wand to count the number of pages
-            wand_image = WandImage(filename=filename, resolution=300)
-            total = len(wand_image.sequence)
+
+        total = len(pdf_reader.pages)

        for pagenr in range(total):
            if not use_wand:
                try:
-                    # Try to use PyPDF2, but catch any error it raises
-                    img = extract_image_pypdf(pagenr, pypdf_reader)
+                    # Try to use PikePDF, but catch any error it raises
+                    img = extract_image_pikepdf(pagenr, pdf_reader)

                except Exception:
-                    # Fallback to Wand if extracting with PyPDF2 failed
+                    # Fallback to Wand if extracting with PikePDF failed
                    use_wand = True

+            # use_wand is never reset, so once PikePDF failed on a page
+            # Wand is used for that page and all following pages
            if use_wand:
-                if wand_image is None:
-                    wand_image = WandImage(filename=filename, resolution=300)
-                img = extract_image_wand(pagenr, wand_image)
+                img = extract_image_wand(pagenr, pdf_reader)

            if img.mode == 'L':
                img = img.convert('RGB')

            yield img, pagenr+1

-        if wand_image is not None:
-            wand_image.close()
-

-def extract_image_pypdf(pagenr, reader):
+def extract_image_pikepdf(pagenr, reader):
    """Extracts an image as an array from the designated page

-    This method uses PyPDF2 to extract the image and only works
-    when there is a single image present on the page.
+    This method uses PikePDF to extract the image and only works
+    when there is a single image present on the page with the
+    same aspect ratio as the page.

-    Raises an error if not exactly one image is found on the page
-    or the image filter is not `FlateDecode`.
+    We do not check for the actual size of the image on the page,
+    since this size depends on the draw instruction rather than
+    the embedded image object available to pikepdf.

-    Adapted from https://stackoverflow.com/a/34116472/2217463
+    Raises an error if not exactly one image is present or the image
+    does not have the same aspect ratio as the page.

    Parameters
    ----------
    pagenr : int
        Page number to extract
-    reader : PyPDF2.PdfFileReader instance
-        The reader to read the page from
+    reader : pikepdf.Pdf instance
+        The pdf reader to read the page from

    Returns
    -------
@@ -203,60 +192,71 @@ def extract_image_pypdf(pagenr, reader):

    Raises
    ------
-    ValueError if not exactly one image is found on the page
-
-    NotImplementedError if the image filter is not `FlateDecode`
+    ValueError
+        if not exactly one image is found on the page or the image
+        does not have the same aspect ratio as the page
+    AttributeError
+        if no XObject or MediaBox is present on the page
    """

-    page = reader.getPage(pagenr)
-    xObject = page['/Resources']['/XObject'].getObject()
+    page = reader.pages[pagenr]
+
+    xObject = page.Resources.XObject

-    if sum((xObject[obj]['/Subtype'] == '/Image')
+    if sum((xObject[obj].Subtype == '/Image')
            for obj in xObject) != 1:
-        raise ValueError
+        raise ValueError('Not exactly 1 image present on the page')

    for obj in xObject:
-        if xObject[obj]['/Subtype'] == '/Image':
-            data = xObject[obj].getData()
-            filter = xObject[obj]['/Filter']
+        if xObject[obj].Subtype == '/Image':
+            pdfimage = PdfImage(xObject[obj])
+
+            pdf_width = float(page.MediaBox[2] - page.MediaBox[0])
+            pdf_height = float(page.MediaBox[3] - page.MediaBox[1])

-            if filter == '/FlateDecode':
-                size = (xObject[obj]['/Width'], xObject[obj]['/Height'])
-                if xObject[obj]['/ColorSpace'] == '/DeviceRGB':
-                    mode = "RGB"
-                else:
-                    mode = "P"
-                img = Image.frombytes(mode, size, data)
-            else:
-                raise NotImplementedError
+            ratio_width = pdfimage.width / pdf_width
+            ratio_height = pdfimage.height / pdf_height

-            return img
+            # Check if the aspect ratio of the image is the same as the
+            # aspect ratio of the page up to a 3% relative error
+            if abs(ratio_width - ratio_height) > 0.03 * ratio_width:
+                raise ValueError('Image has incorrect dimensions')

+            return pdfimage.as_pil_image()

-def extract_image_wand(pagenr, wand_image):
+
+def extract_image_wand(pagenr, reader):
    """Flattens a page from a PDF to an image array

-    This method uses Wand to flatten the page and extract the image.
+    This method uses Wand to flatten the page and creates an image.
+    The page is first copied into a new single-page PDF in memory,
+    which is then rendered by Wand at a resolution of 300.

    Parameters
    ----------
    pagenr : int
        Page number to extract, starting at 0
-    wand_image : Wand Image instance
-        The Wand Image to read from
+    reader : pikepdf.Pdf instance
+        The pdf reader to read the page from

    Returns
    -------
    img_array : PIL Image
        The extracted image data
    """
+    page = reader.pages[pagenr]
+
+    page_pdf = Pdf.new()
+    page_pdf.pages.append(page)
+
+    # Save into a plain in-memory buffer, this avoids reading the private
+    # `_file` attribute of a SpooledTemporaryFile to get at the bytes
+    page_buffer = BytesIO()
+    page_pdf.save(page_buffer)
+
+    with WandImage(blob=page_buffer.getvalue(), format='pdf', resolution=300) as page_image:
+        page_image.format = 'jpg'
+        img_array = np.asarray(bytearray(page_image.make_blob(format="jpg")), dtype=np.uint8)
+        img = Image.open(BytesIO(img_array))
+        img.load()  # Load the data into the PIL image from the Wand image

-    single_page = WandImage(wand_image.sequence[pagenr])
-    single_page.format = 'jpg'
-    img_array = np.asarray(bytearray(single_page.make_blob(format="jpg")), dtype=np.uint8)
-    img = Image.open(BytesIO(img_array))
-    img.load()  # Load the data into the PIL image from the Wand image
-    single_page.close()  # Then close the Wand image
    return img
No results found