From a9754f46bc7f422eb1d3bf9d2d62b2f68afede61 Mon Sep 17 00:00:00 2001
From: Anton Akhmerov <anton.akhmerov@gmail.com>
Date: Mon, 26 Mar 2018 02:18:28 +0200
Subject: [PATCH] implement exam summary statistics image

---
 zesje/api.py                    |   6 ++
 zesje/helpers/db_helper.py      |  98 ++++++++++++++++++++++++-
 zesje/resources/summary_plot.py | 134 ++++++++++++++++++++++++++++++++
 3 files changed, 236 insertions(+), 2 deletions(-)
 create mode 100644 zesje/resources/summary_plot.py

diff --git a/zesje/api.py b/zesje/api.py
index 84b630313..a9e8ffc93 100644
--- a/zesje/api.py
+++ b/zesje/api.py
@@ -8,6 +8,7 @@ from .resources.students import Students
 from .resources.submissions import Submissions
 from .resources import signature
 from .resources import images
+from .resources import summary_plot
 from .resources.problems import Problems
 from .resources.feedback import Feedback
 
@@ -47,3 +48,8 @@ api_bp.add_url_rule(
     'solution',
     images.get,
 )
+api_bp.add_url_rule(
+    '/images/summary/<int:exam_id>',
+    'exam_summary',
+    summary_plot.get,
+)
diff --git a/zesje/helpers/db_helper.py b/zesje/helpers/db_helper.py
index 30b970ac4..d22e59506 100644
--- a/zesje/helpers/db_helper.py
+++ b/zesje/helpers/db_helper.py
@@ -1,4 +1,8 @@
-from ..models import Problem
+import pandas
+from pony import orm
+from collections import namedtuple, OrderedDict, ChainMap
+
+from ..models import Exam, Problem, Student, Solution
 from . import yaml_helper
 
 def update_exam(exam, existing_yaml, new_yaml):
@@ -16,8 +20,98 @@ def update_exam(exam, existing_yaml, new_yaml):
 
     new_problem_names = list(name for name in new_widgets.index
                              if name != 'studentnr')
-
     problems = list(Problem.select(lambda p: p.exam == exam)
                     .order_by(lambda p: p.id))
     for problem, name in zip(problems, new_problem_names):
         problem.name = name
+
+
+def solution_data(exam_id, student_id):
+    """Return Python datastructures corresponding to the student submission.
+
+    Parameters
+    ----------
+    exam_id : primary key of the exam
+    student_id : primary key of the student
+
+    Returns
+    -------
+    student : dict
+        ``student.to_dict()`` with an extra ``'total'`` key, the sum of the
+        scores over all problems in ``results``.
+    results : list of dict
+        One entry per graded problem, with the keys ``'name'``,
+        ``'max_score'``, ``'feedback'``, ``'score'`` and ``'remarks'``.
+
+    Raises
+    ------
+    RuntimeError
+        If the exam or the student does not exist.
+    """
+    with orm.db_session:
+        # Entity[pk] raises ObjectNotFound rather than returning None.
+        try:
+            exam = Exam[exam_id]
+            student = Student[student_id]
+        except orm.ObjectNotFound as error:
+            raise RuntimeError('Student did not make a '
+                               'submission for this exam') from error
+
+        results = []
+        for problem in exam.problems.order_by(Problem.id):
+            if not orm.count(problem.solutions.feedback):
+                # Nobody received any grade for this problem
+                continue
+            problem_data = {
+                'name': problem.name,
+                'max_score': orm.max(problem.feedback_options.score, default=0)
+            }
+            solutions = Solution.select(lambda s: s.problem == problem
+                                        and s.submission.student == student)
+            problem_data['feedback'] = [
+                {'short': fo.text,
+                 'score': fo.score,
+                 'description': fo.description}
+                for solution in solutions for fo in solution.feedback
+            ]
+            problem_data['score'] = sum(i['score'] or 0
+                                        for i in problem_data['feedback'])
+            problem_data['remarks'] = '\n\n'.join(sol.remarks
+                                                  for sol in solutions
+                                                  if sol.remarks)
+            results.append(problem_data)
+
+        student = student.to_dict()
+
+    student['total'] = sum(i['score'] for i in results)
+    return student, results
+
+
+def full_exam_data(exam_id):
+    """Compute all grades of an exam as a pandas DataFrame.
+
+    The frame has one row per student id. Columns are keyed by
+    ``(problem name, key)`` tuples, where ``key`` is ``'remarks'``,
+    ``'total'`` or the text of a feedback option; the extra column
+    ``('total', 'total')`` holds the total score of the student.
+    """
+    with orm.db_session:
+        students = sorted(Exam[exam_id].submissions.student.id)
+
+        data = [solution_data(exam_id, student_id)
+                for student_id in students]
+
+    results = {}
+    for student, problems in data:
+        for problem in problems:
+            name = problem.pop('name')
+            problem[(name, 'remarks')] = problem.pop('remarks')
+            for fo in problem.pop('feedback'):
+                problem[(name, fo['short'])] = fo['score']
+            problem[(name, 'total')] = problem.pop('score')
+            problem.pop('max_score')
+        results[student['id']] = dict(ChainMap({('total', 'total'):
+                                                student['total']},
+                                               *problems))
+
+    return pandas.DataFrame(results).T
diff --git a/zesje/resources/summary_plot.py b/zesje/resources/summary_plot.py
new file mode 100644
index 000000000..b2c4f940d
--- /dev/null
+++ b/zesje/resources/summary_plot.py
@@ -0,0 +1,134 @@
+import os
+from io import BytesIO
+from flask import abort, Response, current_app as app
+from pony import orm
+
+import pandas
+import numpy as np
+
+from ..models import Exam, Submission
+from ..helpers.db_helper import full_exam_data
+
+import matplotlib
+matplotlib.use('agg')
+import seaborn
+from matplotlib import pyplot
+
+@orm.db_session
+def get(exam_id):
+    """Plot exam summary statistics.
+
+    Draws one horizontal stacked bar per problem and one for the total;
+    each segment is the fraction of students with a given score, coloured
+    by that score as a fraction of the maximum.
+
+    Parameters
+    ----------
+    exam_id : int
+
+    Returns
+    -------
+    Image (PNG mimetype)
+    """
+    try:
+        exam = Exam[exam_id]
+    except (KeyError, orm.ObjectNotFound):
+        # Pony reports a missing primary key with ObjectNotFound.
+        abort(404)
+
+    scores = {problem.name: max(list(problem.feedback_options.score) + [0])
+              for problem in exam.problems}
+    scores['total'] = sum(scores.values())
+
+    full_scores = full_exam_data(exam_id)
+    # Full exam data has multilevel columns (includes detailed feedback), we
+    # flatten them out first.
+    problem_scores = full_scores.iloc[
+        :, full_scores.columns.get_level_values(1) == 'total'
+    ]
+    problem_scores.columns = problem_scores.columns.get_level_values(0)
+    # Exclude empty columns from statistics
+    problem_scores = problem_scores.loc[:, ~(problem_scores == 0).all()]
+
+    seaborn.set()
+    seaborn.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
+
+    cm = matplotlib.cm.magma
+    # define the bins and normalize
+    bounds = np.linspace(0, 1, 21)
+    norm = matplotlib.colors.BoundaryNorm(bounds, cm.N)
+
+    # Per column, the larger of the rubric maximum and the highest score
+    # that was actually given.
+    maxes = pandas.DataFrame(problem_scores.max())
+    maxes['max_rubric'] = maxes.index
+    maxes = maxes.replace({'max_rubric': scores}).max(axis=1)
+
+    # Correlation of every problem with the total of the other problems.
+    corrs = {column: (problem_scores[column]
+                      .astype(float)
+                      .corr(problem_scores
+                            .total
+                            .subtract(problem_scores[column])
+                            .astype(float)
+                            ).round(2)
+                      ) for column in problem_scores if column != 'total'}
+
+    # Cronbach's alpha = K / (K - 1) * (1 - sum(item variances) / total
+    # variance), with K the number of problems. The slicing below relies on
+    # 'total' being the last column.
+    n_problems = len(problem_scores.columns) - 1
+    if n_problems > 1:
+        alpha = (n_problems / (n_problems - 1)
+                 * (1 - problem_scores.var()[:-1].sum()
+                    / problem_scores.total.var())
+                 )
+    else:
+        # Alpha is undefined for fewer than two problems.
+        alpha = float('nan')
+
+    vals = [
+        problem_scores[i].value_counts(normalize=True).sort_index().cumsum()
+        for i in problem_scores
+    ]
+    # Rows: bar position, bar width, bar left edge, score fraction.
+    data = np.array(
+        [
+            (-i, upper-lower, lower, num/maxes.iloc[i])
+            for i, val in enumerate(vals)
+            for num, upper, lower in zip(
+                val.index, val.values, [0] + list(val.values[:-1])
+            )
+        ]
+    ).T
+    fig = pyplot.figure(figsize=(12, 9))
+    try:
+        ax = fig.add_subplot(1, 1, 1)
+        ax.barh(
+            data[0], data[1], 0.5, data[2], color=cm(norm(data[3])),
+            align='center'
+        )
+        ax.set_yticks(np.arange(0, -len(problem_scores.columns), -1))
+        ax.set_yticklabels(
+            [f'{i} ($Rir={corrs[i]:.2f}$)'
+             for i in problem_scores.columns[:-1]]
+            + [f'total: ($\\alpha = {alpha:.2f}$)']
+        )
+        ax.set_xlabel('fraction of students')
+        ax.set_xlim(-0.025, 1.025)
+        sm = matplotlib.cm.ScalarMappable(cmap=cm, norm=norm)
+        sm.set_array([])
+        colorbar = fig.colorbar(sm)
+        colorbar.set_ticks(np.linspace(0, 1, 11))
+        colorbar.set_label('score percentage')
+
+        fig.tight_layout()
+        image = BytesIO()
+        # savefig defaults to PNG; request it explicitly so that the
+        # mimetype below is guaranteed to match.
+        fig.savefig(image, format='png')
+    finally:
+        # Figures made through pyplot stay registered until closed.
+        pyplot.close(fig)
+
+    return Response(image.getvalue(), 200, mimetype='image/png')
-- 
GitLab