Verified commit 06cab369, authored by Anton Akhmerov
Browse files

properly check that the preprints are fresh

parent 3e20896b
Pipeline #25842 passed with stage
in 1 minute and 14 seconds
......@@ -19,8 +19,8 @@
# +
from pathlib import Path
from datetime import date
from time import sleep
from datetime import date, datetime, timedelta
from time import sleep, mktime
import random
import subprocess
import os
......@@ -43,19 +43,23 @@ from scipy.spatial import cKDTree
# -
def preprint_ids(only_fresh=True):
    """Return the arXiv ids of new cond-mat preprints from the RSS feed.

    Parameters
    ----------
    only_fresh : bool, optional
        If True (default), return an empty list when the feed's
        ``updated_parsed`` timestamp is more than one day old, or when the
        feed carries no such timestamp.

    Returns
    -------
    list of str
        The part of each entry's ``id`` after ``'abs/'``, for entries whose
        title does not contain ``'UPDATED'``. Empty if feedparser sets the
        ``bozo`` flag or the feed is not fresh.
    """
    # Imported locally so this function does not depend on additions to the
    # module-level import block.
    from calendar import timegm
    from datetime import timezone

    feed = feedparser.parse('https://export.arxiv.org/rss/cond-mat')
    if feed['bozo']:  # Fetch failed
        return []

    if only_fresh:
        updated = getattr(feed, 'updated_parsed', None)
        if updated is None:
            # Without a timestamp freshness cannot be established; treat the
            # feed as stale instead of raising.
            return []
        # feedparser reports parsed dates as UTC time tuples, so convert with
        # timegm (the inverse of gmtime), not mktime (which assumes local
        # time), and compare against an aware UTC "now".
        published = datetime.fromtimestamp(timegm(updated), tz=timezone.utc)
        if datetime.now(timezone.utc) - published > timedelta(days=1):
            return []

    return [
        preprint['id'].split('abs/')[-1]
        for preprint in feed['entries']
        if 'UPDATED' not in preprint['title']
        # TODO: We could check if the new preprint version is improved
    ]
todays_ids = preprint_ids()
......@@ -110,8 +114,6 @@ for paper_id in tqdm.tqdm(todays_ids):
# # Colormap detection
# +
ALLOWED_EXTS = ['png', 'jpg', 'jpeg', 'tiff']
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment