Commit 0e14e07c authored by Guilhem Saurel's avatar Guilhem Saurel
Browse files

Merge remote-tracking branch 'website/master' into website/

parents 64da4f8a 152186bd
[submodule ""]
path = website/scholar
url =
# Tools for Gepetto's website
## Bibtex Databases
This little script checks that the bibtex databases in `projects:/www/html/projects/gepetto/bib` are up
to date.
For now, it only compares them against HAL's database.
### Get dependencies
Using virtualenvwrapper on bash or zsh, or virtualfish on fish, is strongly
recommended. Do not forget that this project uses Python 3 (→ `mkvirtualenv -p python3
gepetto_website_tools_venv` / `vf new -p python3 gepetto_website_tools_venv`).
Then, if you have [pip-tools](
pip install -U -r requirements.txt
### HOWTO use it
First, let's update hal.bib:
wget -O hal.bib '*&sort=submittedDate_tdate+desc&fq=collCode_s%3ALAAS-GEPETTO&defType=edismax&rows=200'
Then, you need to copy all bib files from `projects:/www/html/projects/gepetto/bib` to `bib`.
Finally, you can launch the script `./`
#!/usr/bin/env python3
import re
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from bibtexparser import loads
from requests import get
from requests_futures.sessions import FuturesSession
# (compiled pattern, normaliser) pairs; parse_hal_id iterates them as `for regex, ret in HAL_RE`.
# NOTE(review): the second pattern and the closing bracket of this list are not visible in
# this copy — TODO restore from upstream before running.
HAL_RE = [
(re.compile(r'(hal|tel|inria|lirmm)-\d{8}?', re.I),
lambda g: g),
lambda g: 'hal-%08i' % int(g.replace('/', ''))),
# Template that get_hal_entry formats as `HAL_URL % hal_id`.
# NOTE(review): empty here, presumably stripped together with the other URLs — TODO confirm.
HAL_URL = ''
# Entry fields that parse_hal_id inspects when looking for a HAL identifier.
HAL_KEYS = ['url', 'link', 'pdf', 'video']
# Keys subtracted from the comparison in check_hal and compare_scholar_entries.
USELESS_KEYS = {'hal_local_reference', 'hal_version', 'address', 'note', 'month'}
# Entries of the mapping the main script reads as TEAM_NAMES: initials -> lower-case name
# fragments. The main script tests `name in entry['author'].lower()` for each fragment and
# uses the initials to index `dbs`, which is keyed by the stem of each file in `bib/`.
# NOTE(review): the opening `TEAM_NAMES = {` line and the closing brace are not visible in
# this copy — TODO confirm against upstream.
'ad': ['del prete'],
'ao': ['orthey'],
'bt': ['tondu'],
'cv': ['vassallo'],
'fl': ['lamiraux', 'perrin', 'dalibard'],
'gs': ['saurel'],
'jpl': ['laumond'],
'mc': ['campana'],
# Both the accented and the LaTeX-escaped spellings are listed for accented names.
'mt': ['taïx', 'ta{\\"i}x'],
'nm': ['mansard', 'ramos', 'sol{\\`a}'],
'or': ['roussel'],
'os': ['stasse'],
'psa': ['salaris'],
'ps': ['souères', 'sou{\\`e}res'],
'st': ['tonneau'],
'test': ['test'],
def load(f):
    """Parse the BibTeX content of an open text file.

    ``@String(...)`` declarations are rewritten to the brace form
    ``@String{...}`` before the text is handed to ``loads``.

    :param f: readable text file object (the main script passes the result
        of ``open(...)``).
    :return: whatever ``loads`` returns for the rewritten text.
    """
    # Raw strings keep the regex escapes away from Python's own escape
    # processing ('\(' is an invalid escape in a normal string literal).
    return loads(re.sub(r'@String\((.*)\)', r'@String{\1}', f.read()))
def same_entries(a, b):
    """Check whether entries ``a`` and ``b`` represent the same publication.

    Two entries match when any of ``ID``, ``doi``, ``hal_id``, ``title`` or
    ``chapter`` is present in both with the same value (case-insensitive), or
    when the ``title`` of one equals the ``chapter`` of the other.

    :param a: mapping of BibTeX field name to string value.
    :param b: mapping of BibTeX field name to string value.
    :return: ``True`` on the first matching field, ``False`` otherwise.
    """
    for key in ('ID', 'doi', 'hal_id', 'title', 'chapter'):
        if key in a and key in b and a[key].lower() == b[key].lower():
            return True
    # The same work may be recorded as a title on one side and as a chapter
    # on the other, so try the cross comparison in both directions.
    for left, right in ((a, b), (b, a)):
        if ('title' in left and 'chapter' in right
                and left['title'].lower() == right['chapter'].lower()):
            return True
    return False
def parse_hal_id(entry, hal_db):
    """Try to find the HAL identifier of ``entry``.

    Strategies, in order:

    1. the entry's own ``hal_id`` field;
    2. a HAL entry carrying the same ``doi``;
    3. a ``HAL_RE`` pattern found in one of the ``HAL_KEYS`` fields;
    4. a HAL entry that ``same_entries`` considers identical.

    :param entry: mapping of BibTeX field name to string value.
    :param hal_db: object exposing ``entries``, an iterable of HAL entries.
    :return: the HAL identifier, or ``None`` when no strategy succeeds.
    """
    if 'hal_id' in entry:
        return entry['hal_id']
    if 'doi' in entry:
        for hal_entry in hal_db.entries:
            if 'doi' in hal_entry and hal_entry['doi'] == entry['doi']:
                return hal_entry['hal_id']
    for regex, ret in HAL_RE:
        for key in HAL_KEYS:
            if key in entry:
                # NOTE(review): these two statements were truncated in the
                # reviewed copy. search()/group() is the reading consistent
                # with the HAL_RE normalisers, which take the matched text as
                # a string — confirm against upstream.
                match = regex.search(entry[key])
                if match:
                    return ret(match.group())
    for hal_entry in hal_db.entries:
        if same_entries(entry, hal_entry):
            return hal_entry['hal_id']
    return None
def get_hal_entry(hal_id, hal_db):
    """Get the entry for ``hal_id`` as generated by HAL.

    The local database is tried first: the first key of
    ``hal_db.entries_dict`` that ends with ``hal_id`` wins. Otherwise the
    entry is downloaded from ``HAL_URL % hal_id``.

    :param hal_id: HAL identifier string.
    :param hal_db: object exposing ``entries_dict`` (key -> entry mapping).
    :return: the matching entry, or ``None`` when the online lookup reports
        that no document was found.
    """
    for key, local_entry in hal_db.entries_dict.items():
        if key.endswith(hal_id):
            return local_entry
    url = HAL_URL % hal_id
    # Decode once and reuse the text for both the failure check and parsing.
    body = get(url).content.decode()
    if 'Aucun document trouvé' in body:
        print('fail on', url)
        # Do not try to parse the "no document found" page as BibTeX.
        return None
    hal_entry = loads(body).entries[0]
    print('HAL_ENTRY for {ID} ({hal_id}) not found on local hal db. Got Online one.'.format(**hal_entry))
    return hal_entry
def check_hal(entry, hal_db):
    """Check that one of our entries is up to date with HAL.

    Records ``entry`` in the module-level ``HAL_DICT`` under its HAL id, then
    prints one ``IN HAL for ...`` line for every field that HAL has and our
    entry lacks (fields listed in ``USELESS_KEYS`` are ignored).

    :param entry: one of our entries (mapping of field name to value).
    :param hal_db: the parsed HAL database.
    :return: ``None``; results are printed.
    """
    hal_id = parse_hal_id(entry, hal_db)
    if not hal_id:
        # Without an identifier there is nothing to record or compare;
        # registering the entry under an empty key would only pollute HAL_DICT.
        return
    HAL_DICT[hal_id] = entry
    hal_entry = get_hal_entry(hal_id, hal_db)
    if hal_entry is None:
        # Defensive: tolerate a lookup that yields no entry.
        return
    # Fields present in HAL but absent from our entry; sorted so that the
    # report is stable from one run to the next.
    for key in sorted(set(hal_entry) - set(entry) - USELESS_KEYS):
        print('IN HAL for %s: %s = {%s},' % (entry['ID'], key, hal_entry[key]))
def compare_scholar_entries(entry, scholar_entry, initials):
    """Compare one of our entries with its Google Scholar counterpart.

    Prints one ``IN SCHOLAR for ...`` line for every field that the Scholar
    entry has and ours lacks (fields listed in ``USELESS_KEYS`` are ignored).

    :param entry: our entry (mapping of field name to value).
    :param scholar_entry: the Scholar entry for the same publication.
    :param initials: initials naming the ``.bib`` file that holds ``entry``.
    :return: ``None``; results are printed.
    """
    # Sorted so that the report is stable from one run to the next.
    for key in sorted(set(scholar_entry) - set(entry) - USELESS_KEYS):
        print('IN SCHOLAR for %s.bib/%s: %s = {%s},' % (initials, entry['ID'], key, scholar_entry[key]))
def check_on_site(entries):
    """Check that each entry renders on Gepetto's website.

    Requests ``GEPETTO_URL % entry['ID']`` for every entry through a pool of
    40 worker threads and prints ``INVALID <url>`` for each response whose
    body contains ``Invalid bibtex entry``.

    :param entries: iterable of entries, each with an ``ID`` field.
    :return: ``None``; results are printed.
    """
    # NOTE(review): GEPETTO_URL is not defined in the reviewed copy —
    # presumably a module-level URL template; confirm against upstream.
    # Context managers release the worker threads and the HTTP session even
    # if a request raises.
    with ThreadPoolExecutor(max_workers=40) as executor, \
            FuturesSession(executor=executor) as session:
        # Submit everything first so the requests run concurrently.
        futures = [session.get(GEPETTO_URL % entry['ID']) for entry in entries]
        for future in futures:
            response = future.result()
            if b'Invalid bibtex entry' in response.content:
                print('INVALID', response.url)
def header(title, lvl=1):
    """Print a section banner: the title centred in 20 columns between two rules.

    :param title: text to display.
    :param lvl: heading level; 1 uses ``=``, 2 uses ``*``, anything else ``-``.
    :return: ``None``; the banner is printed.
    """
    rule = {1: '=', 2: '*'}.get(lvl, '-') * 20
    print(rule, '{:^20}'.format(title), rule)
# Script entry point: loads the reference databases, then runs each consistency check in turn
# and prints a report.
# NOTE(review): indentation and several lines are not visible in this copy (HAL_DICT and
# TEAM_NAMES are used below but their definitions are not shown); the comments describe only
# what the remaining statements show.
if __name__ == '__main__':
# hal.bib and scholar.bib are opened relative to the current working directory.
with open('hal.bib') as hal_file:
hal_db = load(hal_file)
with open('scholar.bib') as scholar_file:
scholar_db = load(scholar_file)
# dbs: parsed database per file of bib/, keyed by file stem.
dbs = {}
# not_in_dbs: per initials, (ID, link, title) tuples of HAL entries matched to that person.
not_in_dbs = {key: [] for key in TEAM_NAMES.keys()}
# Check our entries against HAL
header("us → HAL")
for path in Path('bib').glob('*.bib'):
# NOTE(review): the two statements below are truncated in this copy (missing header title
# and the expression that opens `path`) — TODO restore from upstream.
header(, 2)
with as f:
dbs[path.stem] = load(f)
for entry in dbs[path.stem].entries:
check_hal(entry, hal_db)
# Check that HAL entries are in our DB
header("HAL → us")
for entry in hal_db.entries:
# HAL_DICT is filled by check_hal with the HAL ids of our own entries.
# NOTE(review): the body of this `if` is not visible — presumably it skips entries we
# already have; confirm against upstream.
if entry['hal_id'] in HAL_DICT:
for initials, names in TEAM_NAMES.items():
# Substring match of each name fragment against the lower-cased author field.
if any(name in entry['author'].lower() for name in names):
not_in_dbs[initials].append((entry['ID'], entry['link'], entry['title']))
print('IN HAL ??:', entry['hal_id'], entry['author'])
for initials, names in TEAM_NAMES.items():
if not_in_dbs[initials]:
header(names[0], 2)
# The first tuple element is the HAL entry's 'ID' (see the append above), bound here as hal_id.
for hal_id, url, title in not_in_dbs[initials]:
for entry in dbs[initials].entries:
# Exact (case-sensitive) title comparison, unlike same_entries.
if 'title' in entry and entry['title'] == title:
print('CHECK HAL {:^30} {}'.format(entry['ID'], hal_id))
print('ONLY IN HAL {:^30} {}'.format(hal_id, title))
# Checks that Scholar entries are in our DB
header("scholar → us")
for scholar_entry in scholar_db.entries:
if 'author' not in scholar_entry:
print('SCHOLAR W/O AUTHOR:', scholar_entry['ID'])
for initials, names in TEAM_NAMES.items():
if any(name in scholar_entry['author'].lower() for name in names):
for entry in dbs[initials].entries:
if same_entries(entry, scholar_entry):
compare_scholar_entries(entry, scholar_entry, initials)
print('IN SCHOLAR ??:', scholar_entry['ID'])
# Checks our entries work on our website
header("us → our website")
# Flatten every loaded database into a single list of entries.
entries = [entry for key in dbs.keys() for entry in dbs[key].entries]
# check_on_site(entries)
This diff is collapsed.
# Rebuild scholar.bib from scratch: one query per team member, each preceded
# by a "// ------- name ----------" separator line.
rm -f scholar.bib
for author in "Jean-Paul Laumond" "Philippe Souères" "Florent Lamiraux" "Nicolas Mansard" "Olivier Stasse" "Michel Taïx" "Bertrand Tondu" "Bruno Watier" "Mehdi Benallegue" "Andrea Del Prete" "Paolo Salaris" "Naoko Abe" "Steve Tonneau" "Oscar Ramos" "Andreas Orthey" "Olivier Roussel" "Aiva Simaite" "Joseph Mirabel" "Nirmal Giftsun" "Justin Carpentier" "Maximilien Naveau" "Christian Vassallo" "Ganesh Kumar" "Antonio El Khoury" "Guilhem Saurel" "Mylène Campana" "Alexis Mifsud"
do
    # Progress goes to the terminal; quoted so the name is printed verbatim.
    echo "$author"
    echo "// ------- $author ----------" >> scholar.bib
    # NOTE(review): the executable name after ./scholar/ is missing in the
    # reviewed copy — TODO restore it from the scholar submodule.
    ./scholar/ --author "$author" --citation bt >> scholar.bib
done
This diff is collapsed.
\ No newline at end of file
# This file is autogenerated by pip-compile
# To update, run:
# pip-compile --output-file requirements.txt
certifi==2017.7.27.1 # via requests
chardet==3.0.4 # via requests
idna==2.6 # via requests
urllib3==1.22 # via requests
Subproject commit a6c34443c8b766988e87b7df05767357f3e60b24
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment