Skip to content
Snippets Groups Projects
Commit 356c2209 authored by Guilhem Saurel's avatar Guilhem Saurel
Browse files

drop website utils

parent 8332a887
No related branches found
No related tags found
No related merge requests found
[submodule "scholar.py"]
path = website/scholar
url = git@github.com:nim65s/scholar.py.git
hal_generated.bib
[[source]]
url = "https://pypi.org/simple"
name = "pypi"
verify_ssl = true
[dev-packages]
[packages]
bibtexparser = "*"
pathlib = "*"
requests = "*"
requests-futures = "*"
"beautifulsoup4" = "*"
[requires]
python_version = "3.7"
{
"_meta": {
"hash": {
"sha256": "900141fc7b473c21dd9151b141c1df037af8b1da5d54ca8f9c04aae8e76cfddb"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.7"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"beautifulsoup4": {
"hashes": [
"sha256:194ec62a25438adcb3fdb06378b26559eda1ea8a747367d34c33cef9c7f48d57",
"sha256:90f8e61121d6ae58362ce3bed8cd997efb00c914eae0ff3d363c32f9a9822d10",
"sha256:f0abd31228055d698bb392a826528ea08ebb9959e6bea17c606fd9c9009db938"
],
"index": "pypi",
"version": "==4.6.3"
},
"bibtexparser": {
"hashes": [
"sha256:cc41cdd8332c2bf44b97daf1f135f4f267c3b744c33976655cd270b66f964c0a"
],
"index": "pypi",
"version": "==1.0.1"
},
"certifi": {
"hashes": [
"sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50",
"sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef"
],
"version": "==2019.9.11"
},
"chardet": {
"hashes": [
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
],
"version": "==3.0.4"
},
"future": {
"hashes": [
"sha256:858e38522e8fd0d3ce8f0c1feaf0603358e366d5403209674c7b617fa0c24093"
],
"version": "==0.18.1"
},
"idna": {
"hashes": [
"sha256:156a6814fb5ac1fc6850fb002e0852d56c0c8d2531923a51032d1b70760e186e",
"sha256:684a38a6f903c1d71d6d5fac066b58d7768af4de2b832e426ec79c30daa94a16"
],
"version": "==2.7"
},
"pathlib": {
"hashes": [
"sha256:6940718dfc3eff4258203ad5021090933e5c04707d5ca8cc9e73c94a7894ea9f"
],
"index": "pypi",
"version": "==1.0.1"
},
"pyparsing": {
"hashes": [
"sha256:6f98a7b9397e206d78cc01df10131398f1c8b8510a2f4d97d9abd82e1aacdd80",
"sha256:d9338df12903bbf5d65a0e4e87c2161968b10d2e489652bb47001d82a9b028b4"
],
"version": "==2.4.2"
},
"requests": {
"hashes": [
"sha256:99dcfdaaeb17caf6e526f32b6a7b780461512ab3f1d992187801694cba42770c",
"sha256:a84b8c9ab6239b578f22d1c21d51b696dcfe004032bb80ea832398d6909d7279"
],
"index": "pypi",
"version": "==2.20.0"
},
"requests-futures": {
"hashes": [
"sha256:76a22b95723267b53d8cc50e54d00b98d95afa02fd8449501b07a3797f46a96d"
],
"index": "pypi",
"version": "==0.9.8"
},
"urllib3": {
"hashes": [
"sha256:4c291ca23bbb55c76518905869ef34bdd5f0e46af7afe6861e8375643ffee1a0",
"sha256:9a247273df709c4fedb38c711e44292304f73f39ab01beda9f6b9fc375669ac3"
],
"index": "pypi",
"version": "==1.24.2"
}
},
"develop": {}
}
# Tools for Gepetto's website
## Bibtex Databases
`db.py` is a small script that checks whether the BibTeX databases in `projects:/www/html/projects/gepetto/bib` are up
to date.
For now, it compares them to HAL's database.
### Get dependencies
```
pipenv install
pipenv shell
```
### How to use it
First, let's update hal.bib:
```bash
wget -O hal.bib 'https://api.archives-ouvertes.fr/search/LAAS-GEPETTO/?omitHeader=true&wt=bibtex&q=*&sort=submittedDate_tdate+desc&fq=collCode_s%3ALAAS-GEPETTO&defType=edismax&rows=200'
```
Then, you need to copy all bib files from `projects:/www/html/projects/gepetto/bib` to `bib`.
Finally, you can launch the script: `./db.py`.
*.bib
#!/usr/bin/env python3
import re
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from bibtexparser import loads
from requests import get
from requests_futures.sessions import FuturesSession
# (pattern, converter) pairs tried by parse_hal_id() on the HAL_KEYS fields of
# an entry; the converter turns the matched text into a HAL identifier.
HAL_RE = [
# Identifier written out with one of the listed prefixes, case-insensitive;
# the matched text is returned unchanged.
(re.compile(r'(hal|tel|inria|lirmm)-\d{8}?', re.I),
lambda g: g),
# Four slash-separated digit pairs; the slashes are removed and the digits
# are formatted as 'hal-' followed by a zero-padded 8-digit number.
(re.compile(r'/\d\d/\d\d/\d\d/\d\d/'),
lambda g: 'hal-%08i' % int(g.replace('/', ''))),
]
# URL template filled with a HAL identifier by get_hal_entry().
HAL_URL = 'https://hal.archives-ouvertes.fr/%s/bibtex'
# Entry fields searched for a HAL identifier by parse_hal_id().
HAL_KEYS = ['url', 'link', 'pdf', 'video']
# Filled by check_hal(): HAL identifier -> our entry carrying that identifier.
HAL_DICT = {}
# Fields left out when comparing our entries with HAL / Scholar entries.
USELESS_KEYS = {'hal_local_reference', 'hal_version', 'address', 'note', 'month'}
# URL template filled with an entry ID by check_on_site().
GEPETTO_URL = 'http://projects.laas.fr/gepetto/index.php/Publications/BibtexEntry?bibtex=%s'
# Identifier substitutions applied by hal_dedup_id(): a key is replaced by its value.
HAL_DUPS = {
'tel-01393217': 'tel-01589659',
'hal-01066574': 'hal-01113510',
'tel-01482297': 'tel-01547435',
}
# Initials -> lowercase name fragments searched for in the lowercased author
# field of an entry. The initials are also used as keys into the databases
# loaded from bib/*.bib (which are keyed by file stem).
TEAM_NAMES = {
'ad': ['del prete'],
'ao': ['orthey'],
'bt': ['tondu'],
'cv': ['vassallo'],
'fl': ['lamiraux', 'perrin', 'dalibard'],
'gs': ['saurel'],
'jpl': ['laumond'],
'mc': ['campana'],
'mt': ['taïx', 'ta{\\"i}x'],
'nm': ['mansard', 'ramos', 'sol{\\`a}'],
'or': ['roussel'],
'os': ['stasse'],
'psa': ['salaris'],
'ps': ['souères', 'sou{\\`e}res'],
'st': ['tonneau'],
'bw': ['watier'],
'test': ['test'],
}
def load(f):
    """Parse the BibTeX database contained in the open text file ``f``.

    Every ``@String(...)`` declaration is rewritten to ``@String{...}``
    before the text is handed to ``loads``.

    :param f: readable text file object.
    :return: whatever ``loads`` returns for the rewritten text.
    """
    # Raw strings: '\(' inside a plain literal is an invalid escape sequence
    # and triggers a warning on recent Python versions.
    return loads(re.sub(r'@String\((.*)\)', r'@String{\1}', f.read()))
def same_entries(a, b):
    """Tell whether entries ``a`` and ``b`` describe the same publication.

    Two entries match when they share an equal (case-insensitive) value for
    one of ID, doi, hal_id, title or chapter, or when the title of one equals
    the chapter of the other.
    """
    def agree(field_a, field_b):
        # True when both fields exist and hold the same text, ignoring case.
        return field_a in a and field_b in b and a[field_a].lower() == b[field_b].lower()

    if any(agree(field, field) for field in ('ID', 'doi', 'hal_id', 'title', 'chapter')):
        return True
    # A book chapter may be recorded as a title on the other side.
    return agree('title', 'chapter') or agree('chapter', 'title')
def parse_hal_id(entry, hal_db):
    """Try to work out the HAL identifier of ``entry``.

    Attempts, in order: the entry's own ``hal_id`` field; a HAL entry with the
    same ``doi``; the HAL_RE patterns applied to the HAL_KEYS fields; a HAL
    entry that same_entries() considers identical. Returns None when every
    attempt fails.
    """
    if 'hal_id' in entry:
        return entry['hal_id']

    if 'doi' in entry:
        same_doi = (candidate for candidate in hal_db.entries
                    if 'doi' in candidate and candidate['doi'] == entry['doi'])
        found = next(same_doi, None)
        if found is not None:
            return found['hal_id']

    for pattern, to_hal_id in HAL_RE:
        for field in HAL_KEYS:
            if field not in entry:
                continue
            hit = pattern.search(entry[field])
            if hit:
                return to_hal_id(hit.group())

    for candidate in hal_db.entries:
        if same_entries(entry, candidate):
            return candidate['hal_id']
    return None
def get_hal_entry(hal_id, hal_db):
    """Return the entry HAL generates for ``hal_id``.

    The local HAL database is searched first (any key ending with ``hal_id``).
    Otherwise the BibTeX is downloaded from HAL_URL, appended to
    ``hal_generated.bib`` and its first entry is returned.
    """
    for local_key, local_entry in hal_db.entries_dict.items():
        if local_key.endswith(hal_id):
            return local_entry

    url = HAL_URL % hal_id
    response = get(url)
    response.raise_for_status()
    content = response.content.decode()
    # NOTE(review): after reporting the failure the code still goes on to
    # parse the response - confirm this is intended.
    if 'Aucun document trouvé' in content:
        print('fail on', url)
    hal_entry = loads(content).entries[0]
    with open('hal_generated.bib', 'a') as generated:
        generated.write(content)
    print('HAL_ENTRY for {ID} ({hal_id}) not found on local hal db. Got Online one.'.format(**hal_entry))
    return hal_entry
def hal_dedup_id(hal_id):
    """Return the identifier HAL_DUPS maps ``hal_id`` to, or ``hal_id`` itself when it has no mapping."""
    return HAL_DUPS.get(hal_id, hal_id)
def check_hal(entry, hal_db):
    """Check that our ``entry`` is up to date with HAL.

    Prints the resolved HAL identifier, records the entry in HAL_DICT, then
    prints every field (outside USELESS_KEYS) that HAL has and we do not.
    Does nothing more when no HAL identifier can be found.
    """
    hal_id = hal_dedup_id(parse_hal_id(entry, hal_db))
    print(hal_id)
    if hal_id:
        HAL_DICT[hal_id] = entry
        hal_entry = get_hal_entry(hal_id, hal_db)
        candidate_fields = (set(entry.keys()) | set(hal_entry.keys())) - USELESS_KEYS
        for field in candidate_fields:
            if field in entry:
                continue
            # Not in our entry, hence necessarily provided by HAL.
            print('IN HAL for %s: %s = {%s},' % (entry['ID'], field, hal_entry[field]))
def compare_scholar_entries(entry, scholar_entry, initials):
    """Compare our ``entry`` with a Google Scholar entry.

    Prints every field (outside USELESS_KEYS) present in ``scholar_entry`` but
    missing from ``entry``; ``initials`` names the .bib file in the message.
    """
    candidate_fields = (set(entry.keys()) | set(scholar_entry.keys())) - USELESS_KEYS
    for field in candidate_fields:
        if field in entry:
            continue
        # Not in our entry, hence necessarily provided by Scholar.
        print('IN SCHOLAR for %s.bib/%s: %s = {%s},' % (initials, entry['ID'], field, scholar_entry[field]))
def check_on_site(entries):
    """Request each entry's page on gepetto's website and print the URLs reported as invalid."""
    session = FuturesSession(executor=ThreadPoolExecutor(max_workers=40))
    # Issue every request up front, then collect the answers in order.
    pending = [session.get(GEPETTO_URL % entry['ID']) for entry in entries]
    for request in pending:
        response = request.result()
        if b'Invalid bibtex entry' not in response.content:
            continue
        print('INVALID', response.url)
def header(title, lvl=1):
    """Print ``title`` centred in 20 columns between two 20-character rules.

    The rule character is '=' for level 1, '*' for level 2 and '-' otherwise.
    """
    if lvl == 1:
        rule_char = '='
    elif lvl == 2:
        rule_char = '*'
    else:
        rule_char = '-'
    rule = rule_char * 20
    print(rule, format(title, '^20'), rule)
if __name__ == '__main__':
    # Reference databases: HAL export and Google Scholar export.
    with open('hal.bib') as hal_file:
        hal_db = load(hal_file)
    with open('scholar.bib') as scholar_file:
        scholar_db = load(scholar_file)

    dbs = {}  # bib file stem (team member initials) -> parsed database
    not_in_dbs = {initials: [] for initials in TEAM_NAMES}

    # Check our entries against HAL
    header("us → HAL")
    for path in Path('bib').glob('*.bib'):
        header(path.name, 2)
        with path.open() as f:
            dbs[path.stem] = load(f)
        for entry in dbs[path.stem].entries:
            check_hal(entry, hal_db)

    # Check that HAL entries are in our DB
    header("HAL → us")
    for entry in hal_db.entries:
        if entry['hal_id'] in HAL_DICT:
            continue
        # File the entry under the first team member found among its authors.
        for initials, names in TEAM_NAMES.items():
            if any(name in entry['author'].lower() for name in names):
                not_in_dbs[initials].append((entry['ID'], entry['link'], entry['title']))
                break
        else:
            print('IN HAL ??:', entry['hal_id'], entry['author'])

    for initials, names in TEAM_NAMES.items():
        if not not_in_dbs[initials]:
            continue
        header(names[0], 2)
        # A team member may have no bib/<initials>.bib file: treat that as an
        # empty database instead of failing with a KeyError.
        our_entries = dbs[initials].entries if initials in dbs else []
        for hal_entry_id, _url, title in not_in_dbs[initials]:
            for entry in our_entries:
                if 'title' in entry and entry['title'] == title:
                    print('CHECK HAL {:^30} {}'.format(entry['ID'], hal_entry_id))
                    break
            else:
                print('ONLY IN HAL {:^30} {}'.format(hal_entry_id, title))

    # Checks that Scholar entries are in our DB
    header("scholar → us")
    for scholar_entry in scholar_db.entries:
        if 'author' not in scholar_entry:
            print('SCHOLAR W/O AUTHOR:', scholar_entry['ID'])
            continue
        for initials, names in TEAM_NAMES.items():
            if any(name in scholar_entry['author'].lower() for name in names):
                # Same guard as above for team members without a bib file.
                our_entries = dbs[initials].entries if initials in dbs else []
                for entry in our_entries:
                    if same_entries(entry, scholar_entry):
                        compare_scholar_entries(entry, scholar_entry, initials)
                        break
                else:
                    # Not found in this member's database: try the next member.
                    continue
                break
        else:
            print('IN SCHOLAR ??:', scholar_entry['ID'])

    # Checks our entries work on our website
    header("us → our website")
    entries = [entry for key in dbs.keys() for entry in dbs[key].entries]
    # check_on_site(entries)
/var/www/gepetto
\ No newline at end of file
This diff is collapsed.
#!/bin/bash
# Rebuild scholar.bib from scratch: for every author, append a separator line
# followed by the output of scholar.py for that author.
rm -f scholar.bib

authors=(
    "Jean-Paul Laumond"
    "Philippe Souères"
    "Florent Lamiraux"
    "Nicolas Mansard"
    "Olivier Stasse"
    "Michel Taïx"
    "Bertrand Tondu"
    "Bruno Watier"
    "Mehdi Benallegue"
    "Andrea Del Prete"
    "Paolo Salaris"
    "Naoko Abe"
    "Steve Tonneau"
    "Oscar Ramos"
    "Andreas Orthey"
    "Olivier Roussel"
    "Aiva Simaite"
    "Joseph Mirabel"
    "Nirmal Giftsun"
    "Justin Carpentier"
    "Maximilien Naveau"
    "Christian Vassallo"
    "Ganesh Kumar"
    "Antonio El Khoury"
    "Guilhem Saurel"
    "Mylène Campana"
    "Alexis Mifsud"
)

for author in "${authors[@]}"
do
    # Quoted so the name is printed verbatim (no word splitting or globbing).
    echo "$author"
    echo "// ------- $author ----------" >> scholar.bib
    ./scholar/scholar.py --author "$author" --citation bt >> scholar.bib
done
This diff is collapsed.
/www/html/projects/gepetto/
\ No newline at end of file
Subproject commit a6c34443c8b766988e87b7df05767357f3e60b24
// ------- Jean-Paul Laumond ----------
// ------- Philippe Souères ----------
// ------- Florent Lamiraux ----------
// ------- Nicolas Mansard ----------
// ------- Olivier Stasse ----------
// ------- Michel Taïx ----------
// ------- Bertrand Tondu ----------
// ------- Bruno Watier ----------
// ------- Mehdi Benallegue ----------
// ------- Andrea Del Prete ----------
// ------- Paolo Salaris ----------
// ------- Naoko Abe ----------
// ------- Steve Tonneau ----------
// ------- Oscar Ramos ----------
// ------- Andreas Orthey ----------
// ------- Olivier Roussel ----------
// ------- Aiva Simaite ----------
// ------- Joseph Mirabel ----------
// ------- Nirmal Giftsun ----------
// ------- Justin Carpentier ----------
// ------- Maximilien Naveau ----------
// ------- Christian Vassallo ----------
// ------- Ganesh Kumar ----------
// ------- Antonio El Khoury ----------
// ------- Guilhem Saurel ----------
// ------- Mylène Campana ----------
// ------- Alexis Mifsud ----------
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment