Commit a1e8ad75 authored by Guilhem Saurel
Browse files

download many videos

parent 612e9b81
#!/usr/bin/env python3
import argparse
import os
import os.path
import xml.etree.ElementTree as ET
from pathlib import Path
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
parser = argparse.ArgumentParser(description='Get video(s) from BBB')
parser.add_argument('--meeting', default='e95c2bd3e4e7ee81bd53eaae33f147b86f5c8384-1593758725366')
parser.add_argument('--account')
parser.add_argument('--server', default='webconf.laas.fr')
FILES = [
......@@ -20,7 +23,11 @@ FILES = [
def download_from_url(url, dst):
"""thx https://gist.github.com/wy193777/0e2a4932e81afc6aa4c8f7a2984f34e2"""
file_size = int(requests.head(url).headers["Content-Length"])
try:
file_size = int(requests.head(url).headers["Content-Length"])
except KeyError:
print(url, requests.head(url).headers)
return
first_byte = os.path.getsize(dst) if os.path.exists(dst) else 0
if first_byte >= file_size:
return file_size
......@@ -56,6 +63,28 @@ def download(server, meeting):
download_from_url(f'https://{server}/presentation/{meeting}/{filename}', filepath)
def get_all_videos(server, account):
    """Download every recording listed on an account page.

    Fetches ``https://{server}/b/{account}``, looks for each
    ``<div id="recording-title">`` element and passes its
    ``data-recordid`` attribute to ``download``. A recording is skipped
    when ``{server}__{account}__{title}.mp4`` already exists in the
    current directory, or when the element carries no record id.

    Args:
        server: Host name of the web-conference server.
        account: Account identifier, appended to ``/b/`` in the page URL.

    Raises:
        requests.HTTPError: If the account page returns an error status.
    """
    url = f'https://{server}/b/{account}'
    print(url)
    response = requests.get(url)
    # Fail loudly on a wrong server/account instead of parsing an error
    # page and silently finding no recordings.
    response.raise_for_status()
    soup = BeautifulSoup(response.content.decode(), features='lxml')
    for link in soup.find_all('div', id='recording-title'):
        title = link.text.strip().replace(' ', '-')
        filename = f'{server}__{account}__{title}.mp4'
        if os.path.exists(filename):
            print(f'skipping {title}')
            continue
        record = link.get('data-recordid')
        if not record:
            # Without a record id, download() would request
            # /presentation/None/... and fail on every file.
            print(f'no record id for {title}, skipping')
            continue
        print()
        print('=' * 80)
        print(record, title)
        print()
        download(server, record)
        # os.symlink(Path('data') / record, Path('data') / title)
if __name__ == '__main__':
    args = parser.parse_args()
    # With --account, fetch every recording listed for that account;
    # otherwise fetch only the single meeting named by --meeting.
    # There is deliberately no unconditional download() call before this
    # branch: it would fetch the default meeting even in --account mode
    # and fetch it twice otherwise.
    if args.account:
        get_all_videos(server=args.server, account=args.account)
    else:
        download(server=args.server, meeting=args.meeting)
......@@ -15,6 +15,32 @@ optional = false
python-versions = "*"
version = "0.2.0"
[[package]]
category = "main"
description = "Screen-scraping library"
name = "beautifulsoup4"
optional = false
python-versions = "*"
version = "4.9.1"
[package.dependencies]
soupsieve = [">1.2", "<2.0"]
[package.extras]
html5lib = ["html5lib"]
lxml = ["lxml"]
[[package]]
category = "main"
description = "Dummy package for Beautiful Soup"
name = "bs4"
optional = false
python-versions = "*"
version = "0.0.1"
[package.dependencies]
beautifulsoup4 = "*"
[[package]]
category = "main"
description = "Python package for providing Mozilla's CA Bundle."
......@@ -246,6 +272,14 @@ optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
version = "1.15.0"
[[package]]
category = "main"
description = "A modern CSS selector implementation for Beautiful Soup."
name = "soupsieve"
optional = false
python-versions = "*"
version = "1.9.6"
[[package]]
category = "main"
description = "A pure-Python library for reading and converting SVG"
......@@ -333,7 +367,7 @@ python-versions = "*"
version = "0.5.1"
[metadata]
content-hash = "c4d1e0d9873b8ffe4b90be174b5d22d86fd8d98be1c91b0a0cef47535c42d29f"
content-hash = "77acde3319e7cd6b3ef1ecb821bec8d79876e05518bc735fdfbfe6a5a55de976"
python-versions = "^3.6"
[metadata.files]
......@@ -345,6 +379,14 @@ backcall = [
{file = "backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255"},
{file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"},
]
beautifulsoup4 = [
{file = "beautifulsoup4-4.9.1-py2-none-any.whl", hash = "sha256:e718f2342e2e099b640a34ab782407b7b676f47ee272d6739e60b8ea23829f2c"},
{file = "beautifulsoup4-4.9.1-py3-none-any.whl", hash = "sha256:a6237df3c32ccfaee4fd201c8f5f9d9df619b93121d01353a64a73ce8c6ef9a8"},
{file = "beautifulsoup4-4.9.1.tar.gz", hash = "sha256:73cc4d115b96f79c7d77c1c7f7a0a8d4c57860d1041df407dd1aae7f07a77fd7"},
]
bs4 = [
{file = "bs4-0.0.1.tar.gz", hash = "sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a"},
]
certifi = [
{file = "certifi-2020.6.20-py2.py3-none-any.whl", hash = "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41"},
{file = "certifi-2020.6.20.tar.gz", hash = "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3"},
......@@ -518,6 +560,10 @@ six = [
{file = "six-1.15.0-py2.py3-none-any.whl", hash = "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"},
{file = "six-1.15.0.tar.gz", hash = "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259"},
]
soupsieve = [
{file = "soupsieve-1.9.6-py2.py3-none-any.whl", hash = "sha256:feb1e937fa26a69e08436aad4a9037cd7e1d4c7212909502ba30701247ff8abd"},
{file = "soupsieve-1.9.6.tar.gz", hash = "sha256:7985bacc98c34923a439967c1a602dc4f1e15f923b6fcf02344184f86cc7efaa"},
]
svglib = [
{file = "svglib-1.0.1.tar.gz", hash = "sha256:ff01593e8c07ea462d3742e1f4141bfa261cbd4400ceb25dfb8fec3508ad0e50"},
]
......
......@@ -10,6 +10,8 @@ python = "^3.6"
requests = "^2.24.0"
tqdm = "^4.49.0"
svglib = "^1.0.1"
bs4 = "^0.0.1"
beautifulsoup4 = "^4.9.1"
[tool.poetry.dev-dependencies]
ipython = "^7.16"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment