You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

224 lines
9.3 KiB

import requests
import os
import re
import json
import youtube_dl
import sys
from datetime import date
# Defaults for the four settings; each can be overridden on the command line.
requested_toots = "80"
instance = "https://mastodon.matrix.org"
tag = "fridaydisco"
date_used = date.today().strftime("%Y-%m-%d")


def print_usage():
    """Print the table of supported command-line parameters."""
    print("Available parameters are:")
    print()
    print("\tparameter\t\tDescription\t\t\tDefault")
    print("\t-n=\t\t\tNumber of fetched toots\t\t-n=80")
    print("\t-i=\t\t\tInstance url\t\t\t-i=https://mastodon.matrix.org")
    print("\t-d=\t\t\tDate, format: YYYY-MM-DD\t\ttodays date")
    print("\t-t=\t\t\tHashtag to look for\t\t-t=fridaydisco")


def is_valid_date(value):
    """Return True when *value* is a real calendar date written as YYYY-MM-DD."""
    # fullmatch: the whole argument must be the date, not merely contain one.
    if re.fullmatch(r"[0-9]{4}-[0-9]{2}-[0-9]{2}", value) is None:
        return False
    year, month, day = (int(part) for part in value.split("-"))
    try:
        date(year, month, day)
    except ValueError:
        # Well-formed but impossible, e.g. month 13 or day 32.
        return False
    return True


def parse_cli_args(argv):
    """Parse ``-x=value`` parameters and return the overrides they request.

    Args:
        argv: Full argument vector. ``argv[0]`` (the script path) is skipped
            by position, so the script still works when it is renamed.

    Returns:
        A dict holding any of the keys ``requested_toots``, ``instance``,
        ``date_used`` and ``tag`` that were given on the command line.

    Raises:
        SystemExit: with status 0 after printing help (``-h``/``--help``),
            with status 1 for an unknown parameter or an invalid date.
    """
    # Matching on the leading three characters (instead of "anywhere in the
    # argument") keeps values such as "-i=https://my-n=host" from being
    # mistaken for another flag.
    prefixes = {
        "-n=": "requested_toots",
        "-i=": "instance",
        "-d=": "date_used",
        "-t=": "tag",
    }
    overrides = {}
    for arg in argv[1:]:
        if arg in ("-h", "--help"):
            print_usage()
            sys.exit(0)
        key = prefixes.get(arg[:3])
        if key is None:
            print("Unknown parameter " + arg)
            print_usage()
            sys.exit(1)
        value = arg[3:]
        if key == "date_used" and not is_valid_date(value):
            print("Wrong date format!")
            print("Use YYYY-MM-DD")
            sys.exit(1)
        overrides[key] = value
    return overrides


# Only read the command line when executed as a script, so that importing
# this module never interprets (or exits on) the importer's own arguments.
if __name__ == "__main__":
    _overrides = parse_cli_args(sys.argv)
    requested_toots = _overrides.get("requested_toots", requested_toots)
    instance = _overrides.get("instance", instance)
    date_used = _overrides.get("date_used", date_used)
    tag = _overrides.get("tag", tag)
# Base options passed to every youtube_dl.YoutubeDL instance in this script:
# fetch the best audio stream of a single item (no playlists) and let the
# FFmpegExtractAudio post-processor produce a 192 kbit/s mp3.
dl_options = {
    'format': 'bestaudio/best',
    'noplaylist': True,
    # youtube_dl spells this option 'keepvideo'; the former 'keep_video' key
    # was not a recognised option and was silently ignored.
    'keepvideo': False,
    'prefer_ffmpeg': True,
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }],
}
# Output directory for this run, named "<tag>-<date>".
dir_name = "-".join((tag, date_used))
def get_info(url):
    """Look up the title and duration of the media at *url* without downloading it.

    Args:
        url: Address of the track to inspect.

    Returns:
        A dict with the keys ``title`` and ``duration``. When youtube_dl
        cannot extract the information (or reports no title) the dict is
        ``{'title': 'Download Failed!', 'duration': 0}``, which callers use
        as the failure marker.
    """
    failure = {'title': 'Download Failed!', 'duration': 0}
    with youtube_dl.YoutubeDL(dl_options) as ytdl:
        try:
            # extract_info() downloads the media by default; this function
            # only needs the metadata, the actual download is a separate step.
            info_dict = ytdl.extract_info(url, download=False)
        except (
            youtube_dl.utils.DownloadError,
            youtube_dl.utils.ContentTooShortError,
            youtube_dl.utils.ExtractorError,
            youtube_dl.utils.UnavailableVideoError,
        ):
            return failure
    if not info_dict or "title" not in info_dict:
        return failure
    return {
        'title': info_dict["title"],
        # Not every extractor reports a duration; fall back to 0 like the
        # failure record instead of raising KeyError.
        'duration': info_dict.get("duration") or 0,
    }
def dl_song(url, title):
    """Download *url* as an mp3 named after *title* inside ``dir_name``.

    Args:
        url: Address of the track to download.
        title: File name stem (without extension) for the resulting mp3.

    Returns:
        The path ``"<dir_name>/<title>.mp3"`` on success, or the string
        ``"Download Failed!"`` when youtube_dl reports an error.
    """
    target = f'{dir_name}/{title}.mp3'
    # Kept for backward compatibility: this function has always published the
    # requested output path in the shared options dict.
    dl_options["outtmpl"] = target
    # '%' introduces a field in youtube_dl output templates, so a literal
    # percent sign in the title has to be doubled.
    escaped_title = title.replace("%", "%%")
    # Let youtube_dl pick the real extension for the raw download. With a
    # template that already ends in ".mp3" the FFmpegExtractAudio step sees
    # source == target and skips the conversion, leaving a mislabelled file.
    options = dict(dl_options, outtmpl=f'{dir_name}/{escaped_title}.%(ext)s')
    with youtube_dl.YoutubeDL(options) as ytdl:
        try:
            ytdl.download([url])
        except (
            youtube_dl.utils.DownloadError,
            youtube_dl.utils.ContentTooShortError,
            youtube_dl.utils.ExtractorError,
            youtube_dl.utils.UnavailableVideoError,
        ):
            return "Download Failed!"
    return target
# Index file that records, per run directory, which songs were downloaded and
# which links failed.
json_file_name = dir_name + "/" + tag + "-" + date_used + ".json"
links_json = {"songs": [], "failed": []}
if not os.path.exists(json_file_name):
    if not os.path.exists(dir_name):
        os.makedirs(dir_name, 0o0755, exist_ok=True)
        print("Created Directory: " + dir_name)
    else:
        print("Using Directory: " + dir_name)
    # Write a valid, empty index right away (and close the handle) so that an
    # interrupted run does not leave behind a zero-byte file that the next
    # run cannot parse.
    with open(json_file_name, 'w') as links_file:
        json.dump(links_json, links_file)
    print("Creadted File: " + json_file_name)
else:
    try:
        with open(json_file_name, 'r') as links_file:
            stored = json.load(links_file)
    except ValueError:
        # json.JSONDecodeError is a ValueError: empty or corrupt index file.
        stored = None
    if isinstance(stored, dict):
        # Tolerate an index that lacks one of the two lists.
        stored.setdefault("songs", [])
        stored.setdefault("failed", [])
        links_json = stored
    else:
        print("Ignoring unreadable File: " + json_file_name)
    print("Using Directory: " + dir_name)
    print("Using File: " + json_file_name)
# Links that failed during this run (reported in the final summary).
failed_links = []
# URLs already present in the index; used to skip songs downloaded earlier.
links = [song["url"] for song in links_json["songs"]]
# Canonical YouTube watch URL prefix plus the two short-link spellings that
# get rewritten to it before scanning.
youtube_base = "https://www.youtube.com/watch?v="
youtube_base_alt = "https://youtu.be/"
youtube_base_alt_2 = "youtu.be/"
# Dots are escaped so they match a literal "." only; unescaped they matched
# any character (e.g. "https://fooXbandcampYcom/").
bandcamp_regex = r"https://[a-zA-Z0-9_-]*\.bandcamp\.com/[a-zA-Z0-9/_-]*"
soundcloud_regex = r"https://soundcloud\.com/[a-zA-Z0-9_-]*/[a-zA-Z0-9_-]*"
# Fetch the newest posts carrying the hashtag from the instance's public
# tag timeline.
timeline_url = f"{instance}/api/v1/timelines/tag/{tag}"
print(f"Getting {requested_toots} newest Toots from: {timeline_url}")
try:
    # A timeout keeps an unresponsive instance from hanging the script forever.
    # NOTE(review): the Mastodon API documents a maximum "limit" of 40 per
    # request; larger values are presumably capped by the server - confirm.
    response = requests.get(
        timeline_url,
        params={"limit": requested_toots},
        timeout=30,
    )
    # Fail on HTTP errors instead of treating an error body as a post list.
    response.raise_for_status()
    posts = response.json()
except (requests.RequestException, ValueError) as err:
    sys.exit(f"Could not fetch the tag timeline: {err}")
if not isinstance(posts, list):
    sys.exit("Could not fetch the tag timeline: unexpected response format")
print("Looking for public posts with timestamp: " + date_used)
print("Collecting Urls . . .")
# Scan every fetched post for music links, download each new one and record
# the outcome in links_json ("songs" / "failed").
# NOTE(review): the indentation below was reconstructed from a copy of the
# file whose leading whitespace was lost - confirm the nesting, in particular
# that the "not in links" check sits inside the markup filter.

# Number of songs downloaded successfully during this run.
dl_counter = 0
for post in posts:
    # Only public posts are considered.
    if post["visibility"] == "public":
        # Everything before the first "T" of "created_at" is compared with
        # date_used.
        post_date_end = post["created_at"].find("T")
        post_date = post["created_at"][0:post_date_end]
        if post_date == date_used:
            content = post["content"]
            # Rewrite both youtu.be spellings to the long watch?v= form so a
            # single search below covers them.
            content = content.replace(youtube_base_alt, youtube_base)
            content = content.replace(youtube_base_alt_2, youtube_base)
            # cursor_pos is the offset in content where the next search starts.
            cursor_pos = 0
            content_length = len(content)
            while cursor_pos < content_length:
                is_invidious = False
                is_bandcamp = False
                is_soundcloud = False
                full_link = ""
                # First choice: a YouTube watch URL at or after the cursor.
                link_start = content.find(youtube_base, cursor_pos, content_length)
                if link_start == -1:
                    # No YouTube URL left: look for Bandcamp, then SoundCloud,
                    # in the remaining content.
                    bandcamp_link = re.search(bandcamp_regex, content[cursor_pos:])
                    soundcloud_link = re.search(soundcloud_regex, content[cursor_pos:])
                    if bandcamp_link:
                        full_link = bandcamp_link.group()
                        link_start = content.find(full_link, cursor_pos, content_length)
                        is_bandcamp = True
                    elif soundcloud_link:
                        is_soundcloud = True
                        full_link = soundcloud_link.group()
                        link_start = content.find(full_link, cursor_pos, content_length)
                    else:
                        # Last resort: any "/watch?v=" path on another host
                        # (presumably an Invidious-style front-end - confirm).
                        link_start = content.find("/watch?v=", cursor_pos, content_length)
                        is_invidious = True
                        if link_start == -1:
                            # Nothing more to find in this post.
                            break
                # The link is taken to end at the next quote-plus-space.
                # NOTE(review): when that sequence is absent find() returns
                # -1, so the slice below drops the last character and
                # cursor_pos becomes -1.
                link_end = content.find("\" ", link_start, content_length)
                # If the closing span/anchor/paragraph markup occurs before
                # that point, end the link there instead.
                if content.find("</span><span class=\"invisible\"></span></a></p>", link_start, link_end) != -1:
                    link_end = content.find("</span><span class=\"invisible\"></span></a></p>", link_start, link_end)
                if is_bandcamp:
                    # Bandcamp links are never downloaded here; they are
                    # recorded as failed straight away.
                    links_json["failed"].append({
                        "url":full_link,
                        "title":"unknown",
                        "file":"none"
                    })
                    failed_links.append(full_link)
                else:
                    # SoundCloud keeps the regex match; other links are cut
                    # out of the content by position.
                    if not is_soundcloud:
                        full_link = content[link_start:link_end]
                    # A bare "/watch?v=..." path is completed with the
                    # YouTube host.
                    if is_invidious:
                        full_link = "https://www.youtube.com" + full_link
                    # Skip candidates that still contain span markup.
                    if full_link.find("</span><span class=\"invisible\">", 0, len(full_link)) == -1:
                        # Drop everything from the first "&" onwards.
                        link_params_start = full_link.find("&")
                        if link_params_start != -1:
                            full_link = full_link[0:link_params_start]
                        # Skip URLs that are already listed in links.
                        if full_link not in links:
                            song_info = get_info(full_link)
                            if song_info["title"] == "Download Failed!":
                                links_json["failed"].append({
                                    "url":full_link,
                                    "title":"unknown",
                                    "file":"none"
                                })
                                failed_links.append(full_link)
                            else:
                                # Build the file name stem from the title:
                                # spaces and slashes become underscores.
                                song_name_no_spaces = song_info["title"].replace(" ", "_")
                                song_name_no_spaces = song_name_no_spaces.replace("/","_")
                                file_name = dl_song(full_link, song_name_no_spaces)
                                if file_name != "Download Failed!":
                                    links_json["songs"].append({
                                        "title":song_info["title"],
                                        "duration":song_info["duration"],
                                        "url":full_link,
                                        "file":file_name
                                    })
                                    links.append(full_link)
                                    dl_counter += 1
                                else:
                                    links_json["failed"].append({
                                        "url":full_link,
                                        "title":song_info["title"],
                                        "file":"none"
                                    })
                                    failed_links.append(full_link)
                # Continue scanning after the link that was just handled.
                cursor_pos = link_end
# Store the updated index, then report what this run did.
with open(json_file_name, 'w') as outfile:
    outfile.write(json.dumps(links_json))
print(f"Downloaded {dl_counter} Songs")
print(f"Directory {dir_name} contains {len(links)} Songs now")
print(f"{len(failed_links)} Downloads failed!")
if failed_links:
    print("Failed to download:")
    for link in failed_links:
        print(f"\t{link}")
print("Done")