#!/usr/bin/env python3
"""
Extended Mixcloud RSS feed generator with audio streaming.

Extracts the real audio URLs so episodes can be played in podcast apps.
"""

import argparse
import functools
import http.server
import json
import os
import socketserver
import threading
import time
import xml.etree.ElementTree as ET
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from email.utils import format_datetime
from urllib.parse import quote

import requests
import yt_dlp

# Timeout (seconds) applied to every outgoing HTTP request so that a stalled
# connection cannot block feed generation forever.
REQUEST_TIMEOUT = 10


def _sub(parent, tag, text=None):
    """Append a child element named *tag* to *parent*, optionally with text."""
    element = ET.SubElement(parent, tag)
    if text is not None:
        element.text = text
    return element


class MixcloudRSSGeneratorPro:
    """Build a podcast-style RSS 2.0 feed from a Mixcloud user's cloudcasts."""

    def __init__(self, username, output_file="mixcloud_feed.xml", extract_audio=True):
        """Store the configuration for one Mixcloud user.

        Args:
            username: Mixcloud user name whose cloudcasts are listed.
            output_file: Path of the XML file that will be written.
            extract_audio: When True, resolve direct audio URLs via yt-dlp;
                when False, enclosures point at the Mixcloud page instead.
        """
        self.username = username
        self.output_file = output_file
        self.extract_audio = extract_audio
        self.base_url = "https://api.mixcloud.com"
        self.user_url = f"{self.base_url}/{username}/"
        # Maps Mixcloud page URL -> extracted audio URL; guarded by cache_lock
        # because extraction runs in worker threads.
        self.audio_cache = {}
        self.cache_lock = threading.Lock()

    @staticmethod
    def _page_url(cloudcast):
        """Return the public Mixcloud page URL built from the cloudcast's key."""
        return f"https://www.mixcloud.com{cloudcast.get('key', '')}"

    def get_user_info(self):
        """Fetch the user's profile from the Mixcloud API.

        Returns:
            The decoded JSON response, or None if the request or decoding
            failed (the error is printed).
        """
        try:
            response = requests.get(self.user_url, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers JSON decode errors on older requests versions.
            print(f"Fehler beim Abrufen der Benutzerinformationen: {e}")
            return None

    def get_cloudcasts(self, limit=50):
        """Fetch the user's latest cloudcasts (tracks).

        Args:
            limit: Value sent as the ``limit`` query parameter.

        Returns:
            The list stored under ``data`` in the response, or an empty list
            on any request/decoding error (the error is printed).
        """
        cloudcasts_url = f"{self.user_url}cloudcasts/"
        params = {"limit": limit}

        try:
            response = requests.get(
                cloudcasts_url, params=params, timeout=REQUEST_TIMEOUT
            )
            response.raise_for_status()
            data = response.json()
            return data.get("data", [])
        except (requests.RequestException, ValueError) as e:
            print(f"Fehler beim Abrufen der Cloudcasts: {e}")
            return []

    def extract_audio_url(self, mixcloud_url):
        """Resolve the direct audio URL for a Mixcloud page using yt-dlp.

        Args:
            mixcloud_url: Public Mixcloud page URL of one cloudcast.

        Returns:
            The extracted audio URL, or *mixcloud_url* unchanged when
            extraction is disabled, yields no URL, or raises.
        """
        if not self.extract_audio:
            return mixcloud_url

        # Check the cache first.
        with self.cache_lock:
            if mixcloud_url in self.audio_cache:
                return self.audio_cache[mixcloud_url]

        try:
            ydl_opts = {
                'quiet': True,
                'no_warnings': True,
                # Prefer m4a for better podcast-app compatibility.
                'format': 'best[ext=m4a]/best',
                'extractaudio': False,
                'noplaylist': True,
            }

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(mixcloud_url, download=False)

            if info and 'url' in info:
                audio_url = info['url']
                # Store the result in the cache.
                with self.cache_lock:
                    self.audio_cache[mixcloud_url] = audio_url
                return audio_url

            print(f"⚠️ Keine Audio-URL gefunden für: {mixcloud_url}")
            return mixcloud_url
        except Exception as e:
            # Deliberate best-effort: any extractor failure falls back to the
            # Mixcloud page URL so the feed can still be generated.
            print(f"⚠️ Fehler beim Extrahieren der Audio-URL für {mixcloud_url}: {e}")
            return mixcloud_url

    def extract_audio_urls_parallel(self, cloudcasts, max_workers=3):
        """Resolve audio URLs for all cloudcasts using a thread pool.

        Each cloudcast dict is mutated in place: an ``audio_url`` key is
        added. Progress is printed as extractions finish.

        Args:
            cloudcasts: List of cloudcast dicts from the Mixcloud API.
            max_workers: Number of worker threads.

        Returns:
            The cloudcasts in their original order (not completion order, so
            the feed keeps the ordering delivered by the API).
        """
        if not self.extract_audio:
            return cloudcasts

        print(f"🎵 Extrahiere Audio-URLs für {len(cloudcasts)} Tracks...")

        def extract_for_cloudcast(cloudcast):
            cloudcast['audio_url'] = self.extract_audio_url(self._page_url(cloudcast))
            return cloudcast

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_cloudcast = {
                executor.submit(extract_for_cloudcast, cloudcast): cloudcast
                for cloudcast in cloudcasts
            }

            for i, future in enumerate(as_completed(future_to_cloudcast), 1):
                try:
                    cloudcast = future.result()
                    print(f" ✅ {i}/{len(cloudcasts)} - {cloudcast.get('name', 'Unbekannt')}")
                except Exception as e:
                    # Fall back to the page URL for this one episode.
                    cloudcast = future_to_cloudcast[future]
                    cloudcast['audio_url'] = self._page_url(cloudcast)
                    print(f" ⚠️ {i}/{len(cloudcasts)} - Fehler: {e}")

        # The dicts were updated in place, so the input order is preserved.
        return list(cloudcasts)

    def format_duration(self, seconds):
        """Format a duration given in seconds as ``HH:MM:SS``.

        Non-integer input is truncated to whole seconds; negative input is
        treated as zero.
        """
        total = max(int(seconds), 0)
        hours, remainder = divmod(total, 3600)
        minutes, secs = divmod(remainder, 60)
        return f"{hours:02d}:{minutes:02d}:{secs:02d}"

    def get_content_type_and_size(self, url):
        """Determine the content type and size of *url* via a HEAD request.

        Returns:
            A ``(content_type, content_length)`` tuple. ``content_length`` is
            the raw header string or None when unknown. On request failure
            ``('audio/mpeg', None)`` is returned.
        """
        try:
            # requests.head does not follow redirects by default; follow them
            # so the headers describe the actual media resource.
            response = requests.head(
                url, timeout=REQUEST_TIMEOUT, allow_redirects=True
            )
        except requests.RequestException:
            return 'audio/mpeg', None

        content_type = response.headers.get('content-type', 'audio/mpeg')
        # The length of an error page says nothing about the audio file.
        content_length = response.headers.get('content-length') if response.ok else None

        # Fall back to guessing the content type from the URL.
        if 'audio' not in content_type:
            if '.m4a' in url or '.aac' in url:
                content_type = 'audio/mp4'
            else:
                content_type = 'audio/mpeg'

        return content_type, content_length

    def _add_channel_metadata(self, channel, user_info):
        """Fill *channel* with title, description, link and iTunes tags."""
        display_name = user_info.get('name') or self.username
        biography = user_info.get('biog') or f"Mixcloud-Feed von {self.username}"

        title = _sub(channel, "title", f"{display_name} - Mixcloud Feed")
        _sub(channel, "description", biography)
        link = _sub(channel, "link", f"https://www.mixcloud.com/{self.username}/")
        _sub(channel, "language", "de-DE")

        # iTunes-specific tags.
        _sub(channel, "itunes:author", display_name)
        _sub(channel, "itunes:summary", biography)
        _sub(channel, "itunes:category").set("text", "Music")
        _sub(channel, "itunes:explicit", "false")

        # Channel artwork, if the profile has one.
        picture = (user_info.get('pictures') or {}).get('large')
        if picture:
            image = _sub(channel, "image")
            _sub(image, "url", picture)
            _sub(image, "title", title.text)
            _sub(image, "link", link.text)
            _sub(channel, "itunes:image").set("href", picture)

    def _add_item(self, channel, cloudcast):
        """Append one ``<item>`` (episode) for *cloudcast* to *channel*."""
        item = _sub(channel, "item")

        item_title = _sub(item, "title", cloudcast.get('name', 'Unbekannter Titel'))

        description_text = cloudcast.get('description', '')
        if not description_text:
            description_text = f"Mix von {self.username}"
        _sub(item, "description", description_text)

        # Link to the Mixcloud page.
        _sub(item, "link", cloudcast.get('url', ''))

        _sub(item, "guid", cloudcast.get('key', '')).set("isPermaLink", "false")

        # Publication date: ISO 8601 from the API -> RFC 2822 for RSS.
        # format_datetime is locale-independent, unlike strftime('%a'/'%b').
        created_time = cloudcast.get('created_time')
        if created_time:
            try:
                dt = datetime.fromisoformat(created_time.replace('Z', '+00:00'))
            except ValueError:
                dt = None
            if dt is not None:
                _sub(item, "pubDate", format_datetime(dt))

        # Audio enclosure pointing at the real audio URL when available.
        page_url = self._page_url(cloudcast)
        audio_url = cloudcast.get('audio_url', page_url)
        enclosure = _sub(item, "enclosure")
        enclosure.set("url", audio_url)

        if self.extract_audio and audio_url != page_url:
            content_type, content_length = self.get_content_type_and_size(audio_url)
        else:
            content_type, content_length = "audio/mpeg", None
        enclosure.set("type", content_type)
        # RSS 2.0 requires a length attribute; use "0" when the size is unknown.
        enclosure.set("length", content_length or "0")

        duration = cloudcast.get('audio_length', 0)
        if duration:
            _sub(item, "itunes:duration", self.format_duration(duration))

        # iTunes-specific tags.
        _sub(item, "itunes:title", item_title.text)
        _sub(item, "itunes:summary", description_text)
        _sub(item, "itunes:explicit", "false")

        # Keywords from the first five tags that carry a name.
        tags = cloudcast.get('tags') or []
        names = [tag.get('name') for tag in tags[:5] if tag.get('name')]
        if names:
            _sub(item, "itunes:keywords", ", ".join(names))

    def create_rss_feed(self, limit=50):
        """Build the RSS feed from Mixcloud data and write it to disk.

        Args:
            limit: Number of cloudcasts to request from the API.

        Returns:
            True if the feed file was written; False if user info or
            cloudcasts could not be fetched or writing the file failed.
        """
        user_info = self.get_user_info()
        if not user_info:
            return False

        cloudcasts = self.get_cloudcasts(limit)
        if not cloudcasts:
            print("Keine Cloudcasts gefunden.")
            return False

        # Resolve audio URLs when enabled.
        if self.extract_audio:
            cloudcasts = self.extract_audio_urls_parallel(cloudcasts)

        # RSS root element; namespace prefixes are declared as plain
        # attributes and used literally in the tag names below.
        rss = ET.Element("rss")
        rss.set("version", "2.0")
        rss.set("xmlns:itunes", "http://www.itunes.com/dtds/podcast-1.0.dtd")
        rss.set("xmlns:content", "http://purl.org/rss/1.0/modules/content/")

        channel = ET.SubElement(rss, "channel")
        self._add_channel_metadata(channel, user_info)

        print(f"📦 Erstelle RSS-Feed mit {len(cloudcasts)} Episoden...")

        # Add the items (episodes).
        for cloudcast in cloudcasts:
            self._add_item(channel, cloudcast)

        # Write the XML to the output file.
        tree = ET.ElementTree(rss)
        ET.indent(tree, space=" ", level=0)

        try:
            tree.write(self.output_file, encoding='utf-8', xml_declaration=True)
        except Exception as e:
            print(f"❌ Fehler beim Schreiben der XML-Datei: {e}")
            return False

        print(f"✅ RSS-Feed erfolgreich erstellt: {self.output_file}")
        print(f"📊 Anzahl der Episoden: {len(cloudcasts)}")
        if self.extract_audio:
            # Count episodes whose enclosure is not a Mixcloud page URL.
            audio_count = sum(
                1
                for c in cloudcasts
                if c.get('audio_url', '').startswith('http')
                and 'mixcloud.com' not in c.get('audio_url', '')
            )
            print(f"🎵 Direkte Audio-URLs extrahiert: {audio_count}/{len(cloudcasts)}")
        return True

    def serve_feed(self, port=8000):
        """Serve the directory containing the feed file over HTTP.

        Blocks until interrupted with Ctrl+C. Note that every file in that
        directory is served, not only the feed.

        Args:
            port: TCP port to listen on (all interfaces).
        """
        # Serve from the feed's directory without changing the process-wide
        # working directory.
        directory = os.path.dirname(os.path.abspath(self.output_file))
        handler = functools.partial(
            http.server.SimpleHTTPRequestHandler, directory=directory
        )

        try:
            with socketserver.TCPServer(("", port), handler) as httpd:
                print(f"🌐 Server läuft auf http://localhost:{port}")
                print(f"📡 RSS-Feed: http://localhost:{port}/{os.path.basename(self.output_file)}")
                print("⏹️ Drücke Ctrl+C zum Beenden")
                httpd.serve_forever()
        except KeyboardInterrupt:
            print("\n👋 Server beendet.")
        except Exception as e:
            print(f"❌ Fehler beim Starten des Servers: {e}")


def main():
    """Parse command-line arguments, generate the feed and optionally serve it."""
    parser = argparse.ArgumentParser(
        description="Erstellt einen RSS-Feed aus Mixcloud-Tracks mit echten Audio-URLs"
    )
    parser.add_argument("username", help="Mixcloud-Benutzername (z.B. serman_dj)")
    parser.add_argument("-o", "--output", default="mixcloud_feed.xml",
                        help="Ausgabedatei für den RSS-Feed (Standard: mixcloud_feed.xml)")
    parser.add_argument("-l", "--limit", type=int, default=50,
                        help="Anzahl der zu holenden Tracks (Standard: 50)")
    parser.add_argument("--no-audio", action="store_true",
                        help="Deaktiviert die Audio-URL-Extraktion (nur Mixcloud-Links)")
    parser.add_argument("--serve", action="store_true",
                        help="Startet einen HTTP-Server für den RSS-Feed")
    parser.add_argument("--port", type=int, default=8000,
                        help="Port für den HTTP-Server (Standard: 8000)")

    args = parser.parse_args()

    extract_audio = not args.no_audio
    generator = MixcloudRSSGeneratorPro(args.username, args.output, extract_audio)

    print(f"🎵 Erstelle RSS-Feed für Mixcloud-User: {args.username}")
    if extract_audio:
        print("🔧 Audio-URL-Extraktion aktiviert (kann einige Minuten dauern)")
    else:
        print("⚡ Schnellmodus: Nur Mixcloud-Links (keine Audio-Extraktion)")
    print("-" * 60)

    # Honour --limit, which was previously parsed but never applied.
    success = generator.create_rss_feed(limit=args.limit)

    if success and args.serve:
        generator.serve_feed(args.port)


if __name__ == "__main__":
    main()