#!/usr/bin/python
"""
Little script to convert Realaudio stream to mp3 & download podcasts

To use it you need:
 - mplayer with win32 codecs
 - lame to encode the wav in mp3
 - please set the path to store the mp3 files

Copyright Jkx 2004 / Licence GPL

Modifications by James Minchin (http://chaucery.com/)
8-Feb-2006
 - updated deprecated mplayer parameters
 - changed some Unix functions to Python (OS-neutral) functions
 - added support for .ra streams
25-Feb-2006
 - fixed path parameter (temp file still where script runs though)
15-Oct-2006
 - fix stream url containing querystring, ie. '?'
9-Apr-2007
 - added direct naive podcast downloads (gets first entry in feed)
 - added Ogg Vorbis encoding option
10-Apr-2007
 - added optional parameter to set output filename
1-Jul-2007
 - fixed podcast to always download first audio found
4-Nov-2007
 - fix podcast filenames containing '?'
 - add optional parameter to set filename prefix on podcasts
 - fix Real streams containing '&'
25-Nov-2007
 - fix podcast title containing '/'
 - abort when temp filename already in use (eg. when 2 simultaneous
   streams are using the same name)
18-Jan-2009
 - convert to Python 3
 - add percent download progress for podcasts
15-Feb-2009
 - add support for direct rtsp:// links
6-Apr-2010
 - add support for downloading from WMV streams
 - require stream type to be specified
 - set mp3 name based on name found in stream

External tools (mplayer, lame) are launched through the subprocess module
with argument lists, so no shell is involved.
"""

import os
import re
import shutil
import string
import subprocess
import sys
import time
import urllib.request
import uuid

path = "d:/rip/audio"  # os.getcwd()  # destination folder
mplayer = "c:/multimedia/mplayer/mplayer"  # location of MPlayer
lame = "c:/multimedia/lame/lame"  # location of Lame mp3 encoder
ogg = "e:/multimedia/oggenc2.exe"  # location of Ogg Vorbis encoder
hr = "*" * 78

# First <item> of an RSS feed: group 1 is its title, group 2 the url of the
# first <enclosure> that follows it.
_PODCAST_PATTERN = re.compile(
    r'.*?<item>[^<]*<title>(.*?)</title>.*?<enclosure[^>]+url="([^"]*)".*?',
    re.DOTALL)

# A line containing an rtsp:// link; group 1 runs from the last "rtsp://" on
# that line to the end of the line.
_RTSP_PATTERN = re.compile(r"^.*(rtsp://.*)", re.MULTILINE)


def _resolveTool(tool):
    """ return the full executable path for tool when it can be located,
    otherwise tool unchanged (lets the OS try, e.g. a missing '.exe') """
    return shutil.which(tool) or tool


def rip(audioType, url, filename):
    """ rip stream to a wave file, returning official stream name if found

    audioType -- 'w' makes mplayer treat url as a playlist
    url       -- stream location handed to mplayer
    filename  -- wave file mplayer writes (relative to the current directory)

    mplayer's output is echoed as it arrives. The stream name is the text
    after 'name:' on a line following the 'Clip info:' line; None is returned
    when no such line is seen or mplayer cannot be started.
    """
    cmd = [_resolveTool(mplayer),
           '-cache', '320', '-vc', 'dummy', '-vo', 'null',
           '-bandwidth', '5000000',
           '-ao', 'pcm:waveheader:fast:file={0}'.format(filename)]
    if audioType == 'w':
        cmd.append('-playlist')
    # The url is passed as its own argument: nothing in it ('&', '"', ...)
    # can be interpreted by a shell.
    cmd.append(url)

    info = None
    foundName = False
    try:
        with subprocess.Popen(cmd, stdout=subprocess.PIPE,
                              universal_newlines=True) as proc:
            # iterate the pipe directly so progress shows while ripping
            for line in proc.stdout:
                print(line, end='')
                if not foundName:
                    if line.startswith('Clip info:'):
                        foundName = True
                elif line.lstrip().startswith('name:'):
                    info = line.strip()[6:]
    except OSError as err:
        print("ERROR: unable to run {0}: {1}".format(mplayer, err))
    return info


def encode(filename, path):
    """ encode wave file with lame and delete it

    filename -- wave file to encode; its last 4 characters are replaced by
                '.mp3' to name the result
    path     -- folder the mp3 is written to

    The wave file is removed afterwards whether or not encoding worked.
    """
    dest = os.path.join(path, filename[:-4] + '.mp3')
    # variable bit rate of quality 8 (9 = lowest)
    try:
        subprocess.call([_resolveTool(lame), '-V', '8', '--vbr-new',
                         filename, dest])
    except OSError as err:
        print("ERROR: unable to run {0}: {1}".format(lame, err))
    if os.path.exists(filename):
        os.remove(filename)


def _readSource(sourceFile):
    """ return the text of a url or local file, or None if it can't be read """
    try:
        if sourceFile.startswith(('http://', 'https://')):
            # this is a url not a file
            with urllib.request.urlopen(sourceFile) as f:
                return f.read().decode('utf-8', 'replace')
        with open(sourceFile, errors='replace') as f:
            return f.read()
    except OSError:
        print("Unable to open {0}".format(sourceFile))
        return None


def parseSource(sourceFile):
    """ work out if podcast or Real stream, and find their direct locations

    sourceFile -- an rtsp:// link, an http(s):// url, or a local file name

    Returns (url, name):
     - podcast: url of the first enclosure and the item title made safe for
       use as a file name (no extension)
     - Real stream: the rtsp url and a '.wav' file name derived from it
     - (None, None) when nothing usable is found
    """
    if sourceFile.startswith('rtsp://'):
        url = sourceFile
    else:
        data = _readSource(sourceFile)
        if data is None:
            return None, None

        #==== look for mp3 podcast
        podcast = _PODCAST_PATTERN.match(data)
        if podcast is not None:
            title = podcast.group(1)
            url = podcast.group(2)
            print("Found podcast '{0}' at\n{1}".format(title, url))
            #==== fix title so it only contains valid chars for saving file
            #==== workaround url containing weird ampersand encoding - (for CBC Spark podcast)
            return url.replace("&amp;", "&"), title.replace("/", "_").replace(" ", "_").replace(":", "_").replace("__", "_").replace("?", "")

        #==== no podcast, check for Real stream instead
        url = None
        stream = _RTSP_PATTERN.search(data)
        if stream is not None:
            url = stream.group(1).strip()

    if not url:
        print("Unable to find the podcast or rtsp stream")
        return None, None

    print('Found an rtsp stream at:\n{0}'.format(url))
    # last path component, without any '?querystring'
    filename = url.split('/')[-1].split('?')[0]
    if filename.endswith(('.rm', '.ra')):
        return url, filename[:-3] + '.wav'
    print("Unable to find the .ram file")
    return None, None


def reportHook(block_count, block_size, total_size):
    """ fancy display of download progress

    Signature matches the reporthook callback of urllib.request.urlretrieve.
    When total_size is not positive (size unknown) only the amount
    downloaded is shown; otherwise progress is capped at 100%.
    """
    done = block_count * block_size
    if total_size > 0:
        # the last block is usually only partly filled
        done = min(done, total_size)
        print("[{0}%] {1} kB of {2} kB downloaded\r".format(
            done * 100 // total_size, done // 1024, total_size // 1024),
            end="")
    else:
        print("{0} kB downloaded\r".format(done // 1024), end="")


def _downloadPodcast(url, title, prefix):
    """ download podcast url to <path>/[prefix_]title.mp3 """
    if prefix is not None:
        title = prefix + "_" + title
    filename = os.path.join(path, title) + '.mp3'
    if os.path.exists(filename):
        #==== skip if already downloaded
        print("WARNING: podcast {0} already exists - skipping".format(filename))
        input('Hit enter to continue')
        sys.exit(0)

    print(hr)
    print("Downloading to {0}...".format(filename))
    # Download under a temporary name so an interrupted transfer is never
    # mistaken for a finished podcast on the next run.
    partial = filename + '.part'
    urllib.request.urlretrieve(url, partial, reporthook=reportHook)
    os.rename(partial, filename)
    print(" Completed ")
    print(hr)


def _ripStream(audioType, url, filename, outName):
    """ rip a Real or WMV stream to wave, encode to mp3 and give it its
    final name inside path """
    if outName is not None:
        filename = outName + '.wav'
    # the wave file is written inside path (see the chdir below), so that is
    # where a clash with another running rip has to be looked for
    wavPath = os.path.join(path, filename)
    if os.path.exists(wavPath):
        #==== skip if download already in progress
        print("ERROR: temporary file {0} already exists".format(wavPath))
        input('Hit enter to continue')
        sys.exit(1)

    print("Ripping file to {0}".format(wavPath))
    print(hr)
    os.chdir(path)
    info = rip(audioType, url, filename)
    print(hr)
    encode(filename, path)

    oldFilename = filename[:-4] + '.mp3'
    if not os.path.exists(os.path.join(path, oldFilename)):
        print("ERROR: ripping or encoding failed - {0} was not created".format(oldFilename))
        input('Hit enter to continue')
        sys.exit(1)

    if outName is not None or not info:
        # append date to filename
        dateBit = time.strftime("%Y%b%d", time.localtime())
        filename = oldFilename[:-4] + '_' + dateBit + '.mp3'
    else:
        filename = info.replace(" ", "_") + '.mp3'

    print(hr)
    print("Moving to {0}".format(filename))
    if os.path.exists(os.path.join(path, filename)):
        print("ERROR: Destination file already exists")
        print(hr)
        input('Hit enter to remove temporary file {0}'.format(oldFilename))
        os.remove(os.path.join(path, oldFilename))
        sys.exit(1)

    os.rename(os.path.join(path, oldFilename), os.path.join(path, filename))
    print("SUCCESS: Download complete")
    print(hr)


def main():
    """ entry point: sys.argv[1] is the format (r, w or p), sys.argv[2] the
    source and the optional sys.argv[3] an output name / podcast prefix """
    print(hr)
    audioType = sys.argv[1]
    outName = sys.argv[3] if len(sys.argv) == 4 else None

    if audioType == 'w':
        # WMV: use random filename for intermediate wave file
        url = sys.argv[2]
        filename = str(uuid.uuid4()) + '.wav'
    else:
        # RealAudio or podcast
        url, filename = parseSource(sys.argv[2])

    if not (url and filename):
        print("ERROR: Could not find audio to download")
        input('Hit enter to continue')
        print(hr)
    elif audioType == 'p':
        _downloadPodcast(url, filename, outName)
    else:
        _ripStream(audioType, url, filename, outName)


def usage():
    """ friendly help message """
    tail = os.path.basename(sys.argv[0])
    print("Convert realaudio or wmv stream to mp3, or download latest mp3 podcast - usage:\n")
    print(" {0} format infile [outfile]\n".format(tail))
    print("format: r (RealAudio), w (Windows Media) or p (Podcast)")
    print("infile: local file or url of podcast or audio stream")
    print("outfile: optional filename to save to\n")
    print("eg. {0} r ramfile.ram AppleWeek".format(tail))
    print(" {0} r http://XXXX/stream.ram".format(tail))
    print(" {0} r rtsp://XXXX/stream.ra LinuxToday".format(tail))
    print(" {0} p http://XXXX/podcast.xml\n".format(tail))
    print("Files will be stored in {0}".format(path))


if __name__ == '__main__':
    if len(sys.argv) < 3 or sys.argv[1] not in ('r', 'w', 'p'):
        usage()
    else:
        main()