20140121

Create a Podcast RSS XML feed file from a directory using Python

So I had a bunch of items in folders that I wanted to access from my phone without having to store on it, also I wanted to use a podcasting program (Downcast for iOS) so I wouldn't have to keep track to which ones I had listened to already.  This is really useful for shows that post a single episode to their feed for a limited time, then remove it.  This was a great opportunity to learn how to create an RSS feed.

I wanted something that I would be able to port to virtually any host machine, so I decided to use Python to create the RSS feed.  Using Python to create the RSS feed makes it so I can easily modify and run the process without having to run builds and update other files.  For ease of running it I actually use the task scheduler to kick off a batch file which calls the Python RSS XML generator.

I have a little Netbook that I use for a home server, it's nice and low power but does most things I need.  I have setup XAMPP as a web host so I can access the RSS feed and audio files.  I have gPodder download the episodes locally then every night I use Windows Task Scheduler to run a batch file that runs a Python script to build the RSS XML files.  Then in the morning Downcast checks for updates and all the new episodes show up, ready to play.

gPodder is really neat because it will also download Youtube video, so I include video podcasts for some of my favorite Youtube channels, this way I can have it download locally so I don't burn through tons of cell data if I want to watch it while I'm out and about.

Anyway, here is the batch file:
[path to Python script]\createRSSFeed.py "[path to directory containing files (mp3,mp4)]" "[path to outputfile]\[output file name].xml" "[Feed Name]" "[Feed Description]" "[path to gPodder download directory]" "[relative web path]" true[use file timestamp]

And here is the Python script (adapted from stuffaboutcode.com):
# update environment to handle Unicode
PYTHONIOENCODING="utf-8"

# import libraries
import os
import os.path
import sys
import datetime
import time
import re
import fnmatch
import urllib2

# import constants from stat library
from stat import * # ST_SIZE ST_MTIME

 # import ID3 tag reader
######from mutagen.id3 import ID3, ID3TimeStamp, TDRC
from time import strptime, strftime

# format date method
def formatDate(dt):
    return dt.strftime("%a, %d %b %Y %H:%M:%S +0000")


# get the item/@type based on file extension
def getItemType(fileExtension):
    if fileExtension == "aac" or fileExtension == "mp3" or fileExtension == "m4a":
         mediaType = "audio/mpeg"
    elif fileExtension == "mp4" or fileExtension == "avi":
         mediaType = "video/mpeg"
    else:
         mediaType = "audio/mpeg"
    return mediaType


# constants
# the podcast name
rssTitle = sys.argv[3]
# the podcast description
rssDescription = sys.argv[4]
# the url where the podcast items will be hosted
rssSiteURL = "http://[domain address]"
# the url of the folder where the items will be stored
strURL1 =  sys.argv[1].replace(sys.argv[5],sys.argv[6]).replace("\\","/")
rssItemURL = rssSiteURL + urllib2.quote(strURL1.encode("utf8"))
# the url to the podcast html file
rssLink = rssSiteURL #+ ""
# url to the podcast image
strURL2 = strURL1 + "/folder.jpg"
rssImageUrl = rssSiteURL + urllib2.quote(strURL2.encode("utf8"))
# the time to live (in minutes)
rssTtl = "60"
# contact details of the web master
rssWebMaster = "[email address]"


#record datetime started
now = datetime.datetime.now()


# command line options
#    - python createRSFeed.py /path/to/podcast/files /path/to/output/rss/gPodder.xml
# directory passed in
rootdir = sys.argv[1]
# output RSS filename
outputFilename = sys.argv[2]


# Main program

# open rss file
outputFile = open(outputFilename, "w")


# write rss header
outputFile.write("\n")
outputFile.write("\n")
outputFile.write("\n")
outputFile.write("" + rssTitle + "\n")
outputFile.write("" + rssDescription + "\n")
outputFile.write("" + rssLink + "\n")
outputFile.write("" + rssTtl + "\n")
outputFile.write("" + rssImageUrl + "" + rssTitle + "" + rssLink + "\n")
outputFile.write("CC 2014\n")
outputFile.write("" + formatDate(now) + "\n")
outputFile.write("" + formatDate(now) + "\n")
outputFile.write("" + rssWebMaster + "\n")


# walk through all files and subfolders
includes = ['*.mp3','*.mp4','*.m4a']
includes = r'|'.join([fnmatch.translate(x) for x in includes])
for path, subFolders, files in os.walk(rootdir):
    # exclude/include files
    files = [os.path.join(path, f) for f in files]
    #files = [f for f in files if not re.match(excludes, f)]
    files = [f for f in files if re.match(includes, f)]
    files.sort(key=lambda x: os.stat(os.path.join(path, x)).st_ctime)
    for file in files:
        #if fnmatch.fnmatch(path,includes):
        # split the file based on "." we use the first part as the title and the extension to work out the media type
        fileNameBits = file.split(".")
        # get the full path of the file
        fullPath = os.path.join(path, file)
        # get the stats for the file
        fileStat = os.stat(fullPath)
        # find the path relative to the starting folder, e.g. /subFolder/file
        relativePath = fullPath[len(rootdir):]
        relativePath = relativePath.replace(sys.argv[5],sys.argv[6]).replace("\\","/")
        relativePath = urllib2.quote(relativePath.encode("utf8"))

        # Extract ID3 info
        #title
        #  audio = ID3(fullPath)
        #     fileTitle = audio["TIT2"].text[0]
        #date
        #  datePos = fileTitle.find(":")
        #  fileDate = fileTitle[(datePos+2):]
        #  fileDate = time.strptime(fileDate, "%B %d, %Y")
        #correct date format in the file's ID3 tag
        #  fileTS = ID3TimeStamp(time.strftime("%Y-%m-%d", fileDate))
        #  audio['TDRC'] = TDRC(0, [fileTS])
        #description
        #  fileDesc = audio["COMM::'eng'"].text[0]
        #  fileDesc = fileDesc.encode('ascii', 'ignore') #converts everything to ASCII prior to writing out
        #  audio.save()

        # write rss item
        outputFile.write("\n")
        outputFile.write("" + fileNameBits[0].replace(sys.argv[1]+ "\\","").replace("_", " ").replace("'","'") + "\n")
        #outputFile.write("A description\n")
        #outputFile.write("" + fileDesc + "\n")
        outputFile.write("" + rssItemURL + relativePath + "\n")
        outputFile.write("" + rssItemURL + relativePath + "\n")
        if sys.argv[7] == "true":
            print(sys.argv[7])
            outputFile.write("" + formatDate(datetime.datetime.fromtimestamp(fileStat[ST_MTIME])) + "\n")
        else: outputFile.write("" + formatDate(now) + "\n")
        outputFile.write("\n")
        outputFile.write("
\n")
     
# write rss footer
outputFile.write("
\n")outputFile.write("
")outputFile.close()
print "complete"

1 comment:

  1. RSS feed search engine Awesome article, it was exceptionally helpful! I simply began in this and I'm becoming more acquainted with it better! Cheers, keep doing awesome!

    ReplyDelete