""" pyarchive.submission A Python library which provides an interface for uploading files to the Internet Archive. copyright 2004, Creative Commons, Nathan R. Yergler """ __id__ = "$Id$" __version__ = "$Revision$" __copyright__ = '(c) 2004, Creative Commons, Nathan R. Yergler' __license__ = 'licensed under the GNU GPL2' import cStringIO as StringIO import cb_ftp import urllib2 import xml.dom.minidom import xml.sax.saxutils import os.path import string import types from pyarchive.exceptions import MissingParameterException from pyarchive.exceptions import SubmissionError import pyarchive.utils import pyarchive.const from cctag.metadata import metadata class ArchiveItem: """ opensource_movies movies My Home Movie 2:30 Joe Producer """ def __init__(self, identifier, collection, mediatype, title, runtime=None, adder=None, license=None): self.files = [] self.identifier = identifier self.collection = collection self.mediatype = mediatype self.title = title self.metadata = {} self.metadata['runtime'] = runtime self.metadata['adder'] = adder self.metadata['license'] = license if collection == pyarchive.const.OPENSOURCE_AUDIO: self.server = 'audio-uploads.archive.org' elif collection == pyarchive.const.OPENSOURCE_MOVIES: self.server = 'movies-uploads.archive.org' self.archive_url = None def __setitem__(self, key, value): if key == 'subjects': subjects = [n.strip() for n in value.split(',')] self.metadata['subject'] = subjects else: self.metadata[key] = value def __getitem__(self, key): return self.metadata[key] def addFile(self, filename, source, format=None, claim=None): self.files.append(ArchiveFile(filename, source, format, claim)) # set the running time to defaults self.files[-1].runtime = self.metadata['runtime'] # return the added file object return self.files[-1] def metaxml(self, username=None): """Generates _meta.xml to use in submission; returns a file-like object.""" result = StringIO.StringIO() result.write('') # write the required keys result.write(""" %s %s %s """ % (self.title, self.collection, self.mediatype) ) if username is not None: result.write("%s\n" % username) # write any additional metadata for key in self.metadata: if self.metadata[key] is not None: value = self.metadata[key] # check if value is a list if type(value) in [types.ListType, types.TupleType]: # this is a sequence for n in value: result.write('<%s>%s\n' % ( key, xml.sax.saxutils.escape(n), key) ) else: result.write('<%s>%s\n' % ( key, xml.sax.saxutils.escape(value), key) ) result.write('\n') result.seek(0) return result def filesxml(self): """Generates _files.xml to use in submission; returns a file-like object.""" result = StringIO.StringIO() result.write('\n') for archivefile in self.files: result.write(archivefile.fileNode()) result.write('\n') result.seek(0) return result def sanityCheck(self): """Perform sanity checks before submitting to archive.org""" # do some simple sanity checks if None in (self.identifier, self.collection, self.mediatype): raise MissingParameterException if len(self.files) < 1: raise MissingParameterException for archivefile in self.files: archivefile.sanityCheck() def submit(self, username, password, server=None, callback=None): """Submit the files to archive.org""" # set the server/adder (if necessary) if server is not None: self.server = server if self.metadata['adder'] is None: self.metadata['adder'] = username # make sure we're ready to submit self.sanityCheck() # reset the status callback.reset(steps=10) # connect to the FTP server callback.increment(status='connecting to archive.org...') ftp = cb_ftp.FTP(self.server) ftp.login(username, password) # create a new folder for the submission callback.increment(status='creating folder for uploads...') ftp.mkd(self.identifier) ftp.cwd(self.identifier) # upload the XML files callback.increment(status='uploading metadata...') ftp.storlines("STOR %s_meta.xml" % self.identifier, self.metaxml(username)) ftp.storlines("STOR %s_files.xml" % self.identifier, self.filesxml()) # upload each file callback.increment(status='uploading files...') for archivefile in self.files: # determine the local path name and switch directories localpath, fname = os.path.split(archivefile.filename) os.chdir(localpath) # reset the gauge for this file callback.reset(filename=fname) ftp.storbinary("STOR %s" % archivefile.archiveFilename(), file(fname, 'rb'), callback=callback) ftp.quit() # call the import url, check the return result callback.reset(steps=3) callback.increment(status='finishing submission...') importurl = "http://www.archive.org/services/contrib-submit.php?" \ "user_email=%s&server=%s&dir=%s" % ( username, self.server, self.identifier) response = urllib2.urlopen(importurl) callback.increment(status='checking response...') response_dom = xml.dom.minidom.parse(response) result_type = response_dom.getElementsByTagName("result")[0].getAttribute("type") if result_type == 'success': # extract the URL element and store it self.archive_url = response_dom.getElementsByTagName("url")[0].childNodes[0].nodeValue else: # an error occured; raise an exception raise SubmissionError("%s: %s" % ( response_dom.getElementsByTagName("result")[0].getAttribute("code"), response_dom.getElementsByTagName("message")[0].nodeValue )) callback.finish() return self.archive_url class ArchiveFile: def __init__(self, filename, source = None, format = None, claim = None): # make sure the file exists if not(os.path.exists(filename)): # can not find the file; raise an exception raise IOError # set object properties from suppplied parameters self.filename = filename self.runtime = None self.source = source self.format = format self.__claim = claim if self.format is None: self.__detectFormat() def __detectFormat(self): info = pyarchive.utils.getFileInfo(os.path.split(self.filename)[1], self.filename) bitrate = info[2] if bitrate is not None: if bitrate[1]: self.format = pyarchive.const.MP3['VBR'] else: self.format = pyarchive.const.MP3[bitrate[0]] def fileNode(self): """Generates the XML to represent this file in files.xml.""" result = '\n' % ( self.archiveFilename(), self.source) if self.runtime is not None: result = result + '%s\n' % self.runtime if self.__claim is None: try: self.__claim = metadata(self.filename).getClaim() except NotImplementedError, e: pass if self.__claim: result = result + '%s\n' % \ xml.sax.saxutils.escape(self.__claim) result = result + '%s\n\n' % \ xml.sax.saxutils.escape(self.format) return result def sanityCheck(self): """Perform simple sanity checks before uploading.""" # make sure the file exists if not(os.path.exists(self.filename)): # can not find the file; raise an exception raise IOError # ensure necessary parameters have been supplied if None in (self.filename, self.source, self.format): raise MissingParameterException def archiveFilename(self): localpath, fname = os.path.split(self.filename) fname = fname.replace(' ', '_') chars = [n for n in fname if n in (string.ascii_letters + string.digits + '._')] return "".join(chars)