#!/usr/bin/env python

# moveMP3 version 2.2
# copyright April 2006 by Mike Bleyer <mike @ hoc dot net>
# please mail bug reports, comments, etc. to me.
#
# This program is released under the GNU GPL, version 2 or later.
# See http://www.gnu.org/licenses/gpl.html
#
# What it does: this script makes a backup copy of your MP3 files
# from your Apple iPod to another Volume/harddisk. Instead of just simply copying
# the files the way iTunes stores them on the iPod, it rebuilds a
# Directory tree using the iTunes DB on the iPod, so the resulting files
# are named after the following scheme:
#   Artist/Album/Song.mp3
# Should work on MacOS, Windows, Unix, but I cannot test them all after every change.
#
# INSTALLATION: just copy the script to where you want it to live and double-click
# requires Python (with Tkinter) or MacPython, v2.1 or newer,
# see http://www.python.org/download
# or http://www.cwi.nl/~jack/macpython.html
# After installation under MacOS 9, increase the memory size of the
# PythonInterpreter app to 32 MB
#
# USAGE: make sure the iPod is mounted as a drive and QUIT iTunes !
# double-click or run the script from the command line
# after the script is done, run iTunes and import the mp3 files into the library
#
# TODO:
# - nice log GUI dialog instead of print messages (EasyDialogs or something)
# - use a template string for file/directory layout, like "%g/%a/%t - %s"
#
# UPGRADING: from version <1.7
# Since the pathname filter has been changed, the resulting pathnames
# may differ. In these cases the script will fail to recognize a
# file already backed up to your HD and copy it again with a slightly
# different name!!!
#
# KNOWN PROBLEMS:
# - the copying is very slow on MacOS 9, this is a known issue of how
#   MacPython deals with the WaitNextEvent loop
# - the unicode handling should be robust now, but you may end up with
#   meaningless/garbled filenames if strings cannot be decoded properly
#   In this case use the command-line options "dry-run" and "encoding" to
#   try until the result is to your satisfaction
#
# CHANGELOG:
# - new in version 2.2:
#   fixed a bug in iTunesDB decoding
#
# - new in version 2.1:
#   fixed a unicode bug in iTunesDB parsing, now more robust
#
# - new in version 2.0:
#   completely rewrote the iTunesDB parsing, it's faster now
#
# - new in version 1.9:
#   added verbose command line option
#   added force copy command line option
#
# - new in version 1.8:
#   added regular expression pattern file name filter command line option
#   added regular expression pattern genre filter command line option
#
# - new in version 1.7:
#   changed the unicode parsing to support 4 byte UTF-16 characters
#   fixed: stripping leading/trailing whitespace and double spaces from pathnames
#   added support for XPlay toplevel directory, thanks to Howard Jones
#   added "automagical" iPod detection for Windows/MacOSX
#   added some command line options for dry run and different encodings
#   fixed a bug assembling absolute paths on MacOSX
#
# - new in version 1.6:
#   fixed obvious bug in sys.stdout calls (aargh, sorry)
#   why don't I get some email feedback about this?
#
# - new in version 1.5:
#   yet another approach to unicode handling, using the locale setting
#   removed unicode strings where unnecessary
#   replaced prints with stdout/stderr for proper scripting
#   filename length limits now checked according to OS
#
# - new in version 1.4:
#   fixed another bug with Tkinter import and exit
#   workaround for Unicode printing issues (ascii conversion)
#
# - new in version 1.3:
#   fixed pathname handling on Windows and MacOS X
#   added primitive command line args
#
# - new in version 1.2:
#   now runs directly in Windows without modification (tested with MacDrive)
#
# - new in version 1.1:
#   now supports Unicode, thanks to Alex Yu for testing and encouragement
#

import sys,os,string,struct,re,getopt

# Platform flag: 1 if the classic MacPython (Mac OS 9) modules can be imported,
# 0 if they cannot (we then assume Windows/Unix and copy with shutil).
isMacOS9=1
try:
	# try to import MacOS9-specific stuff. If it fails, assume Windows/Unix
	import macostools, macfs, EasyDialogs, findertools
	# increase buffer size for copying to 10 MB
	macostools.BUFSIZ=10000000
except ImportError:
	# only a missing module selects the fallback; any other error is a real
	# problem and is no longer hidden
	isMacOS9=0
	import shutil
	try:
		# Tkinter is optional at this point: GetSourceTargetDir() reports an
		# error itself if the file dialog module cannot be imported later
		from Tkinter import *
	except ImportError: pass

# uencoding is the encoding SafeDecode() uses to turn unicode track info into
# path names for the local OS.
# init default Unicode string encoding (ascii)
uencoding='ascii'
# check if local python installation forces something else in site.py (default=ascii)
try:
	import site
	uencoding=site.encoding
except (ImportError, AttributeError): pass

# check system locale for proper Unicode string encoding
import locale
try:
	# ask only once; getdefaultlocale() has been known to raise ValueError for
	# locale names it cannot parse, which must not abort the whole script
	_localeEncoding=locale.getdefaultlocale()[1]
except ValueError:
	_localeEncoding=None
if uencoding=='ascii' and _localeEncoding:
	uencoding=_localeEncoding

# if we are on MacOSX, we default to UTF8
# because apples python reports 'ISO8859-1' as locale, but MacOSX uses utf8
if sys.platform=='darwin':
	uencoding='utf8'


# module-wide settings and their defaults; main() and GetSourceTargetDir()
# replace them according to command line options and user input

# iPod volume/drive to read from and directory that receives the copies
iPodSourceVolume=targetDirectory=u''
# name of the top level folder on the iPod (replaced when an XPlay folder is found)
topLevelFolder=u'iPod_Control'
# message level: 0=none, 1=default, 3=all
verbose=1
# switches for --dry-run and --force, both off by default
dryRun=forceCopy=0
# compiled regular expression filters for file names and genres, None = no filter
nameFilter=genreFilter=None

def SafeDecode(uText=u''):
	"""Encode a unicode string into a byte string that the local OS can handle.

	Despite its name this function encodes: the text is converted with the
	module-wide encoding 'uencoding'. If that is not possible (characters
	that cannot be encoded, or an encoding unknown to Python), plain ascii
	is used and every non-ascii character is replaced by an underscore '_'.
	"""
	# there must be a better way than this?? How can we pass UTF-16 unicode chars to a system function call??
	# try with locale setting first
	try:
		return uText.encode(uencoding)
	except (UnicodeError, LookupError): pass
	# UTF8 is deliberately not tried as second choice: it produced invalid
	# output even though it is proper 8-bit
	# as last resort, use ascii with unknown chars replaced by underscore '_'
	# (encode(..., 'replace') would insert '?', which is not allowed in file
	# names on Windows and has just been filtered out by CleanPathName)
	return re.sub(u'[^\\x00-\\x7f]',u'_',uText).encode('ascii')


def ReadSongInfo(iTunesDBFilename='iTunesDB'):
	"""Read the track information stored in an iTunesDB file.

	Returns a dictionary that maps the track's file path as stored in the
	database (unicode) to a tuple (Artist, Album, Title, Genre) of unicode
	strings. Tags missing in the database keep the placeholders u'Artist',
	u'Album', u'Title' and u'Genre'; tracks without a file path are left out.
	An empty dictionary is returned if the file does not exist or cannot be
	read (a message is written to stderr in that case).
	"""
	# init empty dict
	songdict={}
	# make sure db file exists
	if not os.path.isfile(iTunesDBFilename):
		sys.stderr.write('Error: iTunesDB file \"'+iTunesDBFilename+'\" not found.\n')
		return songdict
	if verbose>0:
		sys.stdout.write('Reading iTunesDB file \"'+iTunesDBFilename+'\".\n')
	# read the song data, the file is closed even if reading fails
	try:
		dbFile=open(iTunesDBFilename,'rb')
		try:
			bigstring=dbFile.read()
		finally:
			dbFile.close()
	except (IOError, OSError):
		sys.stderr.write('Error: cannot open iTunesDB file \"'+iTunesDBFilename+'\" for reading.\n')
		return songdict

	# parse the iTunes db file info, it's in UTF-16 Unicode (little endian)
	# this is still somewhat clumsy, it works for now
	# and has the advantage that we don't care about the exact iTunesDB record format
	if verbose>0:
		sys.stdout.write('Parsing iTunesDB file \"'+iTunesDBFilename+'\" for MP3 tracks.\n')
	# text constructor and record markers in a form that does not depend on
	# the Python 2-only unicode() builtin or on str/bytes being the same type
	try:
		toText=unicode
	except NameError:
		toText=str
	fileHeader='mhit'.encode('ascii')
	itemHeader='mhod'.encode('ascii')
	# the record structure is as follows:
	# a file record (mhit) contains info about a single song file
	#    offset 4: length of the mhit header, offset 12: number of string items
	# it has several string item child records (mhod) which contain info such as:
	#    song title, artist, album, genre, filetype, filename
	#    offset 8: length of the complete item, offset 12: item type,
	#    offset 28: length of the string, which fills the end of the item
	dbLen=len(bigstring)
	i=bigstring.find(fileHeader)
	# the header fields read below end at offset 16, a shorter rest is truncated data
	while i>=0 and i+16<=dbLen:
		recordStart=i
		# seed with defaults, not every song must have all these set
		# but we require them
		mp3Filename=None
		mp3Artist=u'Artist'
		mp3Album=u'Album'
		mp3Title=u'Title'
		mp3Genre=u'Genre'
		# we have a song file item, get its header length
		(songHeadLen,)=struct.unpack('<L',bigstring[i+4:i+8])
		# number of string items
		(songItems,)=struct.unpack('<L',bigstring[i+12:i+16])
		# skip to start of first item
		i=i+songHeadLen
		# now read all the string item records
		itemsRead=0
		while itemsRead<songItems:
			# stop at the first position that does not hold a complete item header
			if i+32>dbLen or bigstring[i:i+4]!=itemHeader:
				sys.stderr.write('Error parsing iTunesDB string records (mhod)\n')
				break
			# we have a string item, get its full length and its type
			(itemCompLen,itemType)=struct.unpack('<LL',bigstring[i+8:i+16])
			# get string length
			(itemLen,)=struct.unpack('<L',bigstring[i+28:i+32])
			# set i to the beginning of next item record
			i=i+itemCompLen
			# only the types 1 to 5 are strings we are interested in
			if 1<=itemType<=5:
				itemText=toText(bigstring[i-itemLen:i],'utf-16-le')
				if itemType==1:
					mp3Title=itemText
				elif itemType==2:
					mp3Filename=itemText
				elif itemType==3:
					mp3Album=itemText
				elif itemType==4:
					mp3Artist=itemText
				else:
					mp3Genre=itemText
			itemsRead=itemsRead+1
		# safety check number of mhod records
		if itemsRead!=songItems:
			sys.stderr.write('Error parsing iTunesDB: number of mhod strings not correct\n')
		# add song file info to dict, if we have a valid file pathname
		if mp3Filename:
			songdict[mp3Filename]=(mp3Artist,mp3Album,mp3Title,mp3Genre)
		# always move past the current marker, a corrupt header length of 0
		# would otherwise find the same record again and again
		if i<=recordStart:
			i=recordStart+4
		i=bigstring.find(fileHeader,i)
	return songdict

# keep regular expression pattern object as global var for speed
# matches every run of the chars \/:*?"<>| and space; the backslash has to be
# doubled inside the character class, a single one only escapes the slash
specialPattern=re.compile(r'[\\/:*?"<>| ]+',re.U)
def CleanPathName(toClean=u''):
	"""Clean a path name component from chars that are illegal in file names.

	Every run of the characters \\ / : * ? " < > | and space is replaced by a
	single space, then leading/trailing whitespace is removed. The length is
	not limited here, the caller has to do that.
	"""
	# to make this work on Mac OS 9, Windows and Unix, we filter out all of the following chars \/:*?"<>|
	# additional filter to remove leading/trailing whitespace (Windows doesn't like it)
	# NOTE: names containing a backslash now differ from those produced by
	# earlier versions, which left the backslash in place
	return specialPattern.sub(u' ',toClean).strip()

def BuildTreeCopy(songdict=None):
	"""Copy the tracks listed in songdict into an Artist/Album/Title.mp3 tree.

	songdict maps the track path stored in the iTunesDB to a tuple
	(Artist, Album, Title, Genre), as returned by ReadSongInfo(). Source
	volume, target directory, filters and the verbose/dryRun/forceCopy
	switches are taken from the module globals.

	A track is skipped if it does not match the name or genre filter, if its
	source file does not exist or if the target file already exists (unless
	forceCopy is set). With dryRun set only the messages are printed. The
	first copy that fails ends the program with exit status 2.
	"""
	if songdict is None:
		songdict={}
	if verbose>0:
		sys.stdout.write('Copying MP3 files from iPod \"'+iPodSourceVolume+'\" to \"'+targetDirectory+'\".\n')
		if nameFilter:
			sys.stdout.write('  Using name filter pattern \"'+nameFilter.pattern+'\".\n')
		if genreFilter:
			sys.stdout.write('  Using genre filter pattern \"'+genreFilter.pattern+'\".\n')
	total=len(songdict)
	count=0
	# limit the filename length, on Windows NT and Unix this is 255 chars, on MacOS9 31 chars
	maxfilenamelen=254
	if sys.platform=='mac':
		maxfilenamelen=31
	for mp3Filename,trackInfo in songdict.items():
		count=count+1
		# prefix of all per-track messages, e.g. "[3/120] "
		progress='['+str(count)+'/'+str(total)+'] '
		# rebuild iPod file path for current OS specific path separators (:\/)
		# filepath must be relative not absolute, otherwise os.path.join hiccups on unix,
		# so the first char is dropped (presumably the leading ':' of the stored path)
		pathparts=mp3Filename[1:].split(':')
		# check and replace topLevelFolder for XPlay
		if topLevelFolder!='iPod_Control':
			pathparts[0]=topLevelFolder
		mp3SourceFile=os.path.join(iPodSourceVolume,os.sep.join(pathparts))

		# get track info and build a valid target pathname
		mp3Artist,mp3Album,mp3Title,mp3Genre=trackInfo
		# clean strings from illegal chars and convert from unicode
		mp3Artist=SafeDecode(CleanPathName(mp3Artist))
		mp3Album=SafeDecode(CleanPathName(mp3Album))
		mp3Title=SafeDecode(CleanPathName(mp3Title))
		# assemble target path dir and filename
		# NOTE(review): the names are cut after encoding, so a multi-byte
		# character may be split at the limit - confirm if this matters
		mp3TargetDir=os.path.join(targetDirectory,mp3Artist[:maxfilenamelen],mp3Album[:maxfilenamelen])
		mp3TargetFile=os.path.join(mp3TargetDir,mp3Title[:maxfilenamelen-4]+'.mp3')
		# check for name regular expression pattern filter, if yes apply
		if nameFilter and not nameFilter.search(mp3TargetFile):
			if verbose>2:
				sys.stdout.write(progress+'skipping, no match name pattern: \"'+mp3TargetFile+'\"\n')
			continue
		# check for genre regular expression pattern filter, if yes apply
		if genreFilter and not genreFilter.search(mp3Genre):
			if verbose>2:
				sys.stdout.write(progress+'skipping, no match genre pattern: \"'+mp3TargetFile+'\"\n')
			continue
		# make sure we can actually read source file, skip otherwise
		# this safety catch is in here because some 3rd party Music softwares (e.g. Xplay)
		# seem to muck around in the iPod DB in a way that causes problems
		if not os.path.isfile(mp3SourceFile):
			if verbose>0:
				sys.stdout.write(progress+'Warning: source file \"'+mp3SourceFile+'\"')
				sys.stdout.write(' does not exist, iPod corrupted? Skipping...\n')
			continue
		# check if file already exists on target volume, if yes, skip unless forceCopy
		if os.path.isfile(mp3TargetFile) and not forceCopy:
			if verbose>1:
				sys.stdout.write(progress+'skipping, file exists: \"'+mp3TargetFile+'\"\n')
			continue
		if verbose>0:
			sys.stdout.write(progress+'copy from \"'+mp3SourceFile+'\"')
			sys.stdout.write(' to \"'+mp3TargetFile+'\"\n')
		# if dry run, no copy, just message
		if dryRun:
			continue
		try:
			if isMacOS9:
				# macostools.copy creates directory on-the-fly and preserves TYPE/CREATOR etc.
				macostools.copy(mp3SourceFile, mp3TargetFile,1)
			else:
				if not os.path.isdir(mp3TargetDir):
					os.makedirs(mp3TargetDir)
				shutil.copy(mp3SourceFile, mp3TargetFile)
		except (KeyboardInterrupt, SystemExit):
			# an abort by the user is not a failed copy, pass it on unchanged
			raise
		except Exception:
			# sys.exc_info() instead of "except ... as" keeps this readable
			# for every Python version the script supports
			reason=sys.exc_info()[1]
			sys.stderr.write('Error: could not copy to \"'+mp3TargetFile+'\": '+str(reason)+'\n')
			sys.exit(2)


def GetSourceTargetDir(source='',target=''):
	"""Set source and target volumes / directories automagically.

	The result is stored in the globals iPodSourceVolume, targetDirectory
	and topLevelFolder.

	If both source and target are given they must be existing directories,
	otherwise the usage message is printed and the program exits with
	status 2. If they are not given, the iPod is searched on the drives D: to
	Z: (Windows) or below /Volumes (Mac OS X) and everything still missing is
	asked for in a directory dialog. The program exits with status 0 if the
	user cancels a Tkinter dialog, and with status 1 if a MacOS 9 dialog is
	cancelled, Tkinter is not available or the selected volume has no
	iTunesDB.
	"""
	global iPodSourceVolume, topLevelFolder, targetDirectory
	# init with given values
	if len(source)>0 and len(target)>0:
		if not (os.path.isdir(source) and os.path.isdir(target)):
			sys.stderr.write('Error: command line source/target dir arguments do not exist.\n')
			usage()
			sys.exit(2)
		iPodSourceVolume=source
		targetDirectory=target
		return

	# no values given on command line
	# try to "automagically" find the iPod
	if sys.platform=='win32':
		# Win, check available drives
		for lix in range(ord('D'),ord('Z')+1):
			windrive=chr(lix)+u':\\'
			if not os.path.exists(windrive):
				continue
			# try to find XPlay or iPod_Control directory
			try:
				for winfile in os.listdir(windrive):
					if winfile.startswith('XPlay music.') and os.path.isfile(os.path.join(windrive,winfile,'iTunes','iTunesDB')):
						iPodSourceVolume=windrive
						topLevelFolder=winfile
						break
				if topLevelFolder=='iPod_Control' and os.path.isfile(os.path.join(windrive,'iPod_Control','iTunes','iTunesDB')):
					iPodSourceVolume=windrive
					break
				elif topLevelFolder!='iPod_Control':
					# an XPlay folder was found on this drive
					break
			except OSError:
				# drive cannot be listed (WindowsError is an OSError), try the next one
				pass
	elif sys.platform=='darwin':
		# Mac OS X
		for volname in os.listdir('/Volumes'):
			volpath=os.path.join('/Volumes',volname)
			if os.path.isdir(volpath) and os.path.isdir(os.path.join(volpath,topLevelFolder)):
				iPodSourceVolume=volpath
				break

	# get missing values interactively
	# if on MacOS 9, use MacPython Dialogs, otherwise fall back to Tkinter on Windows/Unix
	if isMacOS9:
		fsspec, ok = macfs.GetDirectory('Select iPod Volume:')
		if not ok:
			sys.exit(1)
		iPodSourceVolume = fsspec.as_pathname()
		fsspec, ok = macfs.GetDirectory('Select target music directory:')
		if not ok:
			sys.exit(1)
		targetDirectory = fsspec.as_pathname()
	else:
		try:
			import tkFileDialog
		except ImportError:
			sys.stderr.write('Error: Tkinter module not installed.\n')
			sys.stderr.write('	   Install Python with Tkinter or run the script with command line arguments.\n')
			sys.exit(1)
		# skip the iPod selection dialog if we found it already
		if len(iPodSourceVolume)>0:
			if verbose>0:
				sys.stdout.write('Using iPod volume/drive \"'+iPodSourceVolume+'\".\n')
		else:
			iPodSourceVolume=tkFileDialog.askdirectory(title='Select iPod Drive:',initialdir='')
			if not iPodSourceVolume:
				# user hit the cancel button, we assume exit wish
				# (same as for the target directory dialog below)
				sys.exit(0)
			# check the iPod Volume/Dir (on Mac OS X tkfiledialog returns incomplete path)
			if not os.path.isfile(os.path.join(iPodSourceVolume,topLevelFolder,'iTunes','iTunesDB')):
				macVolume=os.path.join('/Volumes',iPodSourceVolume)
				if os.path.isfile(os.path.join(macVolume,topLevelFolder,'iTunes','iTunesDB')):
					# use the volume that was just verified, not the path of its db file
					iPodSourceVolume=macVolume
				else:
					sys.stderr.write('Error: iPod volume/drive \"'+iPodSourceVolume+'\" is invalid, cannot find iTunes DB.\n')
					sys.exit(1)
		# get target dir
		targetDirectory=tkFileDialog.askdirectory(title='Select target music directory:',initialdir='')
		if len(targetDirectory)>0:
			if verbose>0:
				sys.stdout.write('Using target directory \"'+targetDirectory+'\".\n')
		else:
			# user hit the cancel button, we assume exit wish
			sys.exit(0)

def usage():
	"""Write the command line synopsis and the list of options to stdout."""
	# one entry per output line, written in this order
	helpLines=(
		'Usage: '+sys.argv[0]+' [options] [ <iPod source volume/drive> <target directory> ]\n',
		'  Options:\n',
		'	[ -h | --help ]  print this message\n',
		'	[ -f | --force ]  force copying of files, even if they already exist on target\n',
		'	[ -d | --dry-run ]  only print copy messages but do not copy files (overrides --force)\n',
		'	[ -v | --verbose=<level> ]  print messages about activity (0=none, 1=default, 3=all)\n',
		'	[ -e | --encoding=<unicode encoding> ]  use this encoding instead of system setting\n',
		'	[ -n | --name-filter=<regular expression pattern> ]  copy only filenames that match pattern\n',
		'	[ -g | --genre-filter=<regular expression pattern> ]  copy only matching genre pattern\n',
		'\n',
	)
	for helpLine in helpLines:
		sys.stdout.write(helpLine)

def _CompileFilter(patternText,flags):
	"""Compile the value of a filter option into a regular expression object.

	Surrounding whitespace and one pair of matching quotes (left over from
	the shell) are removed first. Raises ValueError for an empty value and
	re.error for an invalid pattern.
	"""
	ps=patternText.strip()
	if not ps:
		raise ValueError('empty pattern')
	# remove quotes from shell?
	if len(ps)>=2 and ps[0]==ps[-1] and ps[0] in ('"',"'"):
		ps=ps[1:-1]
	return re.compile(ps,flags)

def main():
	"""Main action: parse the command line, find the iPod and copy its tracks.

	Options are stored in the module globals. Exits with status 2 for an
	unknown option, with status 1 for an invalid option value and with
	status 0 after printing the help message.
	"""
	global iPodSourceVolume, topLevelFolder, targetDirectory, forceCopy, verbose, dryRun, uencoding, nameFilter, genreFilter
	# look for command line options: <source> <target>
	try:
		# long options that take a value need the trailing '=', just like the
		# ':' of their short form
		opts, args = getopt.getopt(sys.argv[1:], "hdfv:e:n:g:", ['help', 'dry-run','force','verbose=','encoding=','name-filter=','genre-filter='])
	except getopt.GetoptError:
		# print help information and exit:
		usage()
		sys.exit(2)
	for o, a in opts:
		if o in ("-h", "--help"):
			usage()
			sys.exit(0)
		elif o in ("-v", "--verbose"):
			try:
				verbose = int(a)
			except ValueError:
				sys.stderr.write('Error: verbose level \"'+a+'\" must be positive integer.\n')
				sys.exit(1)
		elif o in ("-d", "--dry-run"):
			dryRun = 1
		elif o in ("-f", "--force"):
			forceCopy = 1
		elif o in ("-n", "--name-filter"):
			try:
				nameFilter=_CompileFilter(a,re.U)
			except (re.error, ValueError, OverflowError):
				sys.stderr.write('Error: Name filter regular expression pattern \"'+a+'\" is invalid.\n')
				sys.exit(1)
		elif o in ("-g", "--genre-filter"):
			try:
				# genres are matched regardless of case
				genreFilter=_CompileFilter(a,re.U|re.I)
			except (re.error, ValueError, OverflowError):
				sys.stderr.write('Error: Genre filter regular expression pattern \"'+a+'\" is invalid.\n')
				sys.exit(1)
		elif o in ("-e", "--encoding"):
			try:
				import codecs
				# only checks that the encoding exists, the result is not needed
				codecs.lookup(a)
				uencoding = a
			except LookupError:
				sys.stderr.write('Error: Unicode encoding \"'+a+'\" is unknown or unavailable.\n')
				sys.exit(1)

	if len(args)==2:
		# set source, target
		GetSourceTargetDir(args[0],args[1])
	else:
		# any other number of arguments falls back to detection and dialogs
		GetSourceTargetDir()
	# check unicode and print warning
	if uencoding=='ascii':
		sys.stdout.write('Warning: your Python site encoding is set to \"ascii\".\n')
		sys.stdout.write('		 This may result in garbled file names as non-ascii characters cannot be converted.\n')
		sys.stdout.write('		 Try \"latin-1\", \"utf8\" or your platform encoding instead for better results.\n')
	else:
		if verbose>0:
			sys.stdout.write('Using local encoding \"'+uencoding+'\".\n')
	mytracks=ReadSongInfo(os.path.join(iPodSourceVolume,topLevelFolder,'iTunes','iTunesDB'))
	if verbose>0:
		sys.stdout.write('Found '+str(len(mytracks))+' MP3 tracks.\n')
	BuildTreeCopy(mytracks)


# start the backup only when run as a script, not when imported as a module
if "__main__"==__name__:
	main()

