#!/usr/bin/env python


# The origins of this code were found somewhere on the net, 
# so I don't know who to credit for it.
# Since I've modified it quite extensively, the GPL should cover it.
#
# This program is released under the GNU GPL, version 2 or later.
# See http://www.gnu.org/licenses/gpl.html
#
#
# This code is useful for reverse-engineering and exploring
# Apple's iPod iTunesDB file format.
# The iTunesDB is the primary database for the iPod. It contains all
# information about the songs that the iPod is capable of playing, as
# well as the playlists. It's never written to by the Apple iPod firmware.
# During an autosync, iTunes completely overwrites this file.
# For more technical information about the iPod and its files, see the WikiPodLinux online.



import struct, sys, os, time, codecs, string
# Stateless UTF-16-LE codec functions plus the stream reader/writer classes.
utf_encode, utf_decode, sr, sw = codecs.lookup('utf-16-le')

# Module-wide state shared by the record classes below.
# (No `global` statement is needed: these are plain module-level names.)
songs_dict = {}		# maps a song key to the mhit record that carries it
verbose = 0		# values > 0 make the parser print a trace
uencoding = 'utf8'	# output encoding for names: 'latin-1' or 'utf8'


class mh_record:
	"""Parent class for all record types.

	On disk every record starts with a 4-byte type tag followed by a 4-byte
	little-endian length (rec_len).  The rec_len-8 bytes after that are kept
	in self.hdr and decoded by the subclass for that record type.
	"""
	def __init__(self, type, rec_len, hdr, offset, db, level):
		"""Remember the values read_record() parsed for this record.

		type    -- 4-character record tag (e.g. 'mhit')
		rec_len -- length stored in the record header
		hdr     -- raw header bytes that follow the tag and the length
		offset  -- file position right after the header (start of the data)
		db      -- open, seekable file object of the database
		level   -- nesting depth, used to indent the verbose trace
		"""
		self.type = type
		self.rec_len = rec_len
		self.hdr = hdr
		self.data_start = offset
		self.db = db
		self.level = level

	def read_record(self):
		"""Read the record at the current file position and return its object.

		Raises struct.error when the header is truncated or declares a length
		smaller than the fixed 8-byte prefix, and KeyError when the record
		tag is not one of the known types.
		"""
		# header: 4 bytes record type (string) + 4 bytes this record length (le long)
		hdr_format = '<4sL'
		hdr_size = struct.calcsize(hdr_format)
		rec_start = self.db.tell()
		hdr = self.db.read(hdr_size)
		if len(hdr) != hdr_size:
			raise struct.error('truncated record header at offset %d: got %d of %d bytes'
							   % (rec_start, len(hdr), hdr_size))
		(rec_type, rec_len) = struct.unpack(hdr_format, hdr)
		if rec_len < hdr_size:
			# a negative size passed to read() would swallow the rest of the file
			raise struct.error('bad record length %d at offset %d (minimum is %d)'
							   % (rec_len, rec_start, hdr_size))
		hdr = self.db.read(rec_len-hdr_size)
		if len(hdr) != rec_len-hdr_size:
			raise struct.error('truncated record at offset %d: header declares %d bytes'
							   % (rec_start, rec_len))
		data_start = self.db.tell()

		# dispatch table: record tag -> class that decodes it
		f = {'mhbd' : mhbd_record,
			 'mhsd' : mhsd_record,
			 'mhit' : mhit_record,
			 'mhod' : mhod_record,
			 'mhlt' : mhlt_record,
			 'mhlp' : mhlp_record,
			 'mhyp' : mhyp_record,
			 'mhip' : mhip_record}
		if rec_type not in f:
			raise KeyError('unknown record type %r at offset %d' % (rec_type, rec_start))

		if verbose>0:
			print('%s%i instancing %s' % (' '*self.level, self.level, rec_type))
		return f[rec_type](rec_type, rec_len, hdr, data_start, self.db, self.level+1)


	def fill_in(self, nb_rec):
		"""Read nb_rec consecutive child records into self.record.

		Reading starts at self.data_start; the file position is left after
		the last child that was read.
		"""
		self.record = []
		self.db.seek(self.data_start)
		for i in range(nb_rec):
			if verbose>0:
				print('%s %s- rec %d/%d' % (' '*self.level, self.type, i, nb_rec))
			self.record.append(self.read_record())

	def display(self):
		"""Print the raw header bytes; subclasses provide nicer output."""
		print(self.hdr)


class mhbd_record(mh_record):
	"""Only one of these per DB file.

	Decodes the database header, stores the 8-byte database id as an integer
	in self.idb_id and reads the first two child records.
	"""
	def __init__(self, type, rec_len, hdr, offset, db, level):
		mh_record.__init__(self, type, rec_len, hdr, offset, db, level)
		self.format = '<LLLL8s'
		# whatever follows the known fields is unpacked as one opaque padding string
		pad_bytes=len(self.hdr)-struct.calcsize(self.format)
		self.format = self.format + str(pad_bytes) + 's'
		(composite_size, u1, iTunes_version_number, n_childrecs, idb_id, padding) = struct.unpack(self.format, self.hdr)

		if (n_childrecs != 2):
			if verbose>0:
				print('%i (should be 2)' % (n_childrecs))
				print('Warning: Strange mhbd_record')
		# The id is 8 raw bytes; decode it as a little-endian unsigned 64-bit
		# value, like every other field of the file.  (The earlier hand-rolled
		# loop weighted byte i with 256*i instead of 256**i.)
		self.idb_id = struct.unpack('<Q', idb_id)[0]
		self.data_size = composite_size - rec_len
		# only the first two children are read, whatever n_childrecs says
		self.fill_in(2)
			
		
class mhsd_record(mh_record):
	"""Represents a playlist holder, holds lists of songs (MHLT) or playlists (MHLP).

	The header carries the composite size and a list index (read but not
	kept); exactly one child record is read.
	"""
	def __init__(self, type, rec_len, hdr, offset, db, level):
		mh_record.__init__(self, type, rec_len, hdr, offset, db, level)
		fixed_fields = '<LL'
		# the bytes after the known fields are unpacked as one padding string
		tail_len = len(self.hdr) - struct.calcsize(fixed_fields)
		self.format = '%s%ds' % (fixed_fields, tail_len)
		fields = struct.unpack(self.format, self.hdr)
		composite_size = fields[0]

		self.data_size = composite_size - rec_len
		self.fill_in(1)

class mhlt_record(mh_record):
	"""Header for a track list. More info in child records (Mhit-Mhod).

	The first header field is the number of songs; that many child records
	are read into self.record.
	"""
	def __init__(self, type, rec_len, hdr, offset, db, level):
		mh_record.__init__(self, type, rec_len, hdr, offset, db, level)
		fixed_fields = '<L'
		# the bytes after the song count are unpacked as one padding string
		tail_len = len(self.hdr) - struct.calcsize(fixed_fields)
		self.format = '%s%ds' % (fixed_fields, tail_len)
		n_songs = struct.unpack(self.format, self.hdr)[0]

		# this record type carries no composite size of its own
		self.data_size = 0
		self.fill_in(n_songs)

class mhit_record(mh_record):
	"""Song/track file item. Child records hold title, genre, etc. (Mhod)

	Every instance registers itself in the module-level songs_dict under its
	key; a second record with the same key raises ValueError.
	"""
	def __init__(self, type, rec_len, hdr, offset, db, level):
		mh_record.__init__(self, type, rec_len, hdr, offset, db, level)
		self.format = '<LLLLL2sccLLLLLLL2s2sLLLLLLLLLLLL8scc2s2s2sLLL'
		# alternative layout kept for reference:
		# '<LLLLLIBBLLLLLLLIILLLLLLLLLLLL8sBBIIILLL'
		# whatever follows the known fields is unpacked as one padding string
		pad_bytes=len(self.hdr)-struct.calcsize(self.format)
		self.format = self.format + str(pad_bytes) + 's'
		(composite_size, n_strings, self.key, visible, ft,
		 self.filetype,self.compilation_flag, self.stars_rating,
		 self.file_date, self.file_size, self.file_duration, self.position, self.tracknumber, self.year, self.bitrate,
		 self.u12, self.sample_rate1,
		 self.volume_adjust, self.startplayback, self.stopplayback,self.soundcheck,self.play_count1,self.play_count2,
		 self.last_time_played,self.disc_number,self.total_discs,self.applestore_userid,self.last_modified, self.bookmark_time,
		 self.db_song_id, self.checked, self.application_rating,self.bpm,self.artwork_count,u9,self.artwork_size,u11,
		 self.sample_rate2,
		 padding) = struct.unpack(self.format, self.hdr)

		self.data_size = composite_size - rec_len
		self.fill_in(n_strings)

		if self.key in songs_dict:
			# song ID, should be unique
			# (string exceptions are rejected by Python 2.6+, so raise a real one)
			raise ValueError('Error: duplicate song file item (mhit) key - should be unique: '+str(self.key)+'\n - old: '+songs_dict[self.key]._title()+'\n - new: '+self._title())
		songs_dict[self.key] = self

	def _title(self):
		"""Return the encoded name of the first child record, '' if it has none."""
		if self.record:
			name = getattr(self.record[0], 'name', None)
			if name is not None:
				return name.encode(uencoding,'replace')
		return ''

	def _mmss(self, millis):
		"""Split a duration in milliseconds into whole (minutes, seconds)."""
		sec = millis/1000.0
		return (int(sec/60), int(sec%60))

	def display(self):
		"""Print a one-line summary with the record key."""
		print('mhit record %i ' % (self.key))

	def display_full(self):
		"""Print the key, duration, title and the main numeric fields,
		followed by one line per child record."""
		(m, s) = self._mmss(self.file_duration)
		print('mhit record %08X : (%02d:%02d) %s' % (self.key, m, s, self._title()))
		print('\tFile size : %3.3f MB' % (self.file_size / (1024.0*1024.0)))
		#date_offset = long(-2082848400.0)+time.timezone
		#print '\tFile date : %s' % (time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(self.file_date+date_offset)))
		print('\tTrack number %i of %i' % (self.position,self.tracknumber))
		print('\tYear: %i' % (self.year))
		print('\tBitrate: %i' % (self.bitrate))
		print('\tstart playback: (%02d:%02d)' % self._mmss(self.startplayback))
		print('\tstop playback: (%02d:%02d)' % self._mmss(self.stopplayback))
		for r in self.record:
			# tolerate children with an unlisted type code or without a decoded name
			label = getattr(r, 'typedict', {}).get(r.type, 'Unknown type %s' % (r.type,))
			name = getattr(r, 'name', u'')
			print('\t\t%s: \"%s\"' % (label, name.encode(uencoding,'replace')))


class mhod_record(mh_record):
	"""This record type holds string data, typically info associated with a song.

	After construction self.type holds the numeric mhod type (it replaces the
	'mhod' tag), self.data the raw payload and self.name the decoded string
	(empty for types that carry no string).
	"""
	# numeric mhod type -> human readable label
	typedict = {1:'Song title',
				2:'Song filename',
				3:'Album name',
				4:'Artist name',
				5:'Genre',
				6:'File type',
				7:'EQ Setting',
				8:'Comment',
				9:'',
				10:'',
				11:'',
				12:'Composer',
				13:'Grouping',
				50:'Smart Playlist Data',
				51:'Smart Playlist Rules',
				52:'Smart Playlist Index',
				100:'Column sizing or order indicator'}

	def __init__(self, type, rec_len, hdr, offset, db, level):
		mh_record.__init__(self, type, rec_len, hdr, offset, db, level)
		self.format = '<LL'
		pad_bytes=len(self.hdr)-struct.calcsize(self.format)
		if pad_bytes>0:
			self.format = self.format + str(pad_bytes) + 's'
		# Only the first two fields matter.  Slicing (rather than unpacking
		# into three names) also works when the header has no padding at all.
		(composite_size, self.type) = struct.unpack(self.format, self.hdr)[:2]

		# read the rest of the mhod
		self.data_size = composite_size - rec_len
		self.data = self.db.read(self.data_size)

		# defaults, so display code can rely on these attributes for every mhod
		self.name = u''
		self.name_len = 0
		self.utf_name_len = 0

		# string type mhod: 16 bytes of sub-header, then the UTF-16-LE text
		if self.type<50 and len(self.data)>=16:
			(position, self.utf_name_len, u3, u4) = struct.unpack('<LLLL', self.data[:16])
			(self.name, self.name_len) = utf_decode(self.data[16:])

	def display(self):
		"""Print the label for this mhod type together with its string."""
		label = self.typedict.get(self.type, 'Unknown type %s' % (self.type,))
		print('\t\t%s: \"%s\"' % (label, self.name.encode(uencoding,'replace')))


class mhlp_record(mh_record):
	"""Playlist list (has all playlists as children)"""
	def __init__(self, type, rec_len, hdr, offset, db, level):
		mh_record.__init__(self, type, rec_len, hdr, offset, db, level)
		self.format = '<L'
		# whatever follows the playlist count is unpacked as one padding string
		pad_bytes=len(self.hdr)-struct.calcsize(self.format)
		self.format = self.format + str(pad_bytes) + 's'
		(self.n_playlists, padding) = struct.unpack(self.format, self.hdr)

		# this record type carries no composite size of its own
		self.data_size = 0
		self.fill_in(self.n_playlists)

	def display(self):
		"""Print the number of playlists, then the index and name of each."""
		# the count lives in n_playlists (there is no 'nb' attribute)
		print('mhlp with %d lists:' % (self.n_playlists))
		for i in range(self.n_playlists):
			# encode like the other display methods so non-ASCII names print safely
			print('\t%04d %s' % (i, self.record[i].name().encode(uencoding,'replace')))
		

class mhyp_record(mh_record):
	"""A playlist record, contains records that point to the songs in the play list."""
	def __init__(self, type, rec_len, hdr, offset, db, level):
		mh_record.__init__(self, type, rec_len, hdr, offset, db, level)
		self.format = '<LLLLLLLLL'
		# whatever follows the known fields is unpacked as one padding string
		pad_bytes=len(self.hdr)-struct.calcsize(self.format)
		self.format = self.format + str(pad_bytes) + 's'
		(composite_size, self.n_dataobjects, self.n_items, self.playlist_hidden, self.timestamp, self.playlist_id,
		 u3, self.n_mhod_strings, self.n_mhod_libraries, padding) = struct.unpack(self.format, self.hdr)

		self.data_size = composite_size - rec_len
		# two leading records, then two records per playlist item
		self.fill_in(2+self.n_items*2)

		# offset points just past the header, so this is where the whole record ends
		end_of_record = offset-rec_len+composite_size
		if self.db.tell() != end_of_record:
			print('Warning : mhyp end not reached')
		# resynchronise on the declared end whatever was actually consumed
		self.db.seek(end_of_record)

	def name(self):
		"""Return the name stored in the second child record."""
		return self.record[1].name

	def display(self):
		"""Print one line per playlist item: its index and the song it refers to."""
		# rec 0 is mhod with play list's name
		# rec 1 is empty mhod ???
		# after that the items follow, two records each; the first of each
		# pair is the mhip that references the song
		for i in range(self.n_items):
			item = self.record[2+2*i]
			# mhip records expose the referenced song as track_id (there is no 'ptr')
			song = songs_dict.get(item.track_id)
			if song is None:
				print('\t%04d <no song with id %d>' % (i, item.track_id))
				continue
			# same output as a print with trailing comma: index, space, song line
			sys.stdout.write('\t%04d ' % (i))
			song.display()

class mhip_record(mh_record):
	"""Playlist item.

	Keeps the decoded header fields as attributes and the raw payload that
	follows the header in self.data.
	"""
	def __init__(self, type, rec_len, hdr, offset, db, level):
		mh_record.__init__(self, type, rec_len, hdr, offset, db, level)
		fixed_fields = '<LLLLLL'
		# the bytes after the known fields are unpacked as one padding string
		tail_len = len(self.hdr) - struct.calcsize(fixed_fields)
		self.format = '%s%ds' % (fixed_fields, tail_len)
		fields = struct.unpack(self.format, self.hdr)
		composite_size = fields[0]
		(self.u1, self.correlation_id, self.key,
		 self.track_id, self.timestamp_pi) = fields[1:6]

		self.data_size = composite_size - rec_len
		self.data = self.db.read(self.data_size)


def testit(filename=u'iTunesDB'):
	"""Parse the database file `filename` and print a short report.

	Prints the number of songs found and the full details of the first song.
	The module-level songs_dict is emptied first, so it describes only this
	file afterwards.
	"""
	# test it
	print('###### - '+filename)
	# song keys are only unique within one database; leftovers from a previous
	# file would trigger the duplicate-key check
	songs_dict.clear()
	idb = open(filename,'rb')
	try:
		hd = mh_record('', 0, '', 0, idb, 0)
		root = hd.read_record()
	finally:
		# release the handle even when parsing fails
		idb.close()

	# general info
	print('Found  %d  songs in iTunesDB: %s' % (len(songs_dict), filename))

	# first song (skipped when the track list is empty)
	track_list = root.record[0].record[0]
	if track_list.record:
		track_list.record[0].display_full()
	#print root.idb_id

	# master playlist
	#root.record[1].record[0].record[0].display()

	# First user playlist
	#root.record[1].record[0].record[1].display()


if __name__ == "__main__":
	# Run testit() on every entry of the 'db' directory whose name contains
	# '.iTunesDB' anywhere except at the very start.
	print('###########')
	db_dir = 'db'
	for entry in os.listdir(db_dir):
		if entry.find('.iTunesDB') > 0:
			testit(os.path.join(db_dir, entry))




