#!/usr/bin/env python


Copyright © 2008 Dave Bayer. Subject to a BSD-style license.

This Python module is part of the iTunes project.

Import needed system modules:

import os
import sys
import codecs
import xml.etree.ElementTree as ET

appscript is an Apple event bridge that allows Python scripts to control scriptable Mac OS X applications. It is a third-party module that must be installed separately:

from appscript import *


iTunes is an appscript application object, representing the iTunes application.

# Module-level appscript application object through which the functions in this module talk to iTunes.
iTunes = app('iTunes')


get_library returns the master music library playlist.

def get_library():
    """Return the library playlist named 'Library' (the master music library)."""
    library_playlists = iTunes.library_playlists
    return library_playlists['Library']


get_folders returns a list of folders that contain other playlists.

def get_folders():
    """Return the folder playlists reported by iTunes."""
    folders = iTunes.folder_playlists()
    return folders


count_folder counts the contents of a folder, to assist in deciding when a folder can be deleted.

def count_folder(folder):
    """Return how many playlists folder contains.

    Used to decide when a folder has become empty and can be deleted.
    """
    element_kind = k.playlist
    return folder.count(each=element_kind)


get_playlists returns a list of custom playlists created by the user, ignoring those playlists created by iTunes by default.

skip is likely to need future updating.

def get_playlists():
    """Return the user's custom playlists, omitting the iTunes defaults.

    User playlists are filtered by name against a fixed list of the
    playlists that iTunes creates by default. That list is likely to need
    future updating.
    """
    skip = [
        u'Party Shuffle',
        u'90\u2019s Music',
        u'Music Videos',
        u'My Top Rated',
        u'Recently Added',
        u'Recently Played',
        u'Top 25 Most Played',
        u'TV Shows',
    ]
    return [x for x in iTunes.user_playlists() if x.name() not in skip]


get_tracks returns a list of the tracks in playlist.

def get_tracks(playlist):
    """Return the file tracks of playlist."""
    file_tracks = playlist.file_tracks()
    return file_tracks


get_parent returns the parent folder for a playlist if there is one. It is coded as a try block because accessing the parent generates an exception if no parent exists.

One can code either playlist.parent or playlist.parent() or playlist.parent.get() here. The first form will not work here; it postpones access to outside the try block, where exceptions will not be caught. The last two forms appear to be equivalent.

def get_parent(playlist):
    """Return the parent folder of playlist, or None if it has no parent.

    Accessing the parent raises an exception when no parent exists, so the
    access is made inside the try block. It must be an actual call
    (playlist.parent() or playlist.parent.get()); a bare playlist.parent
    would postpone the access to outside the try block, where the exception
    would not be caught.
    """
    try:
        return playlist.parent()
    except Exception:
        # Any failure to fetch the parent is treated as "no parent".
        return None


list_names outputs the name of each list element. list can for example be a list returned by either get_playlists or get_folders.

def list_names(list, out):
    """Write the name of every element of list to out, one name per line.

    list can be, for example, the result of get_playlists or get_folders;
    each element must provide a name() method.
    """
    emit = out.write
    for item in list:
        line = '%s\n' % item.name()
        emit(line)


check_track confirms that a track exists on disk. If there is no location attribute, accessing the location throws an AttributeError exception. Otherwise, we check the path, throwing and catching the same exception if the file does not exist.

def check_track(track):
    """Return True if track's file exists on disk, otherwise False.

    If the track has no usable location, reading the location's path raises
    AttributeError, which is reported as False. Otherwise the result is
    whether the path names an existing file.
    """
    try:
        path = track.location().path
    except AttributeError:
        return False
    return os.path.isfile(path)


delete_missing deletes the file tracks in the master music library that do not exist on disk, writing their names to log, a unicode file open for writing.

def delete_missing(log):
    """Delete library tracks whose files do not exist on disk.

    Every file track of the master music library is checked with
    check_track. Each missing track is reported to log (a unicode file open
    for writing) as '- artist - name' and then deleted from iTunes.
    """
    library = get_library()
    for track in get_tracks(library):
        if not check_track(track):
            # Log first, while the track's properties can still be read.
            log.write('- %s - %s\n' % (track.artist(), track.name()))
            iTunes.delete(track)


freeze_tracks returns a frozenset representing the set of tracks in the list tracks. This result can be hashed, and can be used to decide if two track lists agree as sets. database_ID is the common, unique ID for each track.

def freeze_tracks(tracks):
    """Return the frozenset of database IDs of the tracks in tracks.

    The result is hashable and can be used to decide whether two track
    lists agree as sets.
    """
    ids = (track.database_ID() for track in tracks)
    return frozenset(ids)


catalog_playlists returns a dictionary of playlists, indexed by their track sets.

def catalog_playlists(playlists):
    """Return a dictionary mapping each playlist's frozen track set to the playlist.

    When several playlists have the same track set, the last one in
    playlists is the one kept.
    """
    catalog = {}
    for playlist in playlists:
        track_set = freeze_tracks(get_tracks(playlist))
        catalog[track_set] = playlist
    return catalog


catalog_folders returns a dictionary of folders, indexed by their names.

def catalog_folders(folders):
    """Return a dictionary mapping each folder's name to the folder.

    When several folders share a name, the last one in folders is the one
    kept.
    """
    by_name = {}
    for folder in folders:
        by_name[folder.name()] = folder
    return by_name


track_tuple creates a tuple from the list keys, standardizing the handling of missing values.

def track_tuple(keys):
    """Return keys as a tuple, with every false value replaced by 'Unknown'."""
    return tuple(value or 'Unknown' for value in keys)


catalog_tracks returns a dictionary of tracks, indexed by artist, album, name tuples. Anomalies are reported to log, a unicode file open for writing.

def catalog_tracks(playlist, log):
    """Return a dictionary of playlist's tracks keyed by (artist, album, name).

    The artist is the track's album artist, falling back to its artist when
    the album artist is empty. Missing values are standardized by
    track_tuple. When two tracks produce the same key, the first one is
    kept and the duplicate is reported to log, a unicode file open for
    writing.
    """
    catalog = dict()
    for track in get_tracks(playlist):
        artist = track.album_artist()
        if not artist:
            artist = track.artist()
        album = track.album()
        name = track.name()
        key = track_tuple([artist, album, name])
        if key in catalog:
            log.write('? duplicate track: %s - %s - %s\n' % key)
        else:
            catalog[key] = track
    return catalog


setdict is a set-valued dictionary, with an add method that adjoins val to the set indexed by key.

class setdict(dict):
    """A dictionary whose values are sets."""

    def add(self, key, val):
        """Adjoin val to the set indexed by key, creating the set on first use."""
        if key not in self:
            self[key] = set()
        self[key].add(val)


setdict2 is a setdict-valued dictionary, with an add method that adjoins val to the set indexed by key1 and key2.

class setdict2(setdict):
    """A dictionary whose values are setdicts, giving two-level keyed sets."""

    def add(self, key1, key2, val):
        """Adjoin val to the set indexed by key1 and then key2."""
        try:
            inner = self[key1]
        except KeyError:
            # First value seen for key1: start a new inner setdict.
            inner = setdict()
            self[key1] = inner
        inner.add(key2, val)


file_artist returns a string giving the artist name for track. This is taken to be the name of the enclosing folder of the album folder containing track, except that 'Compilations' is replaced by 'Various'.

If iTunes is permitted to "Keep iTunes Music folder organized", then this result will agree with iTunes' logic for determining album artist names, except for : and / characters that are replaced in file system names. These characters are rarely found in artist names.

def file_artist(track):
    """Return the artist name implied by where track's file is stored.

    The artist is the name of the folder enclosing the album folder that
    contains the track's file, except that 'Compilations' is replaced by
    'Various'. Returns None when the track has no usable location (reading
    the path raises AttributeError) or when the path is too short to have
    an artist folder.
    """
    try:
        path = track.location().path
    except AttributeError:
        return None
    album_folder = os.path.dirname(path)
    artist = os.path.basename(os.path.dirname(album_folder))
    if not artist:
        # Path has no artist-level folder; treat it like a missing location.
        return None
    return 'Various' if artist == 'Compilations' else artist


album_artist returns a string giving the artist name for the tracks in tracks. It looks for consistent information, and notes anomalies to log, a unicode file open for writing. album is a string used only for this report.

def album_artist(tracks, log, album):
    """Return the artist name to use for the album made up of tracks.

    Sources are tried in order: a single non-empty album artist shared by
    all tracks; a single non-empty artist shared by all tracks; 'Various'
    when every track is flagged as a compilation; a single artist implied
    by the file locations; and finally 'Unknown'.

    Anomalies are noted to log, a unicode file open for writing. album is
    a string used only in those notes.
    """
    artists = set([x.artist() for x in tracks])
    album_artists = set([x.album_artist() for x in tracks])
    compilations = set([x.compilation() for x in tracks])
    file_artists = set([file_artist(x) for x in tracks])
    compilation = compilations == set([True])
    if len(album_artists) == 1 and album_artists != set(['']):
        artist = album_artists.pop()
        if compilation:
            # A named artist on a compilation is worth a second look.
            log.write('Album %s (%s) is compilation\n' % (album, artist))
    elif len(artists) == 1 and artists != set(['']):
        artist = artists.pop()
        if compilation:
            log.write('Album %s (%s) is compilation\n' % (album, artist))
    elif compilation:
        artist = 'Various'
    elif len(file_artists) == 1:
        artist = file_artists.pop()
        log.write('Using file_artist for album %s (%s)\n' % (album, artist))
    else:
        artist = 'Unknown'
        log.write('Cannot determine artist for album %s\n' % album)
    return artist


collect_albums returns a setdict2 keyed by artist and album of the tracks in playlist, typically the master music library.

def collect_albums(playlist):
    """Return a setdict2 of playlist's tracks keyed by artist and then album.

    The artist comes from file_artist; tracks for which it is false are
    left out. playlist is typically the master music library.
    """
    collected = setdict2()
    for track in get_tracks(playlist):
        artist = file_artist(track)
        if not artist:
            continue
        collected.add(artist, track.album(), track)
    return collected


year_string returns a separator string indicating the year(s) of the tracks in tracks.

def year_string(tracks):
    """Return a separator string indicating the year(s) of the tracks in tracks.

    Tracks whose year is false (0 or missing) are ignored. The result is
    '-' when no year is known, '(Y)' for one distinct year, '(Y1,Y2)' for
    two, and '(first-last)' for three or more, with years in ascending
    order.
    """
    all_years = [x.year() for x in tracks]
    # Sort so that pairs and ranges are reported in ascending order
    # regardless of set iteration order.
    years = sorted(set([year for year in all_years if year]))
    n = len(years)
    if n == 0:
        return '-'
    if n == 1:
        return '(%d)' % years[0]
    if n == 2:
        return '(%d,%d)' % (years[0], years[1])
    return '(%d-%d)' % (years[0], years[-1])


title_key returns a sorting key for track titles, converting to lower case and stripping leading "A " and "The " prefixes.

def title_key(pair):
    """Return a sorting key for pair, based on its first element (a title).

    The title is converted to lower case and a single leading "a " or
    "the " prefix is stripped.
    """
    lowered = pair[0].lower()
    for prefix in ('a ', 'the '):
        if lowered.startswith(prefix):
            return lowered[len(prefix):]
    return lowered


track_key returns a sorting key for tracks.

def track_key(track):
    """Return a sorting key for track: its (disc number, track number) pair."""
    disc = track.disc_number()
    number = track.track_number()
    return (disc, number)


list_albums lists the albums found in the master music library. out and log are unicode files open for writing.

def list_albums(out, log):
    """List the albums found in the master music library.

    Each artist is written to out on its own line, followed by one
    tab-indented line per album giving the album name and its year string.
    Artists and albums are ordered with title_key, so the listing is stable
    and ignores case and leading "A "/"The ".

    out and log are unicode files open for writing; nothing is written to
    log by this function itself.
    """
    artists = sorted(collect_albums(get_library()).items(), key=title_key)
    for artist, albums in artists:
        out.write('%s\n' % artist)
        for album, tracks in sorted(albums.items(), key=title_key):
            years = year_string(tracks)
            out.write('\t%s %s\n' % (album, years))


list_playlists lists custom playlists created by the user. out is a unicode file open for writing.

def list_playlists(out):
    """Write the names of the user's custom playlists to out, a unicode file open for writing."""
    playlists = get_playlists()
    list_names(playlists, out)


list_folders lists folders that contain other playlists. out is a unicode file open for writing.

def list_folders(out):
    """Write the names of the folder playlists to out, a unicode file open for writing."""
    folders = get_folders()
    list_names(folders, out)



list_album_playlists lists the playlists that consist of entire albums found in the master music library. out is a unicode file open for writing.

def list_album_playlists(out):
    """List the playlists that consist of entire albums in the master music library.

    For every album collected from the library, the name of the user
    playlist with exactly the same track set (if there is one) is written
    to out, a unicode file open for writing.
    """
    library_albums = collect_albums(get_library())
    by_track_set = catalog_playlists(get_playlists())
    for artist in library_albums:
        for tracks in library_albums[artist].values():
            track_set = freeze_tracks(tracks)
            if track_set not in by_track_set:
                continue
            out.write('%s\n' % by_track_set[track_set].name())


make creates an iTunes item, with kind kind and name name, in the location loc if this argument is given.

def make(kind, name, loc=iTunes):
    """Create and return a new iTunes item of the given kind, named name.

    The item is created at loc when that argument is given; by default loc
    is the iTunes application object itself.
    """
    properties = {k.name: name}
    return iTunes.make(new=kind, at=loc, with_properties=properties)


check_tracks checks the track and disc numbers in tracks for consistency, reporting anomalies to log, a unicode file open for writing. artist and album are strings used only for this report.

def check_tracks(tracks, artist, album, log):
    """Check the track and disc numbers in tracks for consistency.

    The first anomaly found, if any, is written to log (a unicode file open
    for writing) as '! artist - album (label)'. artist and album are
    strings used only for this report. Nothing is written when the
    numbering is consistent.
    """
    error = _track_anomaly(tracks)
    # Write the first anomaly found to log:
    if error is not None:
        log.write('! %s - %s (%s)\n' % (artist, album, error))


def _track_anomaly(tracks):
    """Return a short label for the first numbering anomaly in tracks, or None."""
    # Check that all tracks report the same disc count:
    disc_counts = set([x.disc_count() for x in tracks])
    if len(disc_counts) != 1:
        return "disc count"
    disc_count = disc_counts.pop()
    if disc_count == 0:
        return "disc count"

    # Check that all tracks with a given disc number report the same track
    # count, and that all discs and tracks are present:
    track_counts = [set() for _ in range(disc_count)]
    track_numbers = [list() for _ in range(disc_count)]
    for track in tracks:
        n = track.disc_number()
        if not 1 <= n <= disc_count:
            # A disc number outside 1..disc_count cannot be filed anywhere.
            return "disc number"
        track_counts[n - 1].add(track.track_count())
        track_numbers[n - 1].append(track.track_number())
    for track_count, track_number in zip(track_counts, track_numbers):
        if len(track_count) == 0:
            return "missing disc"
        if len(track_count) != 1:
            return "track count"
        track_count = track_count.pop()
        if track_count == 0:
            return "track count"
        # tracks is typically an unordered set, so sort before comparing.
        if sorted(track_number) != list(range(1, track_count + 1)):
            return "track numbers"
    return None


add_tracks adds the tracks in tracks to playlist, reporting anomalies to log, a unicode file open for writing.

def add_tracks(tracks, playlist, log):
    """Add the tracks in tracks to playlist, in disc and track number order.

    tracks is typically an unordered set, so the tracks are sorted with
    track_key before being duplicated into playlist. log is a unicode file
    open for writing; it is accepted for interface compatibility and
    nothing is written to it here.
    """
    for track in sorted(tracks, key=track_key):
        iTunes.duplicate(track, to=playlist)


delete_playlist deletes the playlist or folder playlist, reporting to log, a unicode file open for writing, and updating the dictionary index if provided.

def delete_playlist(folder, log, index=None):
    """Delete the playlist or folder playlist named by folder.

    The deletion is reported to log, a unicode file open for writing, as
    '- name'. If index (a dictionary keyed by name) is provided, the entry
    for the deleted item is removed from it so that it stays current.
    """
    # Read the name first, while the item can still be queried.
    name = folder.name()
    iTunes.delete(folder)
    if index is not None:
        # pop with a default tolerates a name that was never indexed.
        index.pop(name, None)
    log.write('- %s\n' % name)


make_album_playlists makes playlists that consist of entire albums found in the master music library. log is a unicode file open for writing.

If a playlist is found with the same set of tracks but with a different name or location, then it is replaced.

def make_album_playlists(log):
    """Make playlists that consist of entire albums found in the master music library.

    One folder is used per artist and one playlist per album, named
    'artist years album'. If a playlist is found with the same set of
    tracks but with a different name or location, it is replaced. Folders
    emptied by such a replacement are deleted. Every creation ('+') and
    deletion ('-') is reported to log, a unicode file open for writing.
    """
    # artists is a setdict2 keyed by artist and album of the tracks in the
    # master music library. playlists is a dictionary of playlists, indexed
    # by their track sets. folders is a dictionary of the current folders,
    # indexed by their names.
    artists = collect_albums(get_library())
    playlists = catalog_playlists(get_playlists())
    folders = catalog_folders(get_folders())

    # We now loop through artists, by artist and album, deleting and
    # creating playlists as needed.
    for artist, albums in artists.items():
        if artist not in folders:
            folder = make(k.folder_playlist, artist)
            log.write('+ %s\n' % artist)
            folders[artist] = folder
        else:
            folder = folders[artist]
        for album, tracks in albums.items():
            check_tracks(tracks, artist, album, log)
            artist2 = album_artist(tracks, log, album)
            years = year_string(tracks)
            name = "%s %s %s" % (artist2, years, album)
            key = freeze_tracks(tracks)
            if key in playlists:
                playlist = playlists[key]
                name2 = playlist.name()
                folder2 = get_parent(playlist)
                if folder2 and folder == folder2 and name == name2:
                    # Already in the right folder under the right name.
                    continue
                delete_playlist(playlist, log)

                # The logic is tricky here. We want to delete folders that
                # we empty, without pulling the rug out from under
                # ourselves. We're about to add a playlist to folder, so it
                # isn't a candidate for deletion. We may delete a folder
                # here that we want to use later, so we keep the folders
                # dictionary current. (Adding folder to folders earlier was
                # unnecessary, but simplifies reasoning about this code,
                # and may prevent a future bug.)
                #
                # folder2 is None when the old playlist had no parent
                # folder, in which case there is nothing to clean up.
                if folder2 and folder2 != folder and not count_folder(folder2):
                    delete_playlist(folder2, log, index=folders)
            playlist = make(k.user_playlist, name, loc=folder)
            add_tracks(tracks, playlist, log)
            log.write('+ %s\n' % name)


dict_from_plist recursively translates the ElementTree element plist into a dictionary, assuming that it was parsed from an Apple property list.

def dict_from_plist(plist):
    """Recursively translate the ElementTree element plist into a dictionary.

    plist is assumed to be a <dict> element parsed from an Apple property
    list: its children alternate between <key> elements and value
    elements. A <dict> value becomes a nested dictionary, an <array> value
    becomes a list of dictionaries (each array element is itself treated
    as a <dict>), and any other value is stored as its text.

    Raises AssertionError if an element in a key position is not a <key>.
    """
    result = dict()
    # Children come in (key, value) pairs: even positions are keys.
    pairs = zip(plist[::2], plist[1::2])
    for key, val in pairs:
        assert key.tag == 'key'
        if val.tag == 'dict':
            result[key.text] = dict_from_plist(val)
        elif val.tag == 'array':
            result[key.text] = [dict_from_plist(item) for item in val]
        else:
            result[key.text] = val.text
    return result


dict_from_xml_playlist translates the Apple property list XML file xmlfile into a dictionary.

def dict_from_xml_playlist(xmlfile):
    """Translate the Apple property list XML file xmlfile into a dictionary.

    Raises AssertionError if the root element of the file is not <plist>.
    """
    document = ET.parse(xmlfile)
    root = document.getroot()
    assert root.tag == 'plist'
    top_dict = document.find('dict')
    return dict_from_plist(top_dict)


catalog_xml_playlists returns a list of pairs, associating playlist names with lists of tracks described by artist, album, name tuples.

def catalog_xml_playlists(xmlfile):
    """Return a list of (playlist name, tracks) pairs read from xmlfile.

    tracks is a list of (artist, album, name) tuples, one per playlist
    item, looked up by 'Track ID' in the file's 'Tracks' dictionary. The
    artist is the 'Album Artist' when it is present and non-empty,
    otherwise the 'Artist'. Missing values are standardized by
    track_tuple. Playlists without a 'Playlist Items' entry are left out.
    """
    xml = dict_from_xml_playlist(xmlfile)
    catalog = list()
    library = xml['Tracks']
    playlists = xml['Playlists']
    for playlist in playlists:
        if 'Playlist Items' not in playlist:
            continue
        tracks = list()
        track_IDs = [x['Track ID'] for x in playlist['Playlist Items']]
        for ID in track_IDs:
            track = library[ID]
            # Same fallback rule as catalog_tracks: prefer the album
            # artist, use the artist when it is missing or empty.
            artist = track.get('Album Artist') or track.get('Artist')
            album = track.get('Album')
            name = track.get('Name')
            tracks.append(track_tuple([artist, album, name]))
        catalog.append((playlist['Name'], tracks))
    return catalog


write_playlist writes the tracks in tracks to out, a unicode file open for writing. playlist is a string; tracks is a list of 3-tuples of strings.

def write_playlist(playlist, tracks, out):
    """Write a playlist heading followed by its tracks to out.

    playlist is a string written on its own line between blank lines.
    tracks is a list of 3-tuples of strings, each written as
    'first - second - third'. out is a unicode file open for writing.
    """
    heading = '\n%s\n\n' % playlist
    out.write(heading)
    line_format = '%s - %s - %s\n'
    for entry in tracks:
        out.write(line_format % entry)


list_xml_playlists lists the playlists in xmlfile to out, a unicode file open for writing.

def list_xml_playlists(xmlfile, out):
    """Write every playlist cataloged from xmlfile to out, a unicode file open for writing."""
    for entry in catalog_xml_playlists(xmlfile):
        name, tracks = entry
        write_playlist(name, tracks, out)


diff_playlists compares each playlist in xmlfile1 to the corresponding playlist in xmlfile2, writing tracks found only in xmlfile1, xmlfile2 respectively to out1, out2, unicode files open for writing. Playlists found only in xmlfile2 are not considered.

def diff_playlists(xmlfile1, xmlfile2, out1, out2):
    """Compare each playlist in xmlfile1 to the same-named playlist in xmlfile2.

    For every playlist in xmlfile1, the tracks found only in xmlfile1 are
    written to out1 and the tracks found only in xmlfile2 are written to
    out2, each list in sorted order so the output is reproducible. A
    playlist absent from xmlfile2 is compared against an empty track set.
    Playlists found only in xmlfile2 are not considered. out1 and out2 are
    unicode files open for writing.
    """
    catalog1 = dict(catalog_xml_playlists(xmlfile1))
    catalog2 = dict(catalog_xml_playlists(xmlfile2))
    for playlist, tracks1 in catalog1.items():
        set1 = set(tracks1)
        set2 = set(catalog2.get(playlist, ()))
        write_playlist(playlist, sorted(set1 - set2), out1)
        write_playlist(playlist, sorted(set2 - set1), out2)


get_args returns the list of command line arguments, or prints a usage message. count is the number of expected arguments.

def get_args(count, usage):
    """Return the list of command line arguments, or print a usage message and exit.

    count is the number of expected arguments (not counting the script
    name). usage describes them and is shown after the script name in the
    usage message. On a wrong argument count the message is printed and the
    script exits with status 2, so callers never receive a partial result.
    """
    argv = sys.argv
    if len(argv) != count + 1:
        print('usage: %s %s' % (argv[0], usage))
        sys.exit(2)
    return argv[1:]

open_args, open_arg

open_args, open_arg open for writing the command line arguments as unicode files, or print a usage message. open_args returns a list of files; count is the number of expected arguments. start is the first argument to open; this allows for different handling of initial arguments.

open_arg expects a single command line argument, and returns a single file.

In general, names in iTunes are unicode strings that can include non-ASCII characters, so script output should be directed to such files, not to stdout.

def open_args(count, usage, start=1):
    """Open command line arguments for writing as UTF-8 files, or print a usage message and exit.

    count is the number of expected arguments (not counting the script
    name). usage describes them for the usage message. start is the index
    of the first argument to open, which allows for different handling of
    initial arguments. Returns the list of opened files. On a wrong
    argument count the usage message is printed and the script exits with
    status 2.
    """
    argv = sys.argv
    if len(argv) != count + 1:
        print('usage: %s %s' % (argv[0], usage))
        sys.exit(2)
    return [codecs.open(x, 'w', 'utf-8') for x in argv[start:]]

def open_arg(usage):
    """Open the single expected command line argument for writing and return that file."""
    files = open_args(1, usage)
    return files[0]