#!/usr/bin/env python
# jamendo-symlink.py
# Jamendo database dumps can be fetched from: http://img.jamendo.com/data/dbdump_artistalbumtrack.xml.gz
import gzip
import os
import os.path
import sys
import threading
import time
import urllib

try:
    import xml.etree.cElementTree as ElementTree
except ImportError:
    # xml.etree.cElementTree was removed in Python 3.9; the plain module
    # provides the same API.
    import xml.etree.ElementTree as ElementTree
  5. class JamendoSymlink:
  6. def __init__(self, path):
  7. self.music_path = path
  8. if not os.path.exists(os.path.join(path, "link")):
  9. os.mkdir(os.path.join(path, "link"))
  10. def parse(self, dump):
  11. for event, elem in ElementTree.iterparse(dump):
  12. if elem.tag == "artist":
  13. artist = self.proc_artist(elem)
  14. self.make_rules(artist)
  15. def proc_artist(self, elem):
  16. artist = {}
  17. artist["albums"] = []
  18. for artist_e in elem.getchildren():
  19. if artist_e.tag == "name":
  20. artist["name"] = artist_e.text
  21. if artist_e.tag == "Albums":
  22. for album_e in artist_e.getchildren():
  23. artist["albums"].append(self.proc_album(album_e))
  24. return artist
  25. def proc_album(self, elem):
  26. album = {}
  27. album["tracks"] = []
  28. album["name"] = None
  29. for album_e in elem.getchildren():
  30. if album_e.tag == "name":
  31. album["name"] = album_e.text
  32. if album_e.tag == "Tracks":
  33. for track_e in album_e.getchildren():
  34. album["tracks"].append(self.proc_track(track_e))
  35. return album
  36. def proc_track(self, elem):
  37. track = {}
  38. track["id"] = None
  39. track["name"] = None
  40. track["license"] = None
  41. for track_e in elem.getchildren():
  42. if track_e.tag == "id":
  43. track["id"] = int(track_e.text)
  44. if track_e.tag == "name":
  45. track["name"] = track_e.text
  46. if track_e.tag == "license":
  47. track["license"] = track_e.text
  48. return track
  49. def make_rules(self, artist):
  50. for album in artist["albums"]:
  51. for track in album["tracks"]:
  52. if track["id"] and track["name"] and album["name"] and artist["name"] and self.free_license(track["license"]):
  53. filename = "%s-%s-%s" % (artist["name"].replace("/", ""), album["name"].replace("/", ""), track["name"].replace("/", " "))
  54. filename = filename.encode("utf-8")
  55. os.symlink("%s/ogg/%s.ogg" % (self.music_path, filename), "%s/link/%d.ogg2" % (self.music_path, track['id']))
  56. os.symlink("%s/mp3/%s.mp3" % (self.music_path, filename), "%s/link/%d.mp31" % (self.music_path, track['id']))
  57. def free_license(self, license):
  58. return ("http://creativecommons.org/licenses/by-sa" in license or "http://creativecommons.org/licenses/by/" in license or "http://artlibre.org/licence.php/lal.html" in license)
  59. if __name__ == "__main__":
  60. if len(sys.argv) != 3:
  61. print "Usage: jamendo-symlink.py <database dump> /path/to/music_files/"
  62. sys.exit(1)
  63. if sys.argv[1][-2:] == "gz":
  64. dump = gzip.open(sys.argv[1], "r")
  65. else:
  66. dump = open(sys.argv[1], "r")
  67. symlinker = JamendoSymlink(sys.argv[2])
  68. symlinker.parse(dump)