Package musicbrainz2 :: Module wsxml
[frames] | no frames]

Source Code for Module musicbrainz2.wsxml

   1  """A parser for the Music Metadata XML Format (MMD). 
   2   
   3  This module contains L{MbXmlParser}, which parses the U{Music Metadata XML 
   4  Format (MMD) <http://musicbrainz.org/development/mmd/>} returned by the 
   5  MusicBrainz webservice.  
   6   
   7  There are also DOM helper functions in this module used by the parser which 
   8  probably aren't useful to users. 
   9  """ 
  10  __revision__ = '$Id: wsxml.py 9724 2008-03-09 08:34:11Z matt $' 
  11   
  12  import re 
  13  import logging 
  14  import urlparse 
  15  import xml.dom.minidom 
  16  import xml.sax.saxutils as saxutils  
  17  from xml.parsers.expat import ExpatError 
  18  from xml.dom import DOMException 
  19   
  20  import musicbrainz2.utils as mbutils 
  21  import musicbrainz2.model as model 
  22  from musicbrainz2.model import NS_MMD_1, NS_REL_1, NS_EXT_1 
  23   
  24  __all__ = [ 
  25          'DefaultFactory', 'Metadata', 'ParseError', 
  26          'MbXmlParser', 'MbXmlWriter', 
  27          'AbstractResult', 
  28          'ArtistResult', 'ReleaseResult', 'TrackResult', 'LabelResult' 
  29  ] 
  30   
  31   
class DefaultFactory(object):
    """Creates the domain-model objects used while parsing.

    Every C{new*} method returns a fresh, default-constructed instance
    of the matching class from L{musicbrainz2.model}.
    """

    def newArtist(self):
        """@return: a new L{model.Artist}"""
        return model.Artist()

    def newRelease(self):
        """@return: a new L{model.Release}"""
        return model.Release()

    def newTrack(self):
        """@return: a new L{model.Track}"""
        return model.Track()

    def newRelation(self):
        """@return: a new L{model.Relation}"""
        return model.Relation()

    def newReleaseEvent(self):
        """@return: a new L{model.ReleaseEvent}"""
        return model.ReleaseEvent()

    def newDisc(self):
        """@return: a new L{model.Disc}"""
        return model.Disc()

    def newArtistAlias(self):
        """@return: a new L{model.ArtistAlias}"""
        return model.ArtistAlias()

    def newUser(self):
        """@return: a new L{model.User}"""
        return model.User()

    def newLabel(self):
        """@return: a new L{model.Label}"""
        return model.Label()

    def newLabelAlias(self):
        """@return: a new L{model.LabelAlias}"""
        return model.LabelAlias()

    def newTag(self):
        """@return: a new L{model.Tag}"""
        return model.Tag()
48 49
class ParseError(Exception):
    """Exception to be thrown if a parse error occurs.

    The C{'msg'} attribute contains a printable error message, C{'reason'}
    is the lower level exception that was raised.
    """

    def __init__(self, msg='Parse Error', reason=None):
        """Constructor.

        @param msg: a printable error message
        @param reason: the lower level exception that caused this
                error, or None
        """
        # Hand the message to the base class so that C{args}, repr()
        # and pickling carry it instead of an empty tuple.
        Exception.__init__(self, msg)
        self.msg = msg
        self.reason = reason

    def __str__(self):
        """Returns the error message given to the constructor."""
        return self.msg
64 65
class Metadata(object):
    """In-memory form of one Music Metadata XML document.

    An MMD document can carry many kinds of data at once (for example
    an artist, a release and a list of tracks). Usually only a small
    part is filled in: a single artist, release, track or label, or a
    result list holding objects of one kind.

    @see: L{MbXmlParser} for reading, and L{MbXmlWriter} for writing
            Metadata objects
    """

    def __init__(self):
        # Single entities; each one stays None until it is assigned.
        self._artist = self._release = self._track = self._label = None

        # One result list per entity kind, each with its paging info.
        self._artistResults = []
        self._artistResultsOffset = self._artistResultsCount = None
        self._releaseResults = []
        self._releaseResultsOffset = self._releaseResultsCount = None
        self._trackResults = []
        self._trackResultsOffset = self._trackResultsCount = None
        self._labelResults = []
        self._labelResultsOffset = self._labelResultsCount = None

        # Plain lists without scores or paging information.
        self._tagList = []
        self._userList = []

    # -- single entities ------------------------------------------------

    def getArtist(self):
        """Returns the stored artist, or None if none was set."""
        return self._artist

    def setArtist(self, artist):
        """Stores C{artist} as this document's artist."""
        self._artist = artist

    artist = property(fget=getArtist, fset=setArtist,
                      doc='An Artist object.')

    def getLabel(self):
        """Returns the stored label, or None if none was set."""
        return self._label

    def setLabel(self, label):
        """Stores C{label} as this document's label."""
        self._label = label

    label = property(fget=getLabel, fset=setLabel,
                     doc='A Label object.')

    def getRelease(self):
        """Returns the stored release, or None if none was set."""
        return self._release

    def setRelease(self, release):
        """Stores C{release} as this document's release."""
        self._release = release

    release = property(fget=getRelease, fset=setRelease,
                       doc='A Release object.')

    def getTrack(self):
        """Returns the stored track, or None if none was set."""
        return self._track

    def setTrack(self, track):
        """Stores C{track} as this document's track."""
        self._track = track

    track = property(fget=getTrack, fset=setTrack,
                     doc='A Track object.')

    # -- artist results -------------------------------------------------

    def getArtistResults(self):
        """Returns the artist result list.

        The list object itself is returned, not a copy.

        @return: a list of L{ArtistResult} objects.
        """
        return self._artistResults

    artistResults = property(fget=getArtistResults,
                             doc='A list of ArtistResult objects.')

    def getArtistResultsOffset(self):
        """Returns the zero-based paging offset of the artist results.

        @return: an integer containing the offset, or None

        @see: L{getArtistResults}, L{getArtistResultsCount}
        """
        return self._artistResultsOffset

    def setArtistResultsOffset(self, value):
        """Sets the paging offset of the artist results.

        @param value: an integer containing the offset, or None

        @see: L{getArtistResultsOffset}
        """
        self._artistResultsOffset = value

    artistResultsOffset = property(
        fget=getArtistResultsOffset, fset=setArtistResultsOffset,
        doc='The offset of the artist results.')

    def getArtistResultsCount(self):
        """Returns the total number of artist results available.

        The count need not equal the length of the list returned by
        L{getArtistResults}; a count larger than the list means the
        list is incomplete.

        @return: an integer containing the count, or None

        @see: L{setArtistResultsCount}, L{getArtistResultsOffset}
        """
        return self._artistResultsCount

    def setArtistResultsCount(self, value):
        """Sets the total number of artist results available.

        @param value: an integer containing the count, or None

        @see: L{getArtistResults}, L{setArtistResultsOffset}
        """
        self._artistResultsCount = value

    artistResultsCount = property(
        fget=getArtistResultsCount, fset=setArtistResultsCount,
        doc='The total number of artists results.')

    # -- label results --------------------------------------------------

    def getLabelResults(self):
        """Returns the label result list.

        The list object itself is returned, not a copy.

        @return: a list of L{LabelResult} objects.
        """
        return self._labelResults

    labelResults = property(fget=getLabelResults,
                            doc='A list of LabelResult objects')

    def getLabelResultsOffset(self):
        """Returns the zero-based paging offset of the label results.

        @return: an integer containing the offset, or None

        @see: L{getLabelResults}, L{getLabelResultsCount}
        """
        return self._labelResultsOffset

    def setLabelResultsOffset(self, value):
        """Sets the paging offset of the label results.

        @param value: an integer containing the offset, or None

        @see: L{getLabelResultsOffset}
        """
        self._labelResultsOffset = value

    labelResultsOffset = property(
        fget=getLabelResultsOffset, fset=setLabelResultsOffset,
        doc='The offset of the label results.')

    def getLabelResultsCount(self):
        """Returns the total number of label results available.

        The count need not equal the length of the list returned by
        L{getLabelResults}; a count larger than the list means the
        list is incomplete.

        @return: an integer containing the count, or None

        @see: L{setLabelResultsCount}, L{getLabelResultsOffset}
        """
        return self._labelResultsCount

    def setLabelResultsCount(self, value):
        """Sets the total number of label results available.

        @param value: an integer containing the count, or None

        @see: L{getLabelResults}, L{setLabelResultsOffset}
        """
        self._labelResultsCount = value

    labelResultsCount = property(
        fget=getLabelResultsCount, fset=setLabelResultsCount,
        doc='The total number of label results.')

    # -- release results ------------------------------------------------

    def getReleaseResults(self):
        """Returns the release result list.

        The list object itself is returned, not a copy.

        @return: a list of L{ReleaseResult} objects.
        """
        return self._releaseResults

    releaseResults = property(fget=getReleaseResults,
                              doc='A list of ReleaseResult objects.')

    def getReleaseResultsOffset(self):
        """Returns the zero-based paging offset of the release results.

        @return: an integer containing the offset, or None

        @see: L{getReleaseResults}, L{getReleaseResultsCount}
        """
        return self._releaseResultsOffset

    def setReleaseResultsOffset(self, value):
        """Sets the paging offset of the release results.

        @param value: an integer containing the offset, or None

        @see: L{getReleaseResultsOffset}
        """
        self._releaseResultsOffset = value

    releaseResultsOffset = property(
        fget=getReleaseResultsOffset, fset=setReleaseResultsOffset,
        doc='The offset of the release results.')

    def getReleaseResultsCount(self):
        """Returns the total number of release results available.

        The count need not equal the length of the list returned by
        L{getReleaseResults}; a count larger than the list means the
        list is incomplete.

        @return: an integer containing the count, or None

        @see: L{setReleaseResultsCount}, L{getReleaseResultsOffset}
        """
        return self._releaseResultsCount

    def setReleaseResultsCount(self, value):
        """Sets the total number of release results available.

        @param value: an integer containing the count, or None

        @see: L{getReleaseResults}, L{setReleaseResultsOffset}
        """
        self._releaseResultsCount = value

    releaseResultsCount = property(
        fget=getReleaseResultsCount, fset=setReleaseResultsCount,
        doc='The total number of release results.')

    # -- track results --------------------------------------------------

    def getTrackResults(self):
        """Returns the track result list.

        The list object itself is returned, not a copy.

        @return: a list of L{TrackResult} objects.
        """
        return self._trackResults

    trackResults = property(fget=getTrackResults,
                            doc='A list of TrackResult objects.')

    def getTrackResultsOffset(self):
        """Returns the zero-based paging offset of the track results.

        @return: an integer containing the offset, or None

        @see: L{getTrackResults}, L{getTrackResultsCount}
        """
        return self._trackResultsOffset

    def setTrackResultsOffset(self, value):
        """Sets the paging offset of the track results.

        @param value: an integer containing the offset, or None

        @see: L{getTrackResultsOffset}
        """
        self._trackResultsOffset = value

    trackResultsOffset = property(
        fget=getTrackResultsOffset, fset=setTrackResultsOffset,
        doc='The offset of the track results.')

    def getTrackResultsCount(self):
        """Returns the total number of track results available.

        The count need not equal the length of the list returned by
        L{getTrackResults}; a count larger than the list means the
        list is incomplete.

        @return: an integer containing the count, or None

        @see: L{setTrackResultsCount}, L{getTrackResultsOffset}
        """
        return self._trackResultsCount

    def setTrackResultsCount(self, value):
        """Sets the total number of track results available.

        @param value: an integer containing the count, or None

        @see: L{getTrackResults}, L{setTrackResultsOffset}
        """
        self._trackResultsCount = value

    trackResultsCount = property(
        fget=getTrackResultsCount, fset=setTrackResultsCount,
        doc='The total number of track results.')

    # -- tags and users -------------------------------------------------

    def getTagList(self):
        """Returns the tag list (the list object itself, not a copy).

        @return: a list of L{model.Tag} objects
        """
        return self._tagList

    tagResults = property(fget=getTagList,
                          doc='A list of Tag objects.')

    # MusicBrainz extension to the schema
    def getUserList(self):
        """Returns the user list (the list object itself, not a copy).

        @return: a list of L{model.User} objects

        @note: This is a MusicBrainz extension.
        """
        return self._userList

    userResults = property(fget=getUserList,
                           doc='A list of User objects.')
397 398
class AbstractResult(object):
    """Common base of all search results.

    A result pairs an entity of some kind (Artist, Release, ...) with
    a score; this base class holds the score.
    """

    def __init__(self, score):
        """Constructor.

        @param score: the relevance score of this result
        """
        self._score = score

    def getScore(self):
        """Returns the score of this result.

        The score tells how well the result matches the search
        parameters; a higher value means a better match.

        @return: an int between 0 and 100 (both inclusive), or None
        """
        return self._score

    def setScore(self, score):
        """Replaces the score of this result."""
        self._score = score

    score = property(fget=getScore, fset=setScore,
                     doc='The relevance score.')
423 424
class ArtistResult(AbstractResult):
    """A search result wrapping an artist.

    It pairs an artist with a I{score}, a number between 0 and 100
    where a higher number indicates a better match.
    """

    def __init__(self, artist, score):
        """Constructor.

        @param artist: the artist this result refers to
        @param score: the relevance score
        """
        super(ArtistResult, self).__init__(score)
        self._artist = artist

    def getArtist(self):
        """Returns the artist of this result.

        @return: a L{musicbrainz2.model.Artist} object
        """
        return self._artist

    def setArtist(self, artist):
        """Replaces the artist of this result."""
        self._artist = artist

    artist = property(fget=getArtist, fset=setArtist,
                      doc='An Artist object.')
447 448
class ReleaseResult(AbstractResult):
    """A search result wrapping a release.

    It pairs a release with a I{score}, a number between 0 and 100
    where a higher number indicates a better match.
    """

    def __init__(self, release, score):
        """Constructor.

        @param release: the release this result refers to
        @param score: the relevance score
        """
        super(ReleaseResult, self).__init__(score)
        self._release = release

    def getRelease(self):
        """Returns the release of this result.

        @return: a L{musicbrainz2.model.Release} object
        """
        return self._release

    def setRelease(self, release):
        """Replaces the release of this result."""
        self._release = release

    release = property(fget=getRelease, fset=setRelease,
                       doc='A Release object.')
471 472
class TrackResult(AbstractResult):
    """A search result wrapping a track.

    It pairs a track with a I{score}, a number between 0 and 100
    where a higher number indicates a better match.
    """

    def __init__(self, track, score):
        """Constructor.

        @param track: the track this result refers to
        @param score: the relevance score
        """
        super(TrackResult, self).__init__(score)
        self._track = track

    def getTrack(self):
        """Returns the track of this result.

        @return: a L{musicbrainz2.model.Track} object
        """
        return self._track

    def setTrack(self, track):
        """Replaces the track of this result."""
        self._track = track

    track = property(fget=getTrack, fset=setTrack,
                     doc='A Track object.')
495 496
class LabelResult(AbstractResult):
    """A search result wrapping a label.

    It pairs a label with a I{score}, a number between 0 and 100
    where a higher number indicates a better match.
    """

    def __init__(self, label, score):
        """Constructor.

        @param label: the label this result refers to
        @param score: the relevance score
        """
        super(LabelResult, self).__init__(score)
        self._label = label

    def getLabel(self):
        """Returns the label of this result.

        @return: a L{musicbrainz2.model.Label} object
        """
        return self._label

    def setLabel(self, label):
        """Replaces the label of this result."""
        self._label = label

    label = property(fget=getLabel, fset=setLabel,
                     doc='A Label object.')
519 520
class MbXmlParser(object):
    """A parser for the Music Metadata XML format.

    This parser supports all basic features and extensions defined by
    MusicBrainz, including unlimited document nesting. By default it
    reads an XML document from a file-like object (stream) and returns
    an object tree representing the document using classes from
    L{musicbrainz2.model}.

    The implementation tries to be as permissive as possible. Invalid
    contents are skipped, but documents have to be well-formed and using
    the correct namespace. In case of unrecoverable errors, a L{ParseError}
    exception is raised.

    @see: U{The Music Metadata XML Format
            <http://musicbrainz.org/development/mmd/>}
    """

    def __init__(self, factory=None):
        """Constructor.

        The C{factory} parameter has to be an instance of
        L{DefaultFactory} or a subclass of it. It is used by L{parse}
        to obtain objects from L{musicbrainz2.model} to build the
        resulting object tree. If you supply your own factory, you have
        to make sure all returned objects have the same interface as
        their counterparts from L{musicbrainz2.model}.

        @param factory: an object factory; if None (the default), a
                new L{DefaultFactory} is used
        """
        self._log = logging.getLogger(str(self.__class__))
        # Resolved here rather than in the signature so the default is
        # not one instance created at definition time and shared by
        # every parser.
        if factory is None:
            factory = DefaultFactory()
        self._factory = factory

    def parse(self, inStream):
        """Parses the MusicBrainz web service XML.

        Returns a L{Metadata} object representing the parsed XML or
        raises a L{ParseError} exception if the data was malformed.
        The parser tries to be liberal and skips invalid content if
        possible.

        Note that an L{IOError} may be raised if there is a problem
        reading C{inStream}.

        @param inStream: a file-like object
        @return: a L{Metadata} object (never None)
        @raise ParseError: if the document is not valid
        @raise IOError: if reading from the stream failed
        """
        try:
            doc = xml.dom.minidom.parse(inStream)
            try:
                # Try to find the root element. If this isn't an mmd
                # XML file or the namespace is wrong, this will fail.
                elems = doc.getElementsByTagNameNS(NS_MMD_1, 'metadata')

                if len(elems) == 0:
                    msg = 'cannot find root element mmd:metadata'
                    self._log.debug('ParseError: ' + msg)
                    raise ParseError(msg)

                return self._createMetadata(elems[0])
            finally:
                # Release the DOM tree on every path, including the
                # error paths.
                doc.unlink()
        except ExpatError as e:
            self._log.debug('ExpatError: ' + str(e))
            raise ParseError(msg=str(e), reason=e)
        except DOMException as e:
            self._log.debug('DOMException: ' + str(e))
            raise ParseError(msg=str(e), reason=e)

    def _createMetadata(self, metadata):
        """Builds a L{Metadata} object from the root element.

        Child elements that match none of the known names are ignored.
        """
        md = Metadata()

        for node in _getChildElements(metadata):
            if _matches(node, 'artist'):
                md.artist = self._createArtist(node)
            elif _matches(node, 'release'):
                md.release = self._createRelease(node)
            elif _matches(node, 'track'):
                md.track = self._createTrack(node)
            elif _matches(node, 'label'):
                md.label = self._createLabel(node)
            elif _matches(node, 'artist-list'):
                (offset, count) = self._getListAttrs(node)
                md.artistResultsOffset = offset
                md.artistResultsCount = count
                self._addArtistResults(node, md.getArtistResults())
            elif _matches(node, 'release-list'):
                (offset, count) = self._getListAttrs(node)
                md.releaseResultsOffset = offset
                md.releaseResultsCount = count
                self._addReleaseResults(node, md.getReleaseResults())
            elif _matches(node, 'track-list'):
                (offset, count) = self._getListAttrs(node)
                md.trackResultsOffset = offset
                md.trackResultsCount = count
                self._addTrackResults(node, md.getTrackResults())
            elif _matches(node, 'label-list'):
                (offset, count) = self._getListAttrs(node)
                md.labelResultsOffset = offset
                md.labelResultsCount = count
                self._addLabelResults(node, md.getLabelResults())
            elif _matches(node, 'tag-list'):
                self._addTagsToList(node, md.getTagList())
            elif _matches(node, 'user-list', NS_EXT_1):
                self._addUsersToList(node, md.getUserList())

        return md

    def _addResults(self, listNode, resultList, creator, resultClass):
        """Appends one scored result per child element of C{listNode}.

        @param listNode: the list element whose children are read
        @param resultList: the list the results are appended to
        @param creator: callable turning a child element into an entity
        @param resultClass: result type, called as C{(entity, score)}
        """
        for child in _getChildElements(listNode):
            entity = creator(child)
            score = _getIntAttr(child, 'score', 0, 100, ns=NS_EXT_1)
            if entity is not None:
                resultList.append(resultClass(entity, score))

    def _addArtistResults(self, listNode, resultList):
        """Appends an L{ArtistResult} for each child of C{listNode}."""
        self._addResults(listNode, resultList,
                         self._createArtist, ArtistResult)

    def _addReleaseResults(self, listNode, resultList):
        """Appends a L{ReleaseResult} for each child of C{listNode}."""
        self._addResults(listNode, resultList,
                         self._createRelease, ReleaseResult)

    def _addTrackResults(self, listNode, resultList):
        """Appends a L{TrackResult} for each child of C{listNode}."""
        self._addResults(listNode, resultList,
                         self._createTrack, TrackResult)

    def _addLabelResults(self, listNode, resultList):
        """Appends a L{LabelResult} for each child of C{listNode}."""
        self._addResults(listNode, resultList,
                         self._createLabel, LabelResult)

    def _addReleasesToList(self, listNode, resultList):
        """Appends a release for each child of C{listNode}."""
        self._addToList(listNode, resultList, self._createRelease)

    def _addTracksToList(self, listNode, resultList):
        """Appends a track for each child of C{listNode}."""
        self._addToList(listNode, resultList, self._createTrack)

    def _addUsersToList(self, listNode, resultList):
        """Appends a user for each child of C{listNode}."""
        self._addToList(listNode, resultList, self._createUser)

    def _addTagsToList(self, listNode, resultList):
        """Appends a tag for each child of C{listNode}."""
        self._addToList(listNode, resultList, self._createTag)

    def _addTagsToEntity(self, listNode, entity):
        """Adds a tag to C{entity} for each child of C{listNode}."""
        for node in _getChildElements(listNode):
            tag = self._createTag(node)
            entity.addTag(tag)

    def _addToList(self, listNode, resultList, creator):
        """Appends C{creator(child)} for each child of C{listNode}."""
        for c in _getChildElements(listNode):
            resultList.append(creator(c))

    def _getListAttrs(self, listNode):
        """Reads the paging attributes of a list element.

        @return: an C{(offset, count)} tuple
        """
        offset = _getIntAttr(listNode, 'offset')
        count = _getIntAttr(listNode, 'count')
        return (offset, count)

    def _createArtist(self, artistNode):
        """Builds an artist from an artist element and its children."""
        artist = self._factory.newArtist()
        artist.setId(_getIdAttr(artistNode, 'id', 'artist'))
        artist.setType(_getUriAttr(artistNode, 'type'))

        for node in _getChildElements(artistNode):
            if _matches(node, 'name'):
                artist.setName(_getText(node))
            elif _matches(node, 'sort-name'):
                artist.setSortName(_getText(node))
            elif _matches(node, 'disambiguation'):
                artist.setDisambiguation(_getText(node))
            elif _matches(node, 'life-span'):
                artist.setBeginDate(_getDateAttr(node, 'begin'))
                artist.setEndDate(_getDateAttr(node, 'end'))
            elif _matches(node, 'alias-list'):
                self._addArtistAliases(node, artist)
            elif _matches(node, 'release-list'):
                (offset, count) = self._getListAttrs(node)
                artist.setReleasesOffset(offset)
                artist.setReleasesCount(count)
                self._addReleasesToList(node, artist.getReleases())
            elif _matches(node, 'relation-list'):
                self._addRelationsToEntity(node, artist)
            elif _matches(node, 'tag-list'):
                self._addTagsToEntity(node, artist)

        return artist

    def _createLabel(self, labelNode):
        """Builds a label from a label element and its children."""
        label = self._factory.newLabel()
        label.setId(_getIdAttr(labelNode, 'id', 'label'))
        label.setType(_getUriAttr(labelNode, 'type'))

        for node in _getChildElements(labelNode):
            if _matches(node, 'name'):
                label.setName(_getText(node))
            elif _matches(node, 'sort-name'):
                label.setSortName(_getText(node))
            elif _matches(node, 'disambiguation'):
                label.setDisambiguation(_getText(node))
            elif _matches(node, 'label-code'):
                label.setCode(_getText(node))
            elif _matches(node, 'country'):
                country = _getText(node, '^[A-Z]{2}$')
                label.setCountry(country)
            elif _matches(node, 'life-span'):
                label.setBeginDate(_getDateAttr(node, 'begin'))
                label.setEndDate(_getDateAttr(node, 'end'))
            elif _matches(node, 'alias-list'):
                self._addLabelAliases(node, label)
            elif _matches(node, 'tag-list'):
                self._addTagsToEntity(node, label)

        return label

    def _createRelease(self, releaseNode):
        """Builds a release from a release element and its children."""
        release = self._factory.newRelease()
        release.setId(_getIdAttr(releaseNode, 'id', 'release'))
        for t in _getUriListAttr(releaseNode, 'type'):
            release.addType(t)

        for node in _getChildElements(releaseNode):
            if _matches(node, 'title'):
                release.setTitle(_getText(node))
            elif _matches(node, 'text-representation'):
                lang = _getAttr(node, 'language', '^[A-Z]{3}$')
                release.setTextLanguage(lang)
                script = _getAttr(node, 'script', '^[A-Z][a-z]{3}$')
                release.setTextScript(script)
            elif _matches(node, 'asin'):
                release.setAsin(_getText(node))
            elif _matches(node, 'artist'):
                release.setArtist(self._createArtist(node))
            elif _matches(node, 'release-event-list'):
                self._addReleaseEvents(node, release)
            elif _matches(node, 'disc-list'):
                self._addDiscs(node, release)
            elif _matches(node, 'track-list'):
                (offset, count) = self._getListAttrs(node)
                release.setTracksOffset(offset)
                release.setTracksCount(count)
                self._addTracksToList(node, release.getTracks())
            elif _matches(node, 'relation-list'):
                self._addRelationsToEntity(node, release)
            elif _matches(node, 'tag-list'):
                self._addTagsToEntity(node, release)

        return release

    def _addReleaseEvents(self, releaseListNode, release):
        """Adds a release event to C{release} for each dated event."""
        for node in _getChildElements(releaseListNode):
            if not _matches(node, 'event'):
                continue

            country = _getAttr(node, 'country', '^[A-Z]{2}$')
            date = _getDateAttr(node, 'date')
            catalogNumber = _getAttr(node, 'catalog-number')
            barcode = _getAttr(node, 'barcode')
            fmt = _getUriAttr(node, 'format')

            # The date attribute is mandatory. If it isn't present,
            # we don't add anything from this release event.
            if date is None:
                continue

            event = self._factory.newReleaseEvent()
            event.setCountry(country)
            event.setDate(date)
            event.setCatalogNumber(catalogNumber)
            event.setBarcode(barcode)
            event.setFormat(fmt)

            for subNode in _getChildElements(node):
                if _matches(subNode, 'label'):
                    event.setLabel(self._createLabel(subNode))

            release.addReleaseEvent(event)

    def _addDiscs(self, discIdListNode, release):
        """Adds a disc to C{release} for each disc element with an id."""
        for node in _getChildElements(discIdListNode):
            if _matches(node, 'disc') and node.hasAttribute('id'):
                d = self._factory.newDisc()
                d.setId(node.getAttribute('id'))
                d.setSectors(_getIntAttr(node, 'sectors', 0))
                release.addDisc(d)

    def _addArtistAliases(self, aliasListNode, artist):
        """Adds an alias to C{artist} for each alias element."""
        for node in _getChildElements(aliasListNode):
            if _matches(node, 'alias'):
                alias = self._factory.newArtistAlias()
                self._initializeAlias(alias, node)
                artist.addAlias(alias)

    def _addLabelAliases(self, aliasListNode, label):
        """Adds an alias to C{label} for each alias element."""
        for node in _getChildElements(aliasListNode):
            if _matches(node, 'alias'):
                alias = self._factory.newLabelAlias()
                self._initializeAlias(alias, node)
                label.addAlias(alias)

    def _initializeAlias(self, alias, node):
        """Fills C{alias} with the value, type and script of C{node}."""
        alias.setValue(_getText(node))
        alias.setType(_getUriAttr(node, 'type'))
        alias.setScript(_getAttr(node, 'script',
                                 '^[A-Z][a-z]{3}$'))

    def _createTrack(self, trackNode):
        """Builds a track from a track element and its children."""
        track = self._factory.newTrack()
        track.setId(_getIdAttr(trackNode, 'id', 'track'))

        for node in _getChildElements(trackNode):
            if _matches(node, 'title'):
                track.setTitle(_getText(node))
            elif _matches(node, 'artist'):
                track.setArtist(self._createArtist(node))
            elif _matches(node, 'duration'):
                track.setDuration(_getPositiveIntText(node))
            elif _matches(node, 'release-list'):
                self._addReleasesToList(node, track.getReleases())
            elif _matches(node, 'puid-list'):
                self._addPuids(node, track)
            elif _matches(node, 'relation-list'):
                self._addRelationsToEntity(node, track)
            elif _matches(node, 'tag-list'):
                self._addTagsToEntity(node, track)

        return track

    # MusicBrainz extension
    def _createUser(self, userNode):
        """Builds a user from a user element and its children."""
        user = self._factory.newUser()
        for t in _getUriListAttr(userNode, 'type', NS_EXT_1):
            user.addType(t)

        for node in _getChildElements(userNode):
            if _matches(node, 'name'):
                user.setName(_getText(node))
            elif _matches(node, 'nag', NS_EXT_1):
                user.setShowNag(_getBooleanAttr(node, 'show'))

        return user

    def _createTag(self, tagNode):
        """Builds a tag from the text and count of a tag element."""
        tag = self._factory.newTag()
        tag.value = _getText(tagNode)
        tag.count = _getIntAttr(tagNode, 'count')
        return tag

    def _addPuids(self, puidListNode, track):
        """Adds a PUID to C{track} for each puid element with an id."""
        for node in _getChildElements(puidListNode):
            if _matches(node, 'puid') and node.hasAttribute('id'):
                track.addPuid(node.getAttribute('id'))

    def _addRelationsToEntity(self, relationListNode, entity):
        """Adds the valid relations of a relation list to C{entity}.

        Nothing is added if the list has no usable target type.
        """
        targetType = _getUriAttr(relationListNode, 'target-type', NS_REL_1)

        if targetType is None:
            return

        for node in _getChildElements(relationListNode):
            if _matches(node, 'relation'):
                rel = self._createRelation(node, targetType)
                if rel is not None:
                    entity.addRelation(rel)

    def _createRelation(self, relationNode, targetType):
        """Builds a relation from a relation element.

        @return: the relation, or None if its type, target type or
                target ID is missing
        """
        relation = self._factory.newRelation()

        relation.setType(_getUriAttr(relationNode, 'type', NS_REL_1))
        relation.setTargetType(targetType)
        resType = _getResourceType(targetType)
        relation.setTargetId(_getIdAttr(relationNode, 'target', resType))

        if relation.getType() is None \
                or relation.getTargetType() is None \
                or relation.getTargetId() is None:
            return None

        relation.setDirection(_getDirectionAttr(relationNode, 'direction'))
        relation.setBeginDate(_getDateAttr(relationNode, 'begin'))
        relation.setEndDate(_getDateAttr(relationNode, 'end'))

        for a in _getUriListAttr(relationNode, 'attributes', NS_REL_1):
            relation.addAttribute(a)

        # Only the first child element is considered as the target.
        target = None
        children = _getChildElements(relationNode)
        if len(children) > 0:
            node = children[0]
            if _matches(node, 'artist'):
                target = self._createArtist(node)
            elif _matches(node, 'release'):
                target = self._createRelease(node)
            elif _matches(node, 'track'):
                target = self._createTrack(node)

        relation.setTarget(target)

        return relation
935 936 937 # 938 # XML output 939 # 940
class _XmlWriter(object):
    """A minimal, indenting XML writer.

    Elements opened with L{start} are kept on a stack; the stack depth
    determines the indentation and L{end} closes the innermost one.
    """

    def __init__(self, outStream, indentAmount=' '):
        """Constructor.

        @param outStream: an object with a C{write} method
        @param indentAmount: the string written once per nesting level
        """
        self._out = outStream
        self._indentAmount = indentAmount
        self._stack = []

    def prolog(self, encoding='UTF-8', version='1.0'):
        """Writes the XML declaration followed by a newline."""
        pi = '<?xml version="%s" encoding="%s"?>' % (version, encoding)
        self._out.write(pi + '\n')

    def start(self, name, attrs=None):
        """Writes an opening tag and makes it the current element.

        @param name: the element name
        @param attrs: a dictionary of attributes, or None; attributes
                whose value is None are not written
        """
        indent = self._getIndention()
        self._stack.append(name)
        self._out.write(indent + self._makeTag(name, attrs) + '\n')

    def end(self):
        """Writes the closing tag of the current element."""
        name = self._stack.pop()
        indent = self._getIndention()
        self._out.write('%s</%s>\n' % (indent, name))

    def elem(self, name, value, attrs=None):
        """Writes a complete element on one line.

        Attributes that are None or empty are dropped. If C{value} is
        None or empty, an empty-element tag is written, or nothing at
        all when no attributes remain.

        @param name: the element name
        @param value: the text content, or None
        @param attrs: a dictionary of attributes, or None; it is not
                modified
        """
        # Filter into a new dict instead of deleting from the caller's:
        # this leaves the argument intact and avoids changing a dict
        # while iterating over it.
        kept = {}
        for (k, v) in (attrs or {}).items():
            if not (v is None or v == ''):
                kept[k] = v

        if value is None or value == '':
            if len(kept) == 0:
                return
            self._out.write(self._getIndention())
            self._out.write(self._makeTag(name, kept, True) + '\n')
        else:
            # 'value or ""' keeps falsy non-string values such as 0
            # from reaching escape(), which expects a string.
            escValue = saxutils.escape(value or '')
            self._out.write(self._getIndention())
            self._out.write(self._makeTag(name, kept))
            self._out.write(escValue)
            self._out.write('</%s>\n' % name)

    def _getIndention(self):
        """Returns the indentation for the current nesting depth."""
        return self._indentAmount * len(self._stack)

    def _makeTag(self, name, attrs=None, close=False):
        """Returns a tag for C{name} with its attributes.

        @param attrs: a dictionary of attributes, or None; values that
                are None are skipped
        @param close: if True, return an empty-element tag (C{<x/>})
        """
        ret = '<' + name

        for (k, v) in (attrs or {}).items():
            if v is not None:
                v = saxutils.quoteattr(str(v))
                ret += ' %s=%s' % (k, v)

        if close:
            return ret + '/>'
        else:
            return ret + '>'
994 995 996
997 -class MbXmlWriter(object):
998 """Write XML in the Music Metadata XML format.""" 999
def __init__(self, indentAmount=' '):
    """Constructor.

    The value is only stored here; L{write} hands it to the
    underlying XML writer.

    @param indentAmount: the amount of whitespace to use per level
    """
    self._indentAmount = indentAmount
1006 1007
def write(self, outStream, metadata):
    """Writes the XML representation of a Metadata object to a file.

    The output consists of the XML prolog and a C{metadata} root
    element holding the single artist, release, track and label,
    followed by every non-empty result list.

    @param outStream: an open file-like object
    @param metadata: a L{Metadata} object
    """
    writer = _XmlWriter(outStream, self._indentAmount)

    writer.prolog()
    rootAttrs = {
        'xmlns': NS_MMD_1,
        'xmlns:ext': NS_EXT_1,
    }
    writer.start('metadata', rootAttrs)

    # Single entities; each helper writes nothing for None.
    self._writeArtist(writer, metadata.getArtist())
    self._writeRelease(writer, metadata.getRelease())
    self._writeTrack(writer, metadata.getTrack())
    self._writeLabel(writer, metadata.getLabel())

    # Result lists; an empty list produces no list element.
    if len(metadata.getArtistResults()) > 0:
        listAttrs = {
            'offset': metadata.artistResultsOffset,
            'count': metadata.artistResultsCount,
        }
        writer.start('artist-list', listAttrs)
        for hit in metadata.getArtistResults():
            self._writeArtist(writer, hit.getArtist(), hit.getScore())
        writer.end()

    if len(metadata.getReleaseResults()) > 0:
        listAttrs = {
            'offset': metadata.releaseResultsOffset,
            'count': metadata.releaseResultsCount,
        }
        writer.start('release-list', listAttrs)
        for hit in metadata.getReleaseResults():
            self._writeRelease(writer, hit.getRelease(), hit.getScore())
        writer.end()

    if len(metadata.getTrackResults()) > 0:
        listAttrs = {
            'offset': metadata.trackResultsOffset,
            'count': metadata.trackResultsCount,
        }
        writer.start('track-list', listAttrs)
        for hit in metadata.getTrackResults():
            self._writeTrack(writer, hit.getTrack(), hit.getScore())
        writer.end()

    if len(metadata.getLabelResults()) > 0:
        listAttrs = {
            'offset': metadata.labelResultsOffset,
            'count': metadata.labelResultsCount,
        }
        writer.start('label-list', listAttrs)
        for hit in metadata.getLabelResults():
            self._writeLabel(writer, hit.getLabel(), hit.getScore())
        writer.end()

    # closes the root element
    writer.end()
1068 1069
def _writeArtist(self, xml, artist, score=None):
    """Writes an C{artist} element.

    The element carries the artist's names, disambiguation, life span,
    aliases, releases and relations.

    @param xml: the XML writer to emit to
    @param artist: the artist to write; nothing is written if it is None
    @param score: value for the C{ext:score} attribute, or None
    """
    if artist is None:
        return

    artistAttrs = {
        'id': mbutils.extractUuid(artist.getId()),
        'type': mbutils.extractFragment(artist.getType()),
        'ext:score': score,
    }
    xml.start('artist', artistAttrs)

    xml.elem('name', artist.getName())
    xml.elem('sort-name', artist.getSortName())
    xml.elem('disambiguation', artist.getDisambiguation())

    lifeSpan = {
        'begin': artist.getBeginDate(),
        'end': artist.getEndDate(),
    }
    xml.elem('life-span', None, lifeSpan)

    aliases = artist.getAliases()
    if len(aliases) > 0:
        xml.start('alias-list')
        for entry in aliases:
            aliasText = entry.getValue()
            aliasAttrs = {
                'type': entry.getType(),
                'script': entry.getScript(),
            }
            xml.elem('alias', aliasText, aliasAttrs)
        xml.end()

    releases = artist.getReleases()
    if len(releases) > 0:
        xml.start('release-list')
        for item in releases:
            self._writeRelease(xml, item)
        xml.end()

    self._writeRelationList(xml, artist)
    # TODO: extensions

    # Closes the 'artist' element.
    xml.end()
1107 1108
def _writeRelease(self, xml, release, score=None):
    """Writes a C{release} element.

    The element carries the release's title, text representation, ASIN,
    artist, release events, discs, tracks and relations.

    @param xml: the XML writer to emit to
    @param release: the release to write; nothing is written if it is None
    @param score: value for the C{ext:score} attribute, or None
    """
    if release is None:
        return

    # The 'type' attribute is a space-separated list of type fragments,
    # or None if the release has no types at all.
    fragments = []
    for typeUri in release.getTypes():
        fragments.append(mbutils.extractFragment(typeUri))
    if fragments:
        typeAttr = ' '.join(fragments)
    else:
        typeAttr = None

    releaseAttrs = {
        'id': mbutils.extractUuid(release.getId()),
        'type': typeAttr,
        'ext:score': score,
    }
    xml.start('release', releaseAttrs)

    xml.elem('title', release.getTitle())
    textRepr = {
        'language': release.getTextLanguage(),
        'script': release.getTextScript()
    }
    xml.elem('text-representation', None, textRepr)
    xml.elem('asin', release.getAsin())

    self._writeArtist(xml, release.getArtist())

    events = release.getReleaseEvents()
    if len(events) > 0:
        xml.start('release-event-list')
        for releaseEvent in events:
            self._writeReleaseEvent(xml, releaseEvent)
        xml.end()

    discs = release.getDiscs()
    if len(discs) > 0:
        xml.start('disc-list')
        for item in discs:
            xml.elem('disc', None, {'id': item.getId()})
        xml.end()

    tracks = release.getTracks()
    if len(tracks) > 0:
        # TODO: count attribute
        trackListAttrs = {
            'offset': release.getTracksOffset()
        }
        xml.start('track-list', trackListAttrs)
        for item in tracks:
            self._writeTrack(xml, item)
        xml.end()

    self._writeRelationList(xml, release)
    # TODO: extensions

    # Closes the 'release' element.
    xml.end()
1158 1159
def _writeReleaseEvent(self, xml, event):
    """Writes an C{event} element for a release event.

    The event's properties become attributes; its label, if any, is
    written as a child element.

    @param xml: the XML writer to emit to
    @param event: the release event to write
    """
    eventAttrs = {
        'country': event.getCountry(),
        'date': event.getDate(),
        'catalog-number': event.getCatalogNumber(),
        'barcode': event.getBarcode(),
        'format': event.getFormat()
    }

    xml.start('event', eventAttrs)
    self._writeLabel(xml, event.getLabel())
    xml.end()
1172 1173
def _writeTrack(self, xml, track, score=None):
    """Writes a C{track} element.

    The element carries the track's title, duration, artist, releases,
    PUIDs and relations. A C{duration} element is only written if the
    track actually has a duration.

    @param xml: the XML writer to emit to
    @param track: the track to write; nothing is written if it is None
    @param score: value for the C{ext:score} attribute, or None
    """
    if track is None:
        return

    xml.start('track', {
        'id': mbutils.extractUuid(track.getId()),
        'ext:score': score,
    })

    xml.elem('title', track.getTitle())

    # Stringify the duration only if it is set: str(None) would be
    # emitted as the literal text "None" instead of being left out.
    duration = track.getDuration()
    if duration is not None:
        xml.elem('duration', str(duration))

    self._writeArtist(xml, track.getArtist())

    if len(track.getReleases()) > 0:
        # TODO: offset + count
        xml.start('release-list')
        for release in track.getReleases():
            self._writeRelease(xml, release)
        xml.end()

    if len(track.getPuids()) > 0:
        xml.start('puid-list')
        for puid in track.getPuids():
            xml.elem('puid', None, {'id': puid})
        xml.end()

    self._writeRelationList(xml, track)
    # TODO: extensions

    # Closes the 'track' element.
    xml.end()
1204 1205
def _writeLabel(self, xml, label, score=None):
    """Writes a C{label} element.

    The element carries the label's names, disambiguation, life span,
    aliases and relations.

    @param xml: the XML writer to emit to
    @param label: the label to write; nothing is written if it is None
    @param score: value for the C{ext:score} attribute, or None
    """
    if label is None:
        return

    labelAttrs = {
        'id': mbutils.extractUuid(label.getId()),
        'type': mbutils.extractFragment(label.getType()),
        'ext:score': score,
    }
    xml.start('label', labelAttrs)

    xml.elem('name', label.getName())
    xml.elem('sort-name', label.getSortName())
    xml.elem('disambiguation', label.getDisambiguation())

    lifeSpan = {
        'begin': label.getBeginDate(),
        'end': label.getEndDate(),
    }
    xml.elem('life-span', None, lifeSpan)

    aliases = label.getAliases()
    if len(aliases) > 0:
        xml.start('alias-list')
        for entry in aliases:
            aliasText = entry.getValue()
            aliasAttrs = {
                'type': entry.getType(),
                'script': entry.getScript(),
            }
            xml.elem('alias', aliasText, aliasAttrs)
        xml.end()

    # TODO: releases, artists

    self._writeRelationList(xml, label)
    # TODO: extensions

    # Closes the 'label' element.
    xml.end()
1239 1240
def _writeRelationList(self, xml, entity):
    """Writes one C{relation-list} element per relation target type.

    @param xml: the XML writer to emit to
    @param entity: an object providing C{getRelationTargetTypes()} and
        C{getRelations(targetType=...)}
    """
    for targetType in entity.getRelationTargetTypes():
        listAttrs = {
            'target-type': mbutils.extractFragment(targetType),
        }
        xml.start('relation-list', listAttrs)
        for relation in entity.getRelations(targetType=targetType):
            self._writeRelation(xml, relation, targetType)
        xml.end()
1249 1250
def _writeRelation(self, xml, rel, targetType):
    """Writes a C{relation} element.

    If the relation has a target object, it is written as a child
    element for artist, release and track target types; for any other
    target type the C{relation} element has no children. Without a
    target object, only the attributes are written.

    @param xml: the XML writer to emit to
    @param rel: the relation to write
    @param targetType: the absolute URI of the relation's target type
    """
    relAttrs = ' '.join([mbutils.extractFragment(a)
        for a in rel.getAttributes()])

    # No relation attributes at all: use None rather than an empty string.
    if relAttrs == '':
        relAttrs = None

    attrs = {
        'type': mbutils.extractFragment(rel.getType()),
        'target': mbutils.extractUuid(rel.getTargetId()),
        'direction': rel.getDirection(),
        'begin': rel.getBeginDate(),
        # The end of the relation comes from the end date, not the
        # begin date.
        'end': rel.getEndDate(),
        'attributes': relAttrs,
    }

    target = rel.getTarget()
    if target is None:
        # elem() takes (name, value, attrs); a relation has no text
        # value, so the attributes go into the third argument.
        xml.elem('relation', None, attrs)
        return

    xml.start('relation', attrs)
    if targetType == NS_REL_1 + 'Artist':
        self._writeArtist(xml, target)
    elif targetType == NS_REL_1 + 'Release':
        self._writeRelease(xml, target)
    elif targetType == NS_REL_1 + 'Track':
        self._writeTrack(xml, target)
    xml.end()
1278 1279 1280 # 1281 # DOM Utilities 1282 # 1283
def _matches(node, name, namespace=NS_MMD_1):
    """Tells whether an xml.dom.Node has the given name and namespace.

    @param node: an xml.dom.Node
    @param name: the expected local name
    @param namespace: the expected namespace URI

    @return: True if both local name and namespace URI match, False
        otherwise
    """
    if node.localName != name:
        return False
    if node.namespaceURI != namespace:
        return False
    return True
1291 1292
1293 -def _getChildElements(parentNode):
1294 """Returns all direct child elements of the given xml.dom.Node.""" 1295 1296 children = [ ] 1297 for node in parentNode.childNodes: 1298 if node.nodeType == node.ELEMENT_NODE: 1299 children.append(node) 1300 1301 return children
1302 1303
1304 -def _getText(element, regex=None, default=None):
1305 """Returns the text content of the given xml.dom.Element. 1306 1307 This function simply fetches all contained text nodes, so the element 1308 should not contain child elements. 1309 """ 1310 res = '' 1311 for node in element.childNodes: 1312 if node.nodeType == node.TEXT_NODE: 1313 res += node.data 1314 1315 if regex is None or re.match(regex, res): 1316 return res 1317 else: 1318 return default
1319 1320
def _getPositiveIntText(element):
    """Returns the text content of an xml.dom.Element as an int.

    @param element: an xml.dom.Element

    @return: the int value, or None if there is no text or the text
        can't be converted to an int
    """
    text = _getText(element)

    if text is not None:
        try:
            return int(text)
        except ValueError:
            pass

    return None
1333 1334
1335 -def _getAttr(element, attrName, regex=None, default=None, ns=None):
1336 """Returns an attribute of the given element. 1337 1338 If there is no attribute with that name or the attribute doesn't 1339 match the regular expression, default is returned. 1340 """ 1341 if element.hasAttributeNS(ns, attrName): 1342 content = element.getAttributeNS(ns, attrName) 1343 1344 if regex is None or re.match(regex, content): 1345 return content 1346 else: 1347 return default 1348 else: 1349 return default
1350 1351
def _getDateAttr(element, attrName):
    """Reads an incomplete date (year, year-month or year-month-day).

    @param element: an xml.dom.Element
    @param attrName: the name of the attribute holding the date

    @return: the date string, or None if the attribute is missing or
        not a valid incomplete date
    """
    datePattern = '^\d+(-\d\d)?(-\d\d)?$'
    return _getAttr(element, attrName, datePattern)
1355 1356
def _getIdAttr(element, attrName, typeName):
    """Reads an ID attribute and expands it to an absolute URI.

    @param element: an xml.dom.Element
    @param attrName: the name of the attribute holding the ID
    @param typeName: the path component put between the MusicBrainz
        host and the ID, e.g. 'artist'

    @return: an absolute URI, or None if there is no such attribute
    """
    rawId = _getAttr(element, attrName)
    base = 'http://musicbrainz.org/' + typeName + '/'

    return _makeAbsoluteUri(base, rawId)
1362 1363 1364
def _getIntAttr(element, attrName, min=0, max=None, ns=None):
    """Reads an int from an attribute and checks its range.

    @param element: an xml.dom.Element
    @param attrName: the local name of the attribute
    @param min: the smallest accepted value
    @param max: the largest accepted value, or None for no upper limit
    @param ns: the namespace URI of the attribute, or None

    @return: the int, or None if the attribute is missing, not an int
        or out of range
    """
    try:
        number = int(_getAttr(element, attrName, ns=ns))

        # Without an upper limit, the value itself acts as the bound.
        upper = max
        if upper is None:
            upper = number

        if not (min <= number <= upper):
            return None
        return number
    except (ValueError, TypeError):
        # ValueError: conversion to int failed
        # TypeError: no such attribute exists
        return None
1381 1382
def _getUriListAttr(element, attrName, prefix=NS_MMD_1):
    """Gets a list of URIs from an attribute.

    The attribute's value is split at whitespace and every non-empty
    part is turned into an absolute URI using L{_makeAbsoluteUri}.

    @param element: an xml.dom.Element
    @param attrName: the name of the attribute
    @param prefix: the prefix added to relative URIs

    @return: a list of absolute URIs; empty if there is no such attribute
    """
    if not element.hasAttribute(attrName):
        return [ ]

    # A comprehension always builds a real list, so callers may safely
    # use len() and indexing on the result.
    parts = re.split(r'\s+', element.getAttribute(attrName))
    return [_makeAbsoluteUri(prefix, part) for part in parts if part != '']
1395 1396
def _getUriAttr(element, attrName, prefix=NS_MMD_1):
    """Gets a single URI from an attribute.

    If the attribute holds a space-separated list of URIs, only the
    first one is returned.

    @param element: an xml.dom.Element
    @param attrName: the name of the attribute
    @param prefix: the prefix added to relative URIs

    @return: an absolute URI, or None if there is none
    """
    candidates = _getUriListAttr(element, attrName, prefix)

    if len(candidates) == 0:
        return None

    return candidates[0]
1408 1409
def _getBooleanAttr(element, attrName):
    """Reads a boolean from an attribute.

    @param element: an xml.dom.Element
    @param attrName: the name of the attribute

    @return: True for 'true', False for 'false', None for anything else
        (including a missing attribute)
    """
    knownValues = {'true': True, 'false': False}

    return knownValues.get(_getAttr(element, attrName))
1419 1420
def _getDirectionAttr(element, attrName):
    """Gets the Relation reading direction from an attribute.

    @param element: an xml.dom.Element
    @param attrName: the name of the attribute holding the direction

    @return: the attribute's value if it is a forward or backward
        direction (optionally surrounded by whitespace), otherwise
        C{model.Relation.DIR_NONE}
    """
    directions = '|'.join((
        model.Relation.DIR_FORWARD,
        model.Relation.DIR_BACKWARD))
    regex = r'^\s*(' + directions + r')\s*$'

    # Read the attribute the caller asked for instead of a hard-coded
    # name.
    return _getAttr(element, attrName, regex, model.Relation.DIR_NONE)
1427 1428
1429 -def _makeAbsoluteUri(prefix, uriStr):
1430 """Creates an absolute URI adding prefix, if necessary.""" 1431 if uriStr is None: 1432 return None 1433 1434 (scheme, netloc, path, params, query, frag) = urlparse.urlparse(uriStr) 1435 1436 if scheme == '' and netloc == '': 1437 return prefix + uriStr 1438 else: 1439 return uriStr
1440 1441
def _getResourceType(uri):
    """Gets the resource type from a URI.

    The resource type is the part of the URI following the C{NS_REL_1}
    namespace prefix, converted to lower case.

    @param uri: an absolute URI string

    @return: the lower-cased resource type, or None if the URI doesn't
        start with the C{NS_REL_1} prefix
    """
    # The namespace is a literal prefix, not a pattern: escape it so
    # that characters like '.' only match themselves.
    m = re.match('^' + re.escape(NS_REL_1) + '(.*)$', uri)

    if m:
        return m.group(1).lower()

    return None
1453 1454 # EOF 1455