Package cssutils :: Module util
[hide private]
[frames] | no frames]

Source Code for Module cssutils.util

  1  """base classes and helper functions for css and stylesheets packages 
  2  """ 
  3  __all__ = [] 
  4  __docformat__ = 'restructuredtext' 
  5  __version__ = '$Id: util.py 1395 2008-07-27 13:30:13Z cthedot $' 
  6   
  7  import codecs 
  8  from itertools import ifilter 
  9  import re 
 10  import types 
 11  import urllib2 
 12  import xml.dom 
 13  import cssutils 
 14  from tokenize2 import Tokenizer 
15 # COMMENT OUT IF RUNNING THIS TEST STANDALONE! 16 import encutils 17 18 -class Base(object):
19 """ 20 Base class for most CSS and StyleSheets classes 21 22 **Superceded by Base2 which is used for new seq handling class.** 23 See cssutils.util.Base2 24 25 Contains helper methods for inheriting classes helping parsing 26 27 ``_normalize`` is static as used by Preferences. 28 """ 29 __tokenizer2 = Tokenizer() 30 31 _log = cssutils.log 32 _prods = cssutils.tokenize2.CSSProductions 33 34 # for more on shorthand properties see 35 # http://www.dustindiaz.com/css-shorthand/ 36 # format: shorthand: [(propname, mandatorycheck?)*] 37 _SHORTHANDPROPERTIES = { 38 u'background': [], 39 u'background-position': [], 40 u'border': [], 41 u'border-left': [], 42 u'border-right': [], 43 u'border-top': [], 44 u'border-bottom': [], 45 #u'border-color': [], # list or single but same values 46 #u'border-style': [], # list or single but same values 47 #u'border-width': [], # list or single but same values 48 u'cue': [], 49 u'font': [], 50 u'list-style': [], 51 #u'margin': [], # list or single but same values 52 u'outline': [], 53 #u'padding': [], # list or single but same values 54 u'pause': [] 55 } 56 57 # simple escapes, all non unicodes 58 __simpleescapes = re.compile(ur'(\\[^0-9a-fA-F])').sub 59 60 @staticmethod
61 - def _normalize(x):
62 """ 63 normalizes x, namely: 64 65 - remove any \ before non unicode sequences (0-9a-zA-Z) so for 66 x=="c\olor\" return "color" (unicode escape sequences should have 67 been resolved by the tokenizer already) 68 - lowercase 69 """ 70 if x: 71 def removeescape(matchobj): 72 return matchobj.group(0)[1:]
73 x = Base.__simpleescapes(removeescape, x) 74 return x.lower() 75 else: 76 return x
77
78 - def _checkReadonly(self):
79 "raises xml.dom.NoModificationAllowedErr if rule/... is readonly" 80 if hasattr(self, '_readonly') and self._readonly: 81 raise xml.dom.NoModificationAllowedErr( 82 u'%s is readonly.' % self.__class__) 83 return True 84 return False
85
86 - def _splitNamespacesOff(self, text_namespaces_tuple):
87 """ 88 returns tuple (text, dict-of-namespaces) or if no namespaces are 89 in cssText returns (cssText, {}) 90 91 used in Selector, SelectorList, CSSStyleRule, CSSMediaRule and 92 CSSStyleSheet 93 """ 94 if isinstance(text_namespaces_tuple, tuple): 95 return text_namespaces_tuple[0], _SimpleNamespaces( 96 text_namespaces_tuple[1]) 97 else: 98 return text_namespaces_tuple, _SimpleNamespaces()
99
100 - def _tokenize2(self, textortokens):
101 """ 102 returns tokens of textortokens which may already be tokens in which 103 case simply returns input 104 """ 105 if not textortokens: 106 return None 107 elif isinstance(textortokens, basestring): 108 # needs to be tokenized 109 return self.__tokenizer2.tokenize( 110 textortokens) 111 elif types.GeneratorType == type(textortokens): 112 # already tokenized 113 return textortokens 114 elif isinstance(textortokens, tuple): 115 # a single token (like a comment) 116 return [textortokens] 117 else: 118 # already tokenized but return generator 119 return (x for x in textortokens)
120
121 - def _nexttoken(self, tokenizer, default=None):
122 "returns next token in generator tokenizer or the default value" 123 try: 124 return tokenizer.next() 125 except (StopIteration, AttributeError): 126 return default
127
128 - def _type(self, token):
129 "returns type of Tokenizer token" 130 if token: 131 return token[0] 132 else: 133 return None
134
135 - def _tokenvalue(self, token, normalize=False):
136 "returns value of Tokenizer token" 137 if token and normalize: 138 return Base._normalize(token[1]) 139 elif token: 140 return token[1] 141 else: 142 return None
143
144 - def _stringtokenvalue(self, token):
145 """ 146 for STRING returns the actual content without surrounding "" or '' 147 and without respective escapes, e.g.:: 148 149 "with \" char" => with " char 150 """ 151 if token: 152 value = token[1] 153 return value.replace('\\'+value[0], value[0])[1:-1] 154 else: 155 return None
156
157 - def _uritokenvalue(self, token):
158 """ 159 for URI returns the actual content without surrounding url() 160 or url(""), url('') and without respective escapes, e.g.:: 161 162 url("\"") => " 163 """ 164 if token: 165 value = token[1][4:-1].strip() 166 if (value[0] in '\'"') and (value[0] == value[-1]): 167 # a string "..." or '...' 168 value = value.replace('\\'+value[0], value[0])[1:-1] 169 return value 170 else: 171 return None
172
173 - def _tokensupto2(self, 174 tokenizer, 175 starttoken=None, 176 blockstartonly=False, # { 177 blockendonly=False, # } 178 mediaendonly=False, 179 importmediaqueryendonly=False, # ; or STRING 180 mediaqueryendonly=False, # { or STRING 181 semicolon=False, # ; 182 propertynameendonly=False, # : 183 propertyvalueendonly=False, # ! ; } 184 propertypriorityendonly=False, # ; } 185 selectorattendonly=False, # ] 186 funcendonly=False, # ) 187 listseponly=False, # , 188 separateEnd=False # returns (resulttokens, endtoken) 189 ):
190 """ 191 returns tokens upto end of atrule and end index 192 end is defined by parameters, might be ; } ) or other 193 194 default looks for ending "}" and ";" 195 """ 196 ends = u';}' 197 endtypes = () 198 brace = bracket = parant = 0 # {}, [], () 199 200 if blockstartonly: # { 201 ends = u'{' 202 brace = -1 # set to 0 with first { 203 elif blockendonly: # } 204 ends = u'}' 205 brace = 1 206 elif mediaendonly: # } 207 ends = u'}' 208 brace = 1 # rules } and mediarules } 209 elif importmediaqueryendonly: 210 # end of mediaquery which may be ; or STRING 211 ends = u';' 212 endtypes = ('STRING',) 213 elif mediaqueryendonly: 214 # end of mediaquery which may be { or STRING 215 # special case, see below 216 ends = u'{' 217 brace = -1 # set to 0 with first { 218 endtypes = ('STRING',) 219 elif semicolon: 220 ends = u';' 221 elif propertynameendonly: # : and ; in case of an error 222 ends = u':;' 223 elif propertyvalueendonly: # ; or !important 224 ends = u';!' 225 elif propertypriorityendonly: # ; 226 ends = u';' 227 elif selectorattendonly: # ] 228 ends = u']' 229 if starttoken and self._tokenvalue(starttoken) == u'[': 230 bracket = 1 231 elif funcendonly: # ) 232 ends = u')' 233 parant = 1 234 elif listseponly: # , 235 ends = u',' 236 237 resulttokens = [] 238 if starttoken: 239 resulttokens.append(starttoken) 240 if tokenizer: 241 for token in tokenizer: 242 typ, val, line, col = token 243 if 'EOF' == typ: 244 resulttokens.append(token) 245 break 246 if u'{' == val: 247 brace += 1 248 elif u'}' == val: 249 brace -= 1 250 elif u'[' == val: 251 bracket += 1 252 elif u']' == val: 253 bracket -= 1 254 # function( or single ( 255 elif u'(' == val or \ 256 Base._prods.FUNCTION == typ: 257 parant += 1 258 elif u')' == val: 259 parant -= 1 260 261 resulttokens.append(token) 262 263 if (brace == bracket == parant == 0) and ( 264 val in ends or typ in endtypes): 265 break 266 elif mediaqueryendonly and brace == -1 and ( 267 bracket == parant == 0) and typ in endtypes: 268 # 
mediaqueryendonly with STRING 269 break 270 271 if separateEnd: 272 # TODO: use this method as generator, then this makes sense 273 if resulttokens: 274 return resulttokens[:-1], resulttokens[-1] 275 else: 276 return resulttokens, None 277 else: 278 return resulttokens
279
280 - def _valuestr(self, t):
281 """ 282 returns string value of t (t may be a string, a list of token tuples 283 or a single tuple in format (type, value, line, col). 284 Mainly used to get a string value of t for error messages. 285 """ 286 if not t: 287 return u'' 288 elif isinstance(t, basestring): 289 return t 290 else: 291 return u''.join([x[1] for x in t])
292
293 - def _adddefaultproductions(self, productions, new=None):
294 """ 295 adds default productions if not already present, used by 296 _parse only 297 298 each production should return the next expected token 299 normaly a name like "uri" or "EOF" 300 some have no expectation like S or COMMENT, so simply return 301 the current value of self.__expected 302 """ 303 def ATKEYWORD(expected, seq, token, tokenizer=None): 304 "TODO: add default impl for unexpected @rule?" 305 if expected != 'EOF': 306 # TODO: parentStyleSheet=self 307 rule = cssutils.css.CSSUnknownRule() 308 rule.cssText = self._tokensupto2(tokenizer, token) 309 if rule.wellformed: 310 seq.append(rule) 311 return expected 312 else: 313 new['wellformed'] = False 314 self._log.error(u'Expected EOF.', token=token) 315 return expected
316 317 def COMMENT(expected, seq, token, tokenizer=None): 318 "default implementation for COMMENT token adds CSSCommentRule" 319 seq.append(cssutils.css.CSSComment([token])) 320 return expected 321 322 def S(expected, seq, token, tokenizer=None): 323 "default implementation for S token, does nothing" 324 return expected 325 326 def EOF(expected=None, seq=None, token=None, tokenizer=None): 327 "default implementation for EOF token" 328 return 'EOF' 329 330 p = {'ATKEYWORD': ATKEYWORD, 331 'COMMENT': COMMENT, 332 'S': S, 333 'EOF': EOF # only available if fullsheet 334 } 335 p.update(productions) 336 return p 337
338 - def _parse(self, expected, seq, tokenizer, productions, default=None, 339 new=None):
340 """ 341 puts parsed tokens in seq by calling a production with 342 (seq, tokenizer, token) 343 344 expected 345 a name what token or value is expected next, e.g. 'uri' 346 seq 347 to add rules etc to 348 tokenizer 349 call tokenizer.next() to get next token 350 productions 351 callbacks {tokentype: callback} 352 default 353 default callback if tokentype not in productions 354 new 355 used to init default productions 356 357 returns (wellformed, expected) which the last prod might have set 358 """ 359 wellformed = True 360 if tokenizer: 361 prods = self._adddefaultproductions(productions, new) 362 for token in tokenizer: 363 p = prods.get(token[0], default) 364 if p: 365 expected = p(expected, seq, token, tokenizer) 366 else: 367 wellformed = False 368 self._log.error(u'Unexpected token (%s, %s, %s, %s)' % token) 369 return wellformed, expected
370
class Base2(Base):
    """
    Base class for new seq handling, used by Selector for now only
    """
    def __init__(self):
        self._seq = Seq()

    def _setSeq(self, newseq):
        """
        sets newseq and makes it readonly
        """
        newseq._readonly = True
        self._seq = newseq

    seq = property(lambda self: self._seq, doc="seq for most classes")

    def _tempSeq(self, readonly=False):
        "get a writeable Seq() which is added later"
        return Seq(readonly=readonly)

    def _adddefaultproductions(self, productions, new=None):
        """
        adds default productions if not already present, used by
        _parse only

        each production should return the next expected token
        normaly a name like "uri" or "EOF"
        some have no expectation like S or COMMENT, so simply return
        the current value of self.__expected

        productions
            callbacks {tokentype: callback}, these win over the defaults
        new
            optional dict, its key ``wellformed`` is set to False by the
            default productions if a token follows EOF

        returns a new dict, ``productions`` itself is not modified
        """
        def notwellformed():
            # ``new`` defaults to None, only report if a dict was given
            if new is not None:
                new['wellformed'] = False

        def ATKEYWORD(expected, seq, token, tokenizer=None):
            "default impl for unexpected @rule"
            if expected != 'EOF':
                # TODO: parentStyleSheet=self
                rule = cssutils.css.CSSUnknownRule()
                rule.cssText = self._tokensupto2(tokenizer, token)
                if rule.wellformed:
                    seq.append(rule, cssutils.css.CSSRule.UNKNOWN_RULE,
                               line=token[2], col=token[3])
                return expected
            else:
                notwellformed()
                self._log.error(u'Expected EOF.', token=token)
                return expected

        def COMMENT(expected, seq, token, tokenizer=None):
            "default impl, adds CSSCommentRule, logs an error after EOF"
            if expected == 'EOF':
                notwellformed()
                self._log.error(u'Expected EOF but found comment.', token=token)
            # the comment is kept even if it was not expected
            seq.append(cssutils.css.CSSComment([token]), 'COMMENT')
            return expected

        def S(expected, seq, token, tokenizer=None):
            "default impl, does nothing except logging an error after EOF"
            if expected == 'EOF':
                notwellformed()
                self._log.error(u'Expected EOF but found whitespace.', token=token)
            return expected

        def EOF(expected=None, seq=None, token=None, tokenizer=None):
            "default implementation for EOF token"
            return 'EOF'

        defaultproductions = {'ATKEYWORD': ATKEYWORD,
                              'COMMENT': COMMENT,
                              'S': S,
                              'EOF': EOF # only available if fullsheet
                              }
        defaultproductions.update(productions)
        return defaultproductions
class Seq(object):
    """
    property seq of Base2 inheriting classes, holds a list of Item objects.

    used only by Selector for now

    is normally readonly, only writable during parsing
    """
    def __init__(self, readonly=True):
        """
        creates an empty Seq

        readonly
            if true (default) ``append``, ``appendItem`` and ``replace``
            raise AttributeError
        """
        self._seq = []
        self._readonly = readonly

    def __delitem__(self, i):
        # NOTE: does not check ``_readonly``
        del self._seq[i]

    def __getitem__(self, i):
        return self._seq[i]

    def __setitem__(self, i, item):
        """
        ``seq[i] = (val, typ, line, col)`` stores a new Item at i

        NOTE: does not check ``_readonly``
        """
        # explicit unpacking instead of a tuple parameter in the signature
        # (tuple parameters were removed from the language by PEP 3113)
        val, typ, line, col = item
        self._seq[i] = Item(val, typ, line, col)

    def __iter__(self):
        return iter(self._seq)

    def __len__(self):
        return len(self._seq)

    def append(self, val, typ, line=None, col=None):
        "if not readonly add new Item(), else raise AttributeError"
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq.append(Item(val, typ, line, col))

    def appendItem(self, item):
        "if not readonly add item which must be an Item"
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq.append(item)

    def replace(self, index=-1, val=None, typ=None, line=None, col=None):
        """
        if not readonly replace Item at index (default: the last one) with
        a new Item built from val, typ, line and col

        The old Item is replaced completely, values not given are None in
        the new Item.
        """
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq[index] = Item(val, typ, line, col)

    def __repr__(self):
        "returns a repr similar to a list of tuples of (type, value)"
        return u'cssutils.%s.%s([\n %s])' % (self.__module__,
            self.__class__.__name__,
            u',\n '.join([u'(%r, %r)' % (item.type, item.value)
                          for item in self._seq]
            ))

    def __str__(self):
        return "<cssutils.%s.%s object length=%r at 0x%x>" % (
                self.__module__, self.__class__.__name__, len(self), id(self))
510
class Item(object):
    """
    A single entry of a seq list (successor to the tuple items of the
    old seq).

    All attributes are read-only properties:

    type
        a semantic type like "element", "attribute"
    value
        the actual value which may be a string, number etc or an instance
        of e.g. a CSSComment
    line, col
        position given on construction, None if not given
    """
    def __init__(self, value, type, line=None, col=None):
        self.__value = value
        self.__type = type
        self.__line = line
        self.__col = col

    @property
    def type(self):
        return self.__type

    @property
    def value(self):
        return self.__value

    @property
    def line(self):
        return self.__line

    @property
    def col(self):
        return self.__col

    def __repr__(self):
        details = (self.__module__, self.__class__.__name__,
                   self.__value, self.__type, self.__line, self.__col)
        return "%s.%s(value=%r, type=%r, line=%r, col=%r)" % details
540
class ListSeq(object):
    """
    (EXPERIMENTAL)
    Mixin-like base for list classes such as css.SelectorList or
    stylesheets.MediaList.

    Gives the inheriting class list behaviour backed by its attribute
    ``seq``:

    - item in x => bool
    - len(x) => integer
    - get, set and del x[i]
    - for item in x
    - append(item)

    ``__setitem__`` and ``append`` raise NotImplementedError here and must
    be overwritten in the inheriting class.
    """
    def __init__(self):
        # a simple list is sufficient, ``Seq`` is not needed here
        self.seq = []

    def __len__(self):
        return len(self.seq)

    def __contains__(self, item):
        return item in self.seq

    def __iter__(self):
        # generator method: ``self.seq`` is looked up when iteration starts
        for entry in self.seq:
            yield entry

    def __getitem__(self, index):
        return self.seq[index]

    def __delitem__(self, index):
        del self.seq[index]

    def __setitem__(self, index, item):
        "must be overwritten"
        raise NotImplementedError

    def append(self, item):
        "must be overwritten"
        raise NotImplementedError
587 588 -class _Namespaces(object):
589 """ 590 A dictionary like wrapper for @namespace rules used in a CSSStyleSheet. 591 Works on effective namespaces, so e.g. if:: 592 593 @namespace p1 "uri"; 594 @namespace p2 "uri"; 595 596 only the second rule is effective and kept. 597 598 namespaces 599 a dictionary {prefix: namespaceURI} containing the effective namespaces 600 only. These are the latest set in the CSSStyleSheet. 601 parentStyleSheet 602 the parent CSSStyleSheet 603 """
604 - def __init__(self, parentStyleSheet, *args):
605 "no initial values are set, only the relevant sheet is" 606 self.parentStyleSheet = parentStyleSheet
607
608 - def __contains__(self, prefix):
609 return prefix in self.namespaces
610
611 - def __delitem__(self, prefix):
612 """deletes CSSNamespaceRule(s) with rule.prefix == prefix 613 614 prefix '' and None are handled the same 615 """ 616 if not prefix: 617 prefix = u'' 618 delrule = self.__findrule(prefix) 619 for i, rule in enumerate(ifilter(lambda r: r.type == r.NAMESPACE_RULE, 620 self.parentStyleSheet.cssRules)): 621 if rule == delrule: 622 self.parentStyleSheet.deleteRule(i) 623 return 624 625 raise xml.dom.NamespaceErr('Prefix %r not found.' % prefix)
626
627 - def __getitem__(self, prefix):
628 try: 629 return self.namespaces[prefix] 630 except KeyError, e: 631 raise xml.dom.NamespaceErr('Prefix %r not found.' % prefix)
632
633 - def __iter__(self):
634 return self.namespaces.__iter__()
635
636 - def __len__(self):
637 return len(self.namespaces)
638
639 - def __setitem__(self, prefix, namespaceURI):
640 "replaces prefix or sets new rule, may raise NoModificationAllowedErr" 641 if not prefix: 642 prefix = u'' # None or '' 643 rule = self.__findrule(prefix) 644 if not rule: 645 self.parentStyleSheet.insertRule(cssutils.css.CSSNamespaceRule( 646 prefix=prefix, 647 namespaceURI=namespaceURI), 648 inOrder=True) 649 else: 650 if prefix in self.namespaces: 651 rule.namespaceURI = namespaceURI # raises NoModificationAllowedErr 652 if namespaceURI in self.namespaces.values(): 653 rule.prefix = prefix
654
655 - def __findrule(self, prefix):
656 # returns namespace rule where prefix == key 657 for rule in ifilter(lambda r: r.type == r.NAMESPACE_RULE, 658 reversed(self.parentStyleSheet.cssRules)): 659 if rule.prefix == prefix: 660 return rule
661
662 - def __getNamespaces(self):
663 namespaces = {} 664 for rule in ifilter(lambda r: r.type == r.NAMESPACE_RULE, 665 reversed(self.parentStyleSheet.cssRules)): 666 if rule.namespaceURI not in namespaces.values(): 667 namespaces[rule.prefix] = rule.namespaceURI 668 return namespaces
669 670 namespaces = property(__getNamespaces, 671 doc=u'Holds only effective @namespace rules in self.parentStyleSheets' 672 '@namespace rules.') 673
674 - def get(self, prefix, default):
675 return self.namespaces.get(prefix, default)
676
677 - def items(self):
678 return self.namespaces.items()
679
680 - def keys(self):
681 return self.namespaces.keys()
682
683 - def values(self):
684 return self.namespaces.values()
685
686 - def prefixForNamespaceURI(self, namespaceURI):
687 """ 688 returns effective prefix for given namespaceURI or raises IndexError 689 if this cannot be found""" 690 for prefix, uri in self.namespaces.items(): 691 if uri == namespaceURI: 692 return prefix 693 raise IndexError(u'NamespaceURI %r not found.' % namespaceURI)
694
695 - def __str__(self):
696 return u"<cssutils.util.%s object parentStyleSheet=%r at 0x%x>" % ( 697 self.__class__.__name__, str(self.parentStyleSheet), id(self))
698
class _SimpleNamespaces(_Namespaces):
    """
    Namespaces of objects like Selector for as long as these are not
    connected to a CSSStyleSheet; kept in a plain dict instead of being
    read from a sheet's rules.
    """
    def __init__(self, *args):
        # accepts whatever ``dict()`` accepts as positional arguments
        self.__namespaces = dict(*args)

    def __setitem__(self, prefix, namespaceURI):
        self.__namespaces[prefix] = namespaceURI

    namespaces = property(lambda self: self.__namespaces,
                          doc=u'Dict Wrapper for self.sheets @namespace rules.')

    def __str__(self):
        details = (self.__class__.__name__, self.namespaces, id(self))
        return u"<cssutils.util.%s object namespaces=%r at 0x%x>" % details

    def __repr__(self):
        details = (self.__class__.__name__, self.namespaces)
        return u"cssutils.util.%s(%r)" % details
721
def _defaultFetcher(url):
    """Retrieve data from ``url``. cssutils default implementation of fetch
    URL function.

    Returns ``(encoding, string)`` or ``None``

    All errors while opening ``url`` are logged via ``cssutils.log`` (which
    may raise them, depending on its settings), ``None`` is returned
    afterwards. A mime type other than "text/css" is logged as an error but
    the content is returned nevertheless.
    """
    try:
        res = urllib2.urlopen(url)
    except OSError as e:
        # e.g if file URL and not found
        cssutils.log.warn(e, error=OSError)
    except ValueError as e:
        # invalid url, e.g. "1"
        cssutils.log.warn(u'ValueError, %s' % e.message, error=ValueError)
    except urllib2.HTTPError as e:
        # http error, e.g. 404, e can be raised
        cssutils.log.warn(u'HTTPError opening url=%r: %s %s' %
                          (url, e.code, e.msg), error=e)
    except urllib2.URLError as e:
        # URLError like mailto: or other IO errors, e can be raised
        cssutils.log.warn(u'URLError, %s' % e.reason, error=e)
    else:
        if res:
            try:
                mimeType, encoding = encutils.getHTTPInfo(res)
                if mimeType != u'text/css':
                    cssutils.log.error(u'Expected "text/css" mime type for url=%s but found: %r' %
                                  (url, mimeType), error=ValueError)
                return encoding, res.read()
            finally:
                # do not leak the connection, also if reading fails
                res.close()
    return None
751
752 -def _readUrl(url, fetcher=None, overrideEncoding=None, parentEncoding=None):
753 """ 754 Read cssText from url and decode it using all relevant methods (HTTP 755 header, BOM, @charset). Returns 756 757 - encoding used to decode text (which is needed to set encoding of 758 stylesheet properly) 759 - type of encoding (how it was retrieved, see list below) 760 - decodedCssText 761 762 ``fetcher`` 763 see cssutils.registerFetchUrl for details 764 ``overrideEncoding`` 765 If given this encoding is used and all other encoding information is 766 ignored (HTTP, BOM etc) 767 ``parentEncoding`` 768 Encoding of parent stylesheet (while e.g. reading @import references sheets) 769 or document if available. 770 771 Priority or encoding information 772 -------------------------------- 773 **cssutils only**: 0. overrideEncoding 774 775 1. An HTTP "charset" parameter in a "Content-Type" field (or similar parameters in other protocols) 776 2. BOM and/or @charset (see below) 777 3. <link charset=""> or other metadata from the linking mechanism (if any) 778 4. charset of referring style sheet or document (if any) 779 5. Assume UTF-8 780 781 """ 782 enctype = None 783 784 if not fetcher: 785 fetcher = _defaultFetcher 786 r = fetcher(url) 787 if r and len(r) == 2 and r[1] is not None: 788 httpEncoding, content = r 789 790 if overrideEncoding: 791 enctype = 0 # 0. override encoding 792 encoding = overrideEncoding 793 elif httpEncoding: 794 enctype = 1 # 1. HTTP 795 encoding = httpEncoding 796 else: 797 # check content 798 contentEncoding, explicit = cssutils.codec.detectencoding_str(content) 799 if explicit: 800 enctype = 2 # 2. BOM/@charset: explicitly 801 encoding = contentEncoding 802 elif parentEncoding: 803 enctype = 4 # 4. parent stylesheet or document 804 # may also be None in which case 5. is used in next step anyway 805 encoding = parentEncoding 806 else: 807 enctype = 5 # 5. assume UTF-8 808 encoding = 'utf-8' 809 810 try: 811 # encoding may still be wrong if encoding *is lying*! 
812 decodedCssText = codecs.lookup("css")[1](content, encoding=encoding)[0] 813 except UnicodeDecodeError, e: 814 cssutils.log.warn(e, neverraise=True) 815 decodedCssText = None 816 817 return encoding, enctype, decodedCssText 818 else: 819 return None, None, None
820