Package cssutils :: Module util
[hide private]
[frames] | no frames]

Source Code for Module cssutils.util

  1  """base classes for css and stylesheets packages 
  2   
  3  **this test class does not run standalone!** 
  4  see _readURL() to fix this temporarily 
  5   
  6  """ 
  7  __all__ = [] 
  8  __docformat__ = 'restructuredtext' 
  9  __version__ = '$Id: util.py 1198 2008-03-23 00:18:10Z cthedot $' 
 10   
 11   
 12  import codecs 
 13  from itertools import ifilter 
 14  import re 
 15  import types 
 16  import urllib2 
 17  import xml.dom 
 18  import cssutils 
 19  from tokenize2 import Tokenizer 
class Base(object):
    """
    Base class for most CSS and StyleSheets classes

    **Superseded by Base2 which is used for new seq handling class.**
    See cssutils.util.Base2

    Contains helper methods for inheriting classes helping parsing:
    tokenizing text, pulling single values out of tokens, collecting
    tokens up to a terminator and driving a production table.

    ``_normalize`` is static as used by Preferences.
    """
    # one tokenizer instance shared by all instances (name-mangled, so it
    # is only reachable from methods defined in this class)
    __tokenizer2 = Tokenizer()

    _log = cssutils.log
    _prods = cssutils.tokenize2.CSSProductions

    # for more on shorthand properties see
    # http://www.dustindiaz.com/css-shorthand/
    # format: shorthand: [(propname, mandatorycheck?)*]
    _SHORTHANDPROPERTIES = {
        u'background': [],
        u'border': [],
        u'border-left': [],
        u'border-right': [],
        u'border-top': [],
        u'border-bottom': [],
        u'border-color': [],
        u'border-style': [],
        u'border-width': [],
        u'cue': [],
        u'font': [],
        #    [('font-weight', True),
        #     ('font-size', True),
        #     ('line-height', False),
        #     ('font-family', True)],
        u'list-style': [],
        u'margin': [],
        u'outline': [],
        u'padding': [],
        u'pause': []
        }

    # NOTE: both patterns are spelled as plain unicode literals with doubled
    # backslashes; the resulting regex is the same as with a ur'' literal,
    # which newer Python versions no longer accept.

    # simple escapes, all non unicodes
    __escapes = re.compile(u'(\\\\[^0-9a-fA-F])').sub
    # all unicode (see cssproductions "unicode"); the optional trailing
    # character is a single whitespace. The former class
    # ``[\t|\r|\n|\f|\x20]`` also matched a literal "|".
    __unicodes = re.compile(
        u'\\\\[0-9a-fA-F]{1,6}[\\t\\r\\n\\f\\x20]?').sub

    @staticmethod
    def _normalize(x):
        r"""
        normalizes x, namely:

        - remove any \ before non unicode sequences (0-9a-zA-Z) so for
          x=="c\olor\" return "color" (unicode escape sequences should have
          been resolved by the tokenizer already)
        - lowercase

        A false value (``None``, empty string) is returned unchanged.
        """
        if x:
            def removeescape(matchobj):
                # drop the leading backslash, keep the escaped character
                return matchobj.group(0)[1:]
            x = Base.__escapes(removeescape, x)
            return x.lower()
        else:
            return x

    def _checkReadonly(self):
        """
        raises xml.dom.NoModificationAllowedErr if rule/... is readonly,
        returns False otherwise (also if no ``_readonly`` attribute is set)
        """
        if hasattr(self, '_readonly') and self._readonly:
            raise xml.dom.NoModificationAllowedErr(
                u'%s is readonly.' % self.__class__)
        return False

    def _splitNamespacesOff(self, text_namespaces_tuple):
        """
        returns tuple (text, namespaces) where namespaces is a
        _SimpleNamespaces object. If the argument is not a tuple it is
        taken as the text alone and an empty _SimpleNamespaces is returned
        with it.

        used in Selector, SelectorList, CSSStyleRule, CSSMediaRule and
        CSSStyleSheet
        """
        if isinstance(text_namespaces_tuple, tuple):
            return text_namespaces_tuple[0], _SimpleNamespaces(
                text_namespaces_tuple[1])
        else:
            return text_namespaces_tuple, _SimpleNamespaces()

    def _tokenize2(self, textortokens):
        """
        returns tokens of textortokens which may already be tokens in which
        case simply returns input

        - false value: returns None
        - string: is tokenized
        - generator: returned as is
        - tuple: taken as a single token, returned in a list
        - anything else: wrapped in a generator
        """
        if not textortokens:
            return None
        elif isinstance(textortokens, basestring):
            # needs to be tokenized
            return self.__tokenizer2.tokenize(
                textortokens)
        elif isinstance(textortokens, types.GeneratorType):
            # already tokenized
            return textortokens
        elif isinstance(textortokens, tuple):
            # a single token (like a comment)
            return [textortokens]
        else:
            # already tokenized but return generator
            return (x for x in textortokens)

    def _nexttoken(self, tokenizer, default=None):
        """
        returns next token in generator tokenizer or the default value
        (also if tokenizer has no ``next`` method, e.g. is None)
        """
        try:
            return tokenizer.next()
        except (StopIteration, AttributeError):
            return default

    def _type(self, token):
        "returns type of Tokenizer token or None if token is false"
        if token:
            return token[0]
        else:
            return None

    def _tokenvalue(self, token, normalize=False):
        """
        returns value of Tokenizer token (normalized with ``_normalize``
        if requested) or None if token is false
        """
        if token and normalize:
            return Base._normalize(token[1])
        elif token:
            return token[1]
        else:
            return None

    def _stringtokenvalue(self, token):
        r"""
        for STRING returns the actual content without surrounding "" or ''
        and without respective escapes, e.g.::

             "with \" char" => with " char

        returns None if token is false
        """
        if token:
            value = token[1]
            if not value:
                # nothing to unquote; avoids IndexError on value[0]
                return value
            return value.replace('\\' + value[0], value[0])[1:-1]
        else:
            return None

    def _uritokenvalue(self, token):
        r"""
        for URI returns the actual content without surrounding url()
        or url(""), url('') and without respective escapes, e.g.::

             url("\"") => "

        returns None if token is false
        """
        if token:
            # strip leading "url(" and trailing ")"
            value = token[1][4:-1].strip()
            # ``value`` is empty for "url()", so guard before indexing
            if value and (value[0] in '\'"') and (value[0] == value[-1]):
                # a string "..." or '...'
                value = value.replace('\\' + value[0], value[0])[1:-1]
            return value
        else:
            return None

    def _tokensupto2(self,
                     tokenizer,
                     starttoken=None,
                     blockstartonly=False,  # {
                     blockendonly=False,  # }
                     mediaendonly=False,
                     importmediaqueryendonly=False,  # ; or STRING
                     mediaqueryendonly=False,  # { or STRING
                     semicolon=False,  # ;
                     propertynameendonly=False,  # :
                     propertyvalueendonly=False,  # ! ; }
                     propertypriorityendonly=False,  # ; }
                     selectorattendonly=False,  # ]
                     funcendonly=False,  # )
                     listseponly=False,  # ,
                     separateEnd=False  # returns (resulttokens, endtoken)
                     ):
        """
        consumes tokens from tokenizer up to and including an end token
        and returns them as a list (preceded by starttoken if given)

        end is defined by parameters, might be ; } ) or other; the first
        true flag in the order of the signature wins.
        default looks for ending "}" and ";"

        An end token only counts while the nesting of {}, [] and () is
        balanced. An EOF token always ends the run and is included.

        If separateEnd is true returns ``(resulttokens, endtoken)`` with
        the last token split off (``([], None)`` if nothing was read).
        """
        ends = u';}'
        endtypes = ()
        brace = bracket = parant = 0  # {}, [], ()

        if blockstartonly:  # {
            ends = u'{'
            brace = -1  # set to 0 with first {
        elif blockendonly:  # }
            ends = u'}'
            brace = 1
        elif mediaendonly:  # }
            ends = u'}'
            brace = 1  # rules } and mediarules }
        elif importmediaqueryendonly:
            # end of mediaquery which may be ; or STRING
            ends = u';'
            endtypes = ('STRING',)
        elif mediaqueryendonly:
            # end of mediaquery which may be { or STRING
            # special case, see below
            ends = u'{'
            brace = -1  # set to 0 with first {
            endtypes = ('STRING',)
        elif semicolon:
            ends = u';'
        elif propertynameendonly:  # : and ; in case of an error
            ends = u':;'
        elif propertyvalueendonly:  # ; or !important
            ends = u';!'
        elif propertypriorityendonly:  # ;
            ends = u';'
        elif selectorattendonly:  # ]
            ends = u']'
            if starttoken and self._tokenvalue(starttoken) == u'[':
                bracket = 1
        elif funcendonly:  # )
            ends = u')'
            parant = 1
        elif listseponly:  # ,
            ends = u','

        resulttokens = []
        if starttoken:
            resulttokens.append(starttoken)
        if tokenizer:
            for token in tokenizer:
                typ, val, line, col = token
                if 'EOF' == typ:
                    resulttokens.append(token)
                    break
                if u'{' == val:
                    brace += 1
                elif u'}' == val:
                    brace -= 1
                elif u'[' == val:
                    bracket += 1
                elif u']' == val:
                    bracket -= 1
                # function( or single (
                elif u'(' == val or \
                     Base._prods.FUNCTION == typ:
                    parant += 1
                elif u')' == val:
                    parant -= 1

                resulttokens.append(token)

                if (brace == bracket == parant == 0) and (
                    val in ends or typ in endtypes):
                    break
                elif mediaqueryendonly and brace == -1 and (
                     bracket == parant == 0) and typ in endtypes:
                    # mediaqueryendonly with STRING
                    break

        if separateEnd:
            # TODO: use this method as generator, then this makes sense
            if resulttokens:
                return resulttokens[:-1], resulttokens[-1]
            else:
                return resulttokens, None
        else:
            return resulttokens

    def _valuestr(self, t):
        """
        returns string value of t (t may be a string, a list of token tuples
        or a single tuple in format (type, value, line, col).
        Mainly used to get a string value of t for error messages.
        """
        if not t:
            return u''
        elif isinstance(t, basestring):
            return t
        elif isinstance(t, tuple) and isinstance(t[0], basestring):
            # a single token: its first item is the type name, not a
            # token. Joining over it would index into the fields instead.
            return t[1]
        else:
            return u''.join([x[1] for x in t])

    def _adddefaultproductions(self, productions, new=None):
        """
        returns a new dict of the default productions for ATKEYWORD,
        COMMENT, S and EOF updated with (and so overridden by) the given
        productions, used by _parse only

        each production should return the next expected token
        normaly a name like "uri" or "EOF"
        some have no expectation like S or COMMENT, so simply return
        the current value of expected

        ``new`` is accepted but not used by these defaults.
        """
        def ATKEYWORD(expected, seq, token, tokenizer=None):
            "TODO: add default impl for unexpected @rule?"
            return expected

        def COMMENT(expected, seq, token, tokenizer=None):
            "default implementation for COMMENT token adds CSSCommentRule"
            seq.append(cssutils.css.CSSComment([token]))
            return expected

        def S(expected, seq, token, tokenizer=None):
            "default implementation for S token, does nothing"
            return expected

        def EOF(expected=None, seq=None, token=None, tokenizer=None):
            "default implementation for EOF token"
            return 'EOF'

        p = {'ATKEYWORD': ATKEYWORD,
             'COMMENT': COMMENT,
             'S': S,
             'EOF': EOF  # only available if fullsheet
             }
        p.update(productions)
        return p

    def _parse(self, expected, seq, tokenizer, productions, default=None,
               new=None):
        """
        puts parsed tokens in seq by calling a production with
        (expected, seq, token, tokenizer)

        expected
            a name what token or value is expected next, e.g. 'uri'
        seq
            to add rules etc to
        tokenizer
            iterated to get the tokens
        productions
            callbacks {tokentype: callback}
        default
            default callback if tokentype not in productions
        new
            used to init default productions

        A token without any production is logged as an error and makes
        the result not wellformed; parsing goes on with the next token.

        returns (wellformed, expected) which the last prod might have set
        """
        wellformed = True
        if tokenizer:
            prods = self._adddefaultproductions(productions, new)
            for token in tokenizer:
                p = prods.get(token[0], default)
                if p:
                    expected = p(expected, seq, token, tokenizer)
                else:
                    wellformed = False
                    self._log.error(u'Unexpected token (%s, %s, %s, %s)' % token)
        return wellformed, expected
368
class Base2(Base):
    """
    Base class for new seq handling, used by Selector for now only
    """
    def __init__(self):
        # starts with an empty Seq in its default mode
        self._seq = Seq()

    def _setSeq(self, newseq):
        """
        sets newseq and makes it readonly
        """
        newseq._readonly = True
        self._seq = newseq

    seq = property(lambda self: self._seq, doc="seq for most classes")

    def _tempSeq(self, readonly=False):
        "get a writeable Seq() which is added later"
        return Seq(readonly=readonly)

    def _adddefaultproductions(self, productions, new=None):
        """
        returns a new dict of the default productions for ATKEYWORD,
        COMMENT, S and EOF updated with (and so overridden by) the given
        productions, used by _parse only

        each production should return the next expected token
        normaly a name like "uri" or "EOF"
        some have no expectation like S or COMMENT, so simply return
        the current value of expected

        new
            a dict in which the defaults set ``new['wellformed'] = False``
            if a token is found although EOF was expected. If omitted a
            private dict is used so the defaults still only log the error
            instead of failing on ``None``.
        """
        if new is None:
            # the default productions below write into ``new``
            new = {}

        def ATKEYWORD(expected, seq, token, tokenizer=None):
            "default impl for unexpected @rule"
            if expected != 'EOF':
                # TODO: parentStyleSheet=self
                rule = cssutils.css.CSSUnknownRule()
                rule.cssText = self._tokensupto2(tokenizer, token)
                if rule.wellformed:
                    seq.append(rule, cssutils.css.CSSRule.UNKNOWN_RULE,
                               line=token[2], col=token[3])
                return expected
            else:
                new['wellformed'] = False
                self._log.error(u'Expected EOF.', token=token)
                return expected

        def COMMENT(expected, seq, token, tokenizer=None):
            "default impl, adds CSSCommentRule if not token == EOF"
            if expected == 'EOF':
                new['wellformed'] = False
                self._log.error(u'Expected EOF but found comment.', token=token)
            # the comment is appended in either case
            seq.append(cssutils.css.CSSComment([token]), 'COMMENT')
            return expected

        def S(expected, seq, token, tokenizer=None):
            "default impl, does nothing if not token == EOF"
            if expected == 'EOF':
                new['wellformed'] = False
                self._log.error(u'Expected EOF but found whitespace.', token=token)
            return expected

        def EOF(expected=None, seq=None, token=None, tokenizer=None):
            "default implementation for EOF token"
            return 'EOF'

        defaultproductions = {'ATKEYWORD': ATKEYWORD,
                              'COMMENT': COMMENT,
                              'S': S,
                              'EOF': EOF  # only available if fullsheet
                              }
        defaultproductions.update(productions)
        return defaultproductions
class Seq(object):
    """
    property seq of Base2 inheriting classes, holds a list of Item objects.

    used only by Selector for now

    is normally readonly, only writable during parsing. The readonly flag
    is checked by ``append`` and ``replace`` only; item assignment and
    deletion (``seq[i] = ...``, ``del seq[i]``) do not check it.
    """
    def __init__(self, readonly=True):
        """
        starts with an empty list of items

        readonly
            if true ``append`` and ``replace`` raise AttributeError
        """
        self._seq = []
        self._readonly = readonly

    def __delitem__(self, i):
        del self._seq[i]

    def __getitem__(self, i):
        return self._seq[i]

    def __setitem__(self, i, item):
        """
        ``seq[i] = (val, typ, line, col)`` stores a new Item built from
        the given 4-tuple
        """
        # explicit unpacking instead of a tuple parameter in the signature
        val, typ, line, col = item
        self._seq[i] = Item(val, typ, line, col)

    def __iter__(self):
        return iter(self._seq)

    def __len__(self):
        return len(self._seq)

    def append(self, val, typ, line=None, col=None):
        "if not readonly add new Item(), else raise AttributeError"
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq.append(Item(val, typ, line, col))

    def replace(self, index=-1, val=None, typ=None, line=None, col=None):
        """
        if not readonly replace Item at index (default: the last one)
        with a new Item built from the given arguments, else raise
        AttributeError

        Nothing is taken over from the replaced Item: arguments left out
        are None in the new Item.
        """
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq[index] = Item(val, typ, line, col)

    def __repr__(self):
        "returns a repr listing a (type, value) pair for each item"
        # NOTE(review): whitespace was collapsed in the listing this was
        # taken from; confirm the indent after "\n" in both literals
        # against the upstream file.
        return u'cssutils.%s.%s([\n %s])' % (self.__module__,
            self.__class__.__name__,
            u',\n '.join([u'(%r, %r)' % (item.type, item.value)
                          for item in self._seq]
            ))

    def __str__(self):
        return "<cssutils.%s.%s object length=%r at 0x%x>" % (
            self.__module__, self.__class__.__name__, len(self), id(self))
501
class Item(object):
    """
    One entry of a seq list (successor to the tuple items of the old seq).

    All four attributes are set once by the constructor and are exposed
    as read-only properties:

    type
        a sematic type like "element", "attribute"
    value
        the actual value which may be a string, number etc or an instance
        of e.g. a CSSComment
    line, col
        optional position, None if not given
    """
    def __init__(self, value, type, line=None, col=None):
        self.__value, self.__type = value, type
        self.__line, self.__col = line, col

    @property
    def type(self):
        return self.__type

    @property
    def value(self):
        return self.__value

    @property
    def line(self):
        return self.__line

    @property
    def col(self):
        return self.__col

    def __repr__(self):
        fields = (self.__module__, self.__class__.__name__,
                  self.__value, self.__type, self.__line, self.__col)
        return "%s.%s(value=%r, type=%r, line=%r, col=%r)" % fields
531
class ListSeq(object):
    """
    (EXPERIMENTAL)
    Mixin-like base for list classes such as css.SelectorList or
    stylesheets.MediaList.

    Everything is delegated to the plain list held in ``self.seq``:

    - item in x => bool
    - len(x) => integer
    - get and del x[i]
    - for item in x

    Writing (``x[i] = item`` and ``append(item)``) raises
    NotImplementedError here and must be overwritten in the inheriting
    class.
    """
    def __init__(self):
        # a simple list is enough here, no ``Seq`` needed
        self.seq = []

    def __contains__(self, item):
        return item in self.seq

    def __delitem__(self, index):
        del self.seq[index]

    def __getitem__(self, index):
        return self.seq[index]

    def __iter__(self):
        # generator: ``self.seq`` is looked up when iteration starts
        for entry in self.seq:
            yield entry

    def __len__(self):
        return len(self.seq)

    def __setitem__(self, index, item):
        "must be overwritten"
        raise NotImplementedError

    def append(self, item):
        "must be overwritten"
        raise NotImplementedError
class Deprecated(object):
    """Decorator marking a function or method as deprecated.

    Each call of the decorated function emits a DeprecationWarning and
    then runs the original function with the same arguments.

    The single parameter ``msg`` is appended to the warning text; it
    should tell which function or method to use instead.
    """
    def __init__(self, msg):
        self.msg = msg

    def __call__(self, func):
        def deprecated_call(*args, **kwargs):
            import warnings
            text = "Call to deprecated method %r. %s" % (
                func.__name__, self.msg)
            # stacklevel=2 attributes the warning to the caller
            warnings.warn(text, category=DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)

        # make the wrapper look like the wrapped function
        for attr in ('__name__', '__doc__'):
            setattr(deprecated_call, attr, getattr(func, attr))
        deprecated_call.__dict__.update(func.__dict__)
        return deprecated_call
602
class _Namespaces(object):
    """
    A dictionary like wrapper for @namespace rules used in a CSSStyleSheet.
    Works on effective namespaces, so e.g. if::

        @namespace p1 "uri";
        @namespace p2 "uri";

    only the second rule is effective and kept.

    namespaces
        a dictionary {prefix: namespaceURI} containing the effective namespaces
        only. These are the latest set in the CSSStyleSheet. It is built
        from ``parentStyleSheet.cssRules`` on each access.
    parentStyleSheet
        the parent CSSStyleSheet
    """
    def __init__(self, parentStyleSheet, *args):
        "no initial values are set, only the relevant sheet is"
        self.parentStyleSheet = parentStyleSheet

    def __contains__(self, prefix):
        return prefix in self.namespaces

    def __delitem__(self, prefix):
        """deletes CSSNamespaceRule(s) with rule.prefix == prefix

        prefix '' and None are handled the same

        raises xml.dom.NamespaceErr if no rule for prefix is found
        """
        if not prefix:
            prefix = u''
        delrule = self.__findrule(prefix)
        # ``deleteRule`` needs the position in the complete rule list, so
        # count over all rules and not only over the @namespace rules
        for i, rule in enumerate(self.parentStyleSheet.cssRules):
            if rule.type == rule.NAMESPACE_RULE and rule == delrule:
                self.parentStyleSheet.deleteRule(i)
                return

        raise xml.dom.NamespaceErr('Prefix %r not found.' % prefix)

    def __getitem__(self, prefix):
        "returns namespaceURI for prefix or raises xml.dom.NamespaceErr"
        try:
            return self.namespaces[prefix]
        except KeyError:
            raise xml.dom.NamespaceErr('Prefix %r not found.' % prefix)

    def __iter__(self):
        return self.namespaces.__iter__()

    def __len__(self):
        return len(self.namespaces)

    def __setitem__(self, prefix, namespaceURI):
        "replaces prefix or sets new rule, may raise NoModificationAllowedErr"
        if not prefix:
            prefix = u''  # None or ''
        rule = self.__findrule(prefix)
        if not rule:
            self.parentStyleSheet.insertRule(cssutils.css.CSSNamespaceRule(
                                                prefix=prefix,
                                                namespaceURI=namespaceURI),
                                             inOrder=True)
        else:
            if prefix in self.namespaces:
                rule.namespaceURI = namespaceURI  # raises NoModificationAllowedErr
            if namespaceURI in self.namespaces.values():
                rule.prefix = prefix

    def __findrule(self, prefix):
        # returns the last namespace rule where prefix == key, else None
        for rule in reversed(self.parentStyleSheet.cssRules):
            if rule.type == rule.NAMESPACE_RULE and rule.prefix == prefix:
                return rule

    def __getNamespaces(self):
        # walks the rules backwards so that for each namespaceURI only the
        # last @namespace rule is taken
        namespaces = {}
        for rule in reversed(self.parentStyleSheet.cssRules):
            if rule.type != rule.NAMESPACE_RULE:
                continue
            if rule.namespaceURI not in namespaces.values():
                namespaces[rule.prefix] = rule.namespaceURI
        return namespaces

    namespaces = property(__getNamespaces,
        doc=u'Holds only the effective @namespace rules of '
            u'self.parentStyleSheet.')

    def get(self, prefix, default=None):
        "returns namespaceURI for prefix or default if prefix is not set"
        return self.namespaces.get(prefix, default)

    def items(self):
        return self.namespaces.items()

    def keys(self):
        return self.namespaces.keys()

    def values(self):
        return self.namespaces.values()

    def prefixForNamespaceURI(self, namespaceURI):
        """
        returns effective prefix for given namespaceURI or raises IndexError
        if this cannot be found"""
        for prefix, uri in self.namespaces.items():
            if uri == namespaceURI:
                return prefix
        raise IndexError(u'NamespaceURI %r not found.' % namespaceURI)

    def __str__(self):
        return u"<cssutils.util.%s object parentStyleSheet=%r namespaces=%r "\
               u"at 0x%x>" % (
                self.__class__.__name__, str(self.parentStyleSheet),
                self.namespaces, id(self))
716
class _SimpleNamespaces(_Namespaces):
    """
    Namespaces for objects like Selector while they are not connected to
    a CSSStyleSheet: prefixes and namespaceURIs are kept in a plain dict
    owned by this object.
    """
    def __init__(self, *args):
        # accepts whatever ``dict()`` accepts positionally
        self.__namespaces = dict(*args)

    def __setitem__(self, prefix, namespaceURI):
        self.__namespaces[prefix] = namespaceURI

    def __getNamespaces(self):
        return self.__namespaces

    namespaces = property(
        __getNamespaces,
        doc=u'Dict Wrapper for self.sheets @namespace rules.')

    def __str__(self):
        details = (self.__class__.__name__, self.namespaces, id(self))
        return u"<cssutils.util.%s object namespaces=%r at 0x%x>" % details

    def __repr__(self):
        details = (self.__class__.__name__, self.namespaces)
        return u"cssutils.util.%s(%r)" % details
739
def _readURL(url, encoding=None):
    """Retrieve text from url using explicit or detected encoding via encutils

    url
        the URL to open with urllib2
    encoding
        if given it is used to decode the response; if not, the encoding
        (and the media type) are taken from ``encutils.getHTTPInfo`` and a
        media type other than "text/css" is logged as a warning

    Returns the result of reading the response through the stream reader
    of the "css" codec. Errors while opening or reading are not raised
    but logged with ``cssutils.log.warn``; in that case (or if the
    response object is false) nothing is returned explicitly, so the
    result is None.
    """
    try:
        req = urllib2.Request(url)
        res = urllib2.urlopen(req)
    except ValueError, e:
        # invalid url, e.g. "1"
        cssutils.log.warn(u'Error opening url=%r: %s' % (url, e.message),
                          error=ValueError)
    except urllib2.HTTPError, e:
        # http error, e.g. 404
        cssutils.log.warn(u'Error opening url=%r: %s %s' % (url, e.code, e.msg),
                          error=e) # special case error=e!
    except IOError, e:
        # URLError like mailto: or other IO errors
        cssutils.log.warn(u'Error opening url=%r: %r' % (url, e.args),
                          error=e) # special case error=e!
    else:
        if res:
            # get real URL, may have been redirected
            url = res.geturl()

            if not encoding:
                # COMMENT OUT IF RUNNING THIS TEST STANDALONE!
                import encutils # this test class does not run standalone!
                media_type, encoding = encutils.getHTTPInfo(res)
                if media_type != u'text/css':
                    cssutils.log.warn(u'Unexpected media type opening url=%s: %r != "text/css"' %
                                      (url, media_type))
            try:
                # index 2 of the lookup result is the codec's stream reader
                # NOTE(review): the "css" codec is not registered in this
                # module - presumably done elsewhere in cssutils; confirm.
                return codecs.lookup("css")[2](res, encoding=encoding).read()
            except urllib2.HTTPError, e:
                # http error
                cssutils.log.warn(u'Error reading url=%r: %s %s' % (url, e.code, e.msg),
                                  error=e) # special case error=e!
            except IOError, e:
                cssutils.log.warn(u'Error opening url=%r: %r' % (url, e.args),
                                  error=e)
780