1 """base classes for css and stylesheets packages
2
3 **this test class does not run standalone!**
4 see _readURL() to fix this temporarily
5
6 """
7 __all__ = []
8 __docformat__ = 'restructuredtext'
9 __version__ = '$Id: util.py 1198 2008-03-23 00:18:10Z cthedot $'
10
11
12 import codecs
13 from itertools import ifilter
14 import re
15 import types
16 import urllib2
17 import xml.dom
18 import cssutils
19 from tokenize2 import Tokenizer
20
21 -class Base(object):
22 """
23 Base class for most CSS and StyleSheets classes
24
25 **Superseded by Base2 which is used for new seq handling class.**
26 See cssutils.util.Base2
27
28 Contains helper methods for inheriting classes helping parsing
29
30 ``_normalize`` is static as used by Preferences.
31 """
32 __tokenizer2 = Tokenizer()
33
34 _log = cssutils.log
35 _prods = cssutils.tokenize2.CSSProductions
36
37
38
39
40 _SHORTHANDPROPERTIES = {
41 u'background': [],
42 u'border': [],
43 u'border-left': [],
44 u'border-right': [],
45 u'border-top': [],
46 u'border-bottom': [],
47 u'border-color': [],
48 u'border-style': [],
49 u'border-width': [],
50 u'cue': [],
51 u'font': [],
52
53
54
55
56 u'list-style': [],
57 u'margin': [],
58 u'outline': [],
59 u'padding': [],
60 u'pause': []
61 }
62
63
64 __escapes = re.compile(ur'(\\[^0-9a-fA-F])').sub
65
66 __unicodes = re.compile(ur'\\[0-9a-fA-F]{1,6}[\t|\r|\n|\f|\x20]?').sub
67
68 @staticmethod
70 """
71 normalizes x, namely:
72
73 - remove any \ before non unicode sequences (0-9a-zA-Z) so for
74 x=="c\olor\" return "color" (unicode escape sequences should have
75 been resolved by the tokenizer already)
76 - lowercase
77 """
78 if x:
79 def removeescape(matchobj):
80 return matchobj.group(0)[1:]
81 x = Base.__escapes(removeescape, x)
82 return x.lower()
83 else:
84 return x
85
87 "raises xml.dom.NoModificationAllowedErr if rule/... is readonly"
88 if hasattr(self, '_readonly') and self._readonly:
89 raise xml.dom.NoModificationAllowedErr(
90 u'%s is readonly.' % self.__class__)
91 return True
92 return False
93
95 """
96 returns tuple (text, dict-of-namespaces) or if no namespaces are
97 in cssText returns (cssText, {})
98
99 used in Selector, SelectorList, CSSStyleRule, CSSMediaRule and
100 CSSStyleSheet
101 """
102 if isinstance(text_namespaces_tuple, tuple):
103 return text_namespaces_tuple[0], _SimpleNamespaces(
104 text_namespaces_tuple[1])
105 else:
106 return text_namespaces_tuple, _SimpleNamespaces()
107
109 """
110 returns tokens of textortokens which may already be tokens in which
111 case simply returns input
112 """
113 if not textortokens:
114 return None
115 elif isinstance(textortokens, basestring):
116
117 return self.__tokenizer2.tokenize(
118 textortokens)
119 elif types.GeneratorType == type(textortokens):
120
121 return textortokens
122 elif isinstance(textortokens, tuple):
123
124 return [textortokens]
125 else:
126
127 return (x for x in textortokens)
128
130 "returns next token in generator tokenizer or the default value"
131 try:
132 return tokenizer.next()
133 except (StopIteration, AttributeError):
134 return default
135
137 "returns type of Tokenizer token"
138 if token:
139 return token[0]
140 else:
141 return None
142
144 "returns value of Tokenizer token"
145 if token and normalize:
146 return Base._normalize(token[1])
147 elif token:
148 return token[1]
149 else:
150 return None
151
153 """
154 for STRING returns the actual content without surrounding "" or ''
155 and without respective escapes, e.g.::
156
157 "with \" char" => with " char
158 """
159 if token:
160 value = token[1]
161 return value.replace('\\'+value[0], value[0])[1:-1]
162 else:
163 return None
164
166 """
167 for URI returns the actual content without surrounding url()
168 or url(""), url('') and without respective escapes, e.g.::
169
170 url("\"") => "
171 """
172 if token:
173 value = token[1][4:-1].strip()
174 if (value[0] in '\'"') and (value[0] == value[-1]):
175
176 value = value.replace('\\'+value[0], value[0])[1:-1]
177 return value
178 else:
179 return None
180
181 - def _tokensupto2(self,
182 tokenizer,
183 starttoken=None,
184 blockstartonly=False,
185 blockendonly=False,
186 mediaendonly=False,
187 importmediaqueryendonly=False,
188 mediaqueryendonly=False,
189 semicolon=False,
190 propertynameendonly=False,
191 propertyvalueendonly=False,
192 propertypriorityendonly=False,
193 selectorattendonly=False,
194 funcendonly=False,
195 listseponly=False,
196 separateEnd=False
197 ):
198 """
199 returns tokens upto end of atrule and end index
200 end is defined by parameters, might be ; } ) or other
201
202 default looks for ending "}" and ";"
203 """
204 ends = u';}'
205 endtypes = ()
206 brace = bracket = parant = 0
207
208 if blockstartonly:
209 ends = u'{'
210 brace = -1
211 elif blockendonly:
212 ends = u'}'
213 brace = 1
214 elif mediaendonly:
215 ends = u'}'
216 brace = 1
217 elif importmediaqueryendonly:
218
219 ends = u';'
220 endtypes = ('STRING',)
221 elif mediaqueryendonly:
222
223
224 ends = u'{'
225 brace = -1
226 endtypes = ('STRING',)
227 elif semicolon:
228 ends = u';'
229 elif propertynameendonly:
230 ends = u':;'
231 elif propertyvalueendonly:
232 ends = u';!'
233 elif propertypriorityendonly:
234 ends = u';'
235 elif selectorattendonly:
236 ends = u']'
237 if starttoken and self._tokenvalue(starttoken) == u'[':
238 bracket = 1
239 elif funcendonly:
240 ends = u')'
241 parant = 1
242 elif listseponly:
243 ends = u','
244
245 resulttokens = []
246 if starttoken:
247 resulttokens.append(starttoken)
248 if tokenizer:
249 for token in tokenizer:
250 typ, val, line, col = token
251 if 'EOF' == typ:
252 resulttokens.append(token)
253 break
254 if u'{' == val:
255 brace += 1
256 elif u'}' == val:
257 brace -= 1
258 elif u'[' == val:
259 bracket += 1
260 elif u']' == val:
261 bracket -= 1
262
263 elif u'(' == val or \
264 Base._prods.FUNCTION == typ:
265 parant += 1
266 elif u')' == val:
267 parant -= 1
268
269 resulttokens.append(token)
270
271 if (brace == bracket == parant == 0) and (
272 val in ends or typ in endtypes):
273 break
274 elif mediaqueryendonly and brace == -1 and (
275 bracket == parant == 0) and typ in endtypes:
276
277 break
278
279 if separateEnd:
280
281 if resulttokens:
282 return resulttokens[:-1], resulttokens[-1]
283 else:
284 return resulttokens, None
285 else:
286 return resulttokens
287
289 """
290 returns string value of t (t may be a string, a list of token tuples
291 or a single tuple in format (type, value, line, col).
292 Mainly used to get a string value of t for error messages.
293 """
294 if not t:
295 return u''
296 elif isinstance(t, basestring):
297 return t
298 else:
299 return u''.join([x[1] for x in t])
300
302 """
303 adds default productions if not already present, used by
304 _parse only
305
306 each production should return the next expected token
307 normaly a name like "uri" or "EOF"
308 some have no expectation like S or COMMENT, so simply return
309 the current value of self.__expected
310 """
311 def ATKEYWORD(expected, seq, token, tokenizer=None):
312 "TODO: add default impl for unexpected @rule?"
313 return expected
314
315 def COMMENT(expected, seq, token, tokenizer=None):
316 "default implementation for COMMENT token adds CSSCommentRule"
317 seq.append(cssutils.css.CSSComment([token]))
318 return expected
319
320 def S(expected, seq, token, tokenizer=None):
321 "default implementation for S token, does nothing"
322 return expected
323
324 def EOF(expected=None, seq=None, token=None, tokenizer=None):
325 "default implementation for EOF token"
326 return 'EOF'
327
328 p = {'ATKEYWORD': ATKEYWORD,
329 'COMMENT': COMMENT,
330 'S': S,
331 'EOF': EOF
332 }
333 p.update(productions)
334 return p
335
336 - def _parse(self, expected, seq, tokenizer, productions, default=None,
337 new=None):
338 """
339 puts parsed tokens in seq by calling a production with
340 (seq, tokenizer, token)
341
342 expected
343 a name what token or value is expected next, e.g. 'uri'
344 seq
345 to add rules etc to
346 tokenizer
347 call tokenizer.next() to get next token
348 productions
349 callbacks {tokentype: callback}
350 default
351 default callback if tokentype not in productions
352 new
353 used to init default productions
354
355 returns (wellformed, expected) which the last prod might have set
356 """
357 wellformed = True
358 if tokenizer:
359 prods = self._adddefaultproductions(productions, new)
360 for token in tokenizer:
361 p = prods.get(token[0], default)
362 if p:
363 expected = p(expected, seq, token, tokenizer)
364 else:
365 wellformed = False
366 self._log.error(u'Unexpected token (%s, %s, %s, %s)' % token)
367 return wellformed, expected
368
371 """
372 Base class for new seq handling, used by Selector for now only
373 """
376
378 """
379 sets newseq and makes it readonly
380 """
381 newseq._readonly = True
382 self._seq = newseq
383
384 seq = property(lambda self: self._seq, doc="seq for most classes")
385
387 "get a writeable Seq() which is added later"
388 return Seq(readonly=readonly)
389
391 """
392 adds default productions if not already present, used by
393 _parse only
394
395 each production should return the next expected token
396 normally a name like "uri" or "EOF"
397 some have no expectation like S or COMMENT, so simply return
398 the current value of self.__expected
399 """
400 def ATKEYWORD(expected, seq, token, tokenizer=None):
401 "default impl for unexpected @rule"
402 if expected != 'EOF':
403
404 rule = cssutils.css.CSSUnknownRule()
405 rule.cssText = self._tokensupto2(tokenizer, token)
406 if rule.wellformed:
407 seq.append(rule, cssutils.css.CSSRule.UNKNOWN_RULE,
408 line=token[2], col=token[3])
409 return expected
410 else:
411 new['wellformed'] = False
412 self._log.error(u'Expected EOF.', token=token)
413 return expected
414
415 def COMMENT(expected, seq, token, tokenizer=None):
416 "default impl, adds CSSCommentRule if not token == EOF"
417 if expected == 'EOF':
418 new['wellformed'] = False
419 self._log.error(u'Expected EOF but found comment.', token=token)
420 seq.append(cssutils.css.CSSComment([token]), 'COMMENT')
421 return expected
422
423 def S(expected, seq, token, tokenizer=None):
424 "default impl, does nothing if not token == EOF"
425 if expected == 'EOF':
426 new['wellformed'] = False
427 self._log.error(u'Expected EOF but found whitespace.', token=token)
428 return expected
429
430 def EOF(expected=None, seq=None, token=None, tokenizer=None):
431 "default implementation for EOF token"
432 return 'EOF'
433
434 defaultproductions = {'ATKEYWORD': ATKEYWORD,
435 'COMMENT': COMMENT,
436 'S': S,
437 'EOF': EOF
438 }
439 defaultproductions.update(productions)
440 return defaultproductions
441
442
443 -class Seq(object):
444 """
445 property seq of Base2 inheriting classes, holds a list of Item objects.
446
447 used only by Selector for now
448
449 is normally readonly, only writable during parsing
450 """
452 """
453 only way to write to a Seq is to initialize it with new items
454 each itemtuple has (value, type, line) where line is optional
455 """
456 self._seq = []
457 self._readonly = readonly
458
461
464
467
469 return iter(self._seq)
470
472 return len(self._seq)
473
def append(self, val, typ, line=None, col=None):
    """
    wraps the given values in a new Item() and adds it at the end,
    raises AttributeError if this Seq is readonly
    """
    if self._readonly:
        raise AttributeError('Seq is readonly.')
    self._seq.append(Item(val, typ, line, col))
480
def replace(self, index=-1, val=None, typ=None, line=None, col=None):
    """
    puts a new Item() built from val, typ, line and col at position
    index (default is the last position), raises AttributeError if
    this Seq is readonly
    """
    if self._readonly:
        raise AttributeError('Seq is readonly.')
    self._seq[index] = Item(val, typ, line, col)
490
492 "returns a repr same as a list of tuples of (value, type)"
493 return u'cssutils.%s.%s([\n %s])' % (self.__module__,
494 self.__class__.__name__,
495 u',\n '.join([u'(%r, %r)' % (item.type, item.value)
496 for item in self._seq]
497 ))
499 return "<cssutils.%s.%s object length=%r at 0x%x>" % (
500 self.__module__, self.__class__.__name__, len(self), id(self))
501
503 """
504 an item in the seq list of classes (successor to tuple items in old seq)
505
506 each item has attributes:
507
508 type
509 a semantic type like "element", "attribute"
510 value
511 the actual value which may be a string, number etc or an instance
512 of e.g. a CSSComment
513 *line*
514 **NOT IMPLEMENTED YET, may contain the line in the source later**
515 """
516 - def __init__(self, value, type, line=None, col=None):
517 self.__value = value
518 self.__type = type
519 self.__line = line
520 self.__col = col
521
522 type = property(lambda self: self.__type)
523 value = property(lambda self: self.__value)
524 line = property(lambda self: self.__line)
525 col = property(lambda self: self.__col)
526
528 return "%s.%s(value=%r, type=%r, line=%r, col=%r)" % (
529 self.__module__, self.__class__.__name__,
530 self.__value, self.__type, self.__line, self.__col)
531
534 """
535 (EXPERIMENTAL)
536 A base class used for list classes like css.SelectorList or
537 stylesheets.MediaList
538
539 adds list like behaviour running on inheriting class' property ``seq``
540
541 - item in x => bool
542 - len(x) => integer
543 - get, set and del x[i]
544 - for item in x
545 - append(item)
546
547 some methods must be overwritten in inheriting class
548 """
551
554
557
559 return self.seq[index]
560
562 def gen():
563 for x in self.seq:
564 yield x
565 return gen()
566
569
571 "must be overwritten"
572 raise NotImplementedError
573
575 "must be overwritten"
576 raise NotImplementedError
577
580 """This is a decorator which can be used to mark functions
581 as deprecated. It will result in a warning being emitted
582 when the function is used.
583
584 It accepts a single parameter ``msg`` which is shown with the warning.
585 It should contain information which function or method to use instead.
586 """
589
591 def newFunc(*args, **kwargs):
592 import warnings
593 warnings.warn("Call to deprecated method %r. %s" %
594 (func.__name__, self.msg),
595 category=DeprecationWarning,
596 stacklevel=2)
597 return func(*args, **kwargs)
598 newFunc.__name__ = func.__name__
599 newFunc.__doc__ = func.__doc__
600 newFunc.__dict__.update(func.__dict__)
601 return newFunc
602
605 """
606 A dictionary like wrapper for @namespace rules used in a CSSStyleSheet.
607 Works on effective namespaces, so e.g. if::
608
609 @namespace p1 "uri";
610 @namespace p2 "uri";
611
612 only the second rule is effective and kept.
613
614 namespaces
615 a dictionary {prefix: namespaceURI} containing the effective namespaces
616 only. These are the latest set in the CSSStyleSheet.
617 parentStyleSheet
618 the parent CSSStyleSheet
619 """
620 - def __init__(self, parentStyleSheet, *args):
623
626
642
644 try:
645 return self.namespaces[prefix]
646 except KeyError, e:
647 raise xml.dom.NamespaceErr('Prefix %r not found.' % prefix)
648
651
654
670
677
685
686 namespaces = property(__getNamespaces,
687 doc=u'Holds only effective @namespace rules in self.parentStyleSheets'
688 '@namespace rules.')
689
690 - def get(self, prefix, default):
692
695
698
701
703 """
704 returns effective prefix for given namespaceURI or raises IndexError
705 if this cannot be found"""
706 for prefix, uri in self.namespaces.items():
707 if uri == namespaceURI:
708 return prefix
709 raise IndexError(u'NamespaceURI %r not found.' % namespaceURI)
710
712 return u"<cssutils.util.%s object parentStyleSheet=%r namespaces=%r "\
713 u"at 0x%x>" % (
714 self.__class__.__name__, str(self.parentStyleSheet),
715 self.namespaces, id(self))
716
719 """
720 namespaces used in objects like Selector as long as they are not connected
721 to a CSSStyleSheet
722 """
724 self.__namespaces = dict(*args)
725
728
729 namespaces = property(lambda self: self.__namespaces,
730 doc=u'Dict Wrapper for self.sheets @namespace rules.')
731
733 return u"<cssutils.util.%s object namespaces=%r at 0x%x>" % (
734 self.__class__.__name__, self.namespaces, id(self))
735
737 return u"cssutils.util.%s(%r)" % (self.__class__.__name__,
738 self.namespaces)
739
742 """Retrieve text from url using explicit or detected encoding via encutils
743 """
744 try:
745 req = urllib2.Request(url)
746 res = urllib2.urlopen(req)
747 except ValueError, e:
748
749 cssutils.log.warn(u'Error opening url=%r: %s' % (url, e.message),
750 error=ValueError)
751 except urllib2.HTTPError, e:
752
753 cssutils.log.warn(u'Error opening url=%r: %s %s' % (url, e.code, e.msg),
754 error=e)
755 except IOError, e:
756
757 cssutils.log.warn(u'Error opening url=%r: %r' % (url, e.args),
758 error=e)
759 else:
760 if res:
761
762 url = res.geturl()
763
764 if not encoding:
765
766 import encutils
767 media_type, encoding = encutils.getHTTPInfo(res)
768 if media_type != u'text/css':
769 cssutils.log.warn(u'Unexpected media type opening url=%s: %r != "text/css"' %
770 (url, media_type))
771 try:
772 return codecs.lookup("css")[2](res, encoding=encoding).read()
773 except urllib2.HTTPError, e:
774
775 cssutils.log.warn(u'Error reading url=%r: %s %s' % (url, e.code, e.msg),
776 error=e)
777 except IOError, e:
778 cssutils.log.warn(u'Error opening url=%r: %r' % (url, e.args),
779 error=e)
780