1 """base classes and helper functions for css and stylesheets packages
2 """
3 __all__ = []
4 __docformat__ = 'restructuredtext'
5 __version__ = '$Id: util.py 1395 2008-07-27 13:30:13Z cthedot $'
6
7 import codecs
8 from itertools import ifilter
9 import re
10 import types
11 import urllib2
12 import xml.dom
13 import cssutils
14 from tokenize2 import Tokenizer
15
16 import encutils
17
18 -class Base(object):
19 """
20 Base class for most CSS and StyleSheets classes
21
**Superseded by Base2 which is used for new seq handling class.**
23 See cssutils.util.Base2
24
25 Contains helper methods for inheriting classes helping parsing
26
27 ``_normalize`` is static as used by Preferences.
28 """
29 __tokenizer2 = Tokenizer()
30
31 _log = cssutils.log
32 _prods = cssutils.tokenize2.CSSProductions
33
34
35
36
37 _SHORTHANDPROPERTIES = {
38 u'background': [],
39 u'background-position': [],
40 u'border': [],
41 u'border-left': [],
42 u'border-right': [],
43 u'border-top': [],
44 u'border-bottom': [],
45
46
47
48 u'cue': [],
49 u'font': [],
50 u'list-style': [],
51
52 u'outline': [],
53
54 u'pause': []
55 }
56
57
58 __simpleescapes = re.compile(ur'(\\[^0-9a-fA-F])').sub
59
60 @staticmethod
62 """
63 normalizes x, namely:
64
65 - remove any \ before non unicode sequences (0-9a-zA-Z) so for
66 x=="c\olor\" return "color" (unicode escape sequences should have
67 been resolved by the tokenizer already)
68 - lowercase
69 """
70 if x:
71 def removeescape(matchobj):
72 return matchobj.group(0)[1:]
73 x = Base.__simpleescapes(removeescape, x)
74 return x.lower()
75 else:
76 return x
77
79 "raises xml.dom.NoModificationAllowedErr if rule/... is readonly"
80 if hasattr(self, '_readonly') and self._readonly:
81 raise xml.dom.NoModificationAllowedErr(
82 u'%s is readonly.' % self.__class__)
83 return True
84 return False
85
87 """
88 returns tuple (text, dict-of-namespaces) or if no namespaces are
89 in cssText returns (cssText, {})
90
91 used in Selector, SelectorList, CSSStyleRule, CSSMediaRule and
92 CSSStyleSheet
93 """
94 if isinstance(text_namespaces_tuple, tuple):
95 return text_namespaces_tuple[0], _SimpleNamespaces(
96 text_namespaces_tuple[1])
97 else:
98 return text_namespaces_tuple, _SimpleNamespaces()
99
101 """
102 returns tokens of textortokens which may already be tokens in which
103 case simply returns input
104 """
105 if not textortokens:
106 return None
107 elif isinstance(textortokens, basestring):
108
109 return self.__tokenizer2.tokenize(
110 textortokens)
111 elif types.GeneratorType == type(textortokens):
112
113 return textortokens
114 elif isinstance(textortokens, tuple):
115
116 return [textortokens]
117 else:
118
119 return (x for x in textortokens)
120
122 "returns next token in generator tokenizer or the default value"
123 try:
124 return tokenizer.next()
125 except (StopIteration, AttributeError):
126 return default
127
129 "returns type of Tokenizer token"
130 if token:
131 return token[0]
132 else:
133 return None
134
136 "returns value of Tokenizer token"
137 if token and normalize:
138 return Base._normalize(token[1])
139 elif token:
140 return token[1]
141 else:
142 return None
143
145 """
146 for STRING returns the actual content without surrounding "" or ''
147 and without respective escapes, e.g.::
148
149 "with \" char" => with " char
150 """
151 if token:
152 value = token[1]
153 return value.replace('\\'+value[0], value[0])[1:-1]
154 else:
155 return None
156
158 """
159 for URI returns the actual content without surrounding url()
160 or url(""), url('') and without respective escapes, e.g.::
161
162 url("\"") => "
163 """
164 if token:
165 value = token[1][4:-1].strip()
166 if (value[0] in '\'"') and (value[0] == value[-1]):
167
168 value = value.replace('\\'+value[0], value[0])[1:-1]
169 return value
170 else:
171 return None
172
173 - def _tokensupto2(self,
174 tokenizer,
175 starttoken=None,
176 blockstartonly=False,
177 blockendonly=False,
178 mediaendonly=False,
179 importmediaqueryendonly=False,
180 mediaqueryendonly=False,
181 semicolon=False,
182 propertynameendonly=False,
183 propertyvalueendonly=False,
184 propertypriorityendonly=False,
185 selectorattendonly=False,
186 funcendonly=False,
187 listseponly=False,
188 separateEnd=False
189 ):
190 """
191 returns tokens upto end of atrule and end index
192 end is defined by parameters, might be ; } ) or other
193
194 default looks for ending "}" and ";"
195 """
196 ends = u';}'
197 endtypes = ()
198 brace = bracket = parant = 0
199
200 if blockstartonly:
201 ends = u'{'
202 brace = -1
203 elif blockendonly:
204 ends = u'}'
205 brace = 1
206 elif mediaendonly:
207 ends = u'}'
208 brace = 1
209 elif importmediaqueryendonly:
210
211 ends = u';'
212 endtypes = ('STRING',)
213 elif mediaqueryendonly:
214
215
216 ends = u'{'
217 brace = -1
218 endtypes = ('STRING',)
219 elif semicolon:
220 ends = u';'
221 elif propertynameendonly:
222 ends = u':;'
223 elif propertyvalueendonly:
224 ends = u';!'
225 elif propertypriorityendonly:
226 ends = u';'
227 elif selectorattendonly:
228 ends = u']'
229 if starttoken and self._tokenvalue(starttoken) == u'[':
230 bracket = 1
231 elif funcendonly:
232 ends = u')'
233 parant = 1
234 elif listseponly:
235 ends = u','
236
237 resulttokens = []
238 if starttoken:
239 resulttokens.append(starttoken)
240 if tokenizer:
241 for token in tokenizer:
242 typ, val, line, col = token
243 if 'EOF' == typ:
244 resulttokens.append(token)
245 break
246 if u'{' == val:
247 brace += 1
248 elif u'}' == val:
249 brace -= 1
250 elif u'[' == val:
251 bracket += 1
252 elif u']' == val:
253 bracket -= 1
254
255 elif u'(' == val or \
256 Base._prods.FUNCTION == typ:
257 parant += 1
258 elif u')' == val:
259 parant -= 1
260
261 resulttokens.append(token)
262
263 if (brace == bracket == parant == 0) and (
264 val in ends or typ in endtypes):
265 break
266 elif mediaqueryendonly and brace == -1 and (
267 bracket == parant == 0) and typ in endtypes:
268
269 break
270
271 if separateEnd:
272
273 if resulttokens:
274 return resulttokens[:-1], resulttokens[-1]
275 else:
276 return resulttokens, None
277 else:
278 return resulttokens
279
281 """
282 returns string value of t (t may be a string, a list of token tuples
283 or a single tuple in format (type, value, line, col).
284 Mainly used to get a string value of t for error messages.
285 """
286 if not t:
287 return u''
288 elif isinstance(t, basestring):
289 return t
290 else:
291 return u''.join([x[1] for x in t])
292
294 """
295 adds default productions if not already present, used by
296 _parse only
297
298 each production should return the next expected token
normally a name like "uri" or "EOF"
300 some have no expectation like S or COMMENT, so simply return
301 the current value of self.__expected
302 """
303 def ATKEYWORD(expected, seq, token, tokenizer=None):
304 "TODO: add default impl for unexpected @rule?"
305 if expected != 'EOF':
306
307 rule = cssutils.css.CSSUnknownRule()
308 rule.cssText = self._tokensupto2(tokenizer, token)
309 if rule.wellformed:
310 seq.append(rule)
311 return expected
312 else:
313 new['wellformed'] = False
314 self._log.error(u'Expected EOF.', token=token)
315 return expected
316
317 def COMMENT(expected, seq, token, tokenizer=None):
318 "default implementation for COMMENT token adds CSSCommentRule"
319 seq.append(cssutils.css.CSSComment([token]))
320 return expected
321
322 def S(expected, seq, token, tokenizer=None):
323 "default implementation for S token, does nothing"
324 return expected
325
326 def EOF(expected=None, seq=None, token=None, tokenizer=None):
327 "default implementation for EOF token"
328 return 'EOF'
329
330 p = {'ATKEYWORD': ATKEYWORD,
331 'COMMENT': COMMENT,
332 'S': S,
333 'EOF': EOF
334 }
335 p.update(productions)
336 return p
337
338 - def _parse(self, expected, seq, tokenizer, productions, default=None,
339 new=None):
340 """
341 puts parsed tokens in seq by calling a production with
342 (seq, tokenizer, token)
343
344 expected
345 a name what token or value is expected next, e.g. 'uri'
346 seq
347 to add rules etc to
348 tokenizer
349 call tokenizer.next() to get next token
350 productions
351 callbacks {tokentype: callback}
352 default
353 default callback if tokentype not in productions
354 new
355 used to init default productions
356
357 returns (wellformed, expected) which the last prod might have set
358 """
359 wellformed = True
360 if tokenizer:
361 prods = self._adddefaultproductions(productions, new)
362 for token in tokenizer:
363 p = prods.get(token[0], default)
364 if p:
365 expected = p(expected, seq, token, tokenizer)
366 else:
367 wellformed = False
368 self._log.error(u'Unexpected token (%s, %s, %s, %s)' % token)
369 return wellformed, expected
370
373 """
374 Base class for new seq handling, used by Selector for now only
375 """
378
380 """
381 sets newseq and makes it readonly
382 """
383 newseq._readonly = True
384 self._seq = newseq
385
386 seq = property(lambda self: self._seq, doc="seq for most classes")
387
389 "get a writeable Seq() which is added later"
390 return Seq(readonly=readonly)
391
393 """
394 adds default productions if not already present, used by
395 _parse only
396
397 each production should return the next expected token
normally a name like "uri" or "EOF"
399 some have no expectation like S or COMMENT, so simply return
400 the current value of self.__expected
401 """
402 def ATKEYWORD(expected, seq, token, tokenizer=None):
403 "default impl for unexpected @rule"
404 if expected != 'EOF':
405
406 rule = cssutils.css.CSSUnknownRule()
407 rule.cssText = self._tokensupto2(tokenizer, token)
408 if rule.wellformed:
409 seq.append(rule, cssutils.css.CSSRule.UNKNOWN_RULE,
410 line=token[2], col=token[3])
411 return expected
412 else:
413 new['wellformed'] = False
414 self._log.error(u'Expected EOF.', token=token)
415 return expected
416
417 def COMMENT(expected, seq, token, tokenizer=None):
418 "default impl, adds CSSCommentRule if not token == EOF"
419 if expected == 'EOF':
420 new['wellformed'] = False
421 self._log.error(u'Expected EOF but found comment.', token=token)
422 seq.append(cssutils.css.CSSComment([token]), 'COMMENT')
423 return expected
424
425 def S(expected, seq, token, tokenizer=None):
426 "default impl, does nothing if not token == EOF"
427 if expected == 'EOF':
428 new['wellformed'] = False
429 self._log.error(u'Expected EOF but found whitespace.', token=token)
430 return expected
431
432 def EOF(expected=None, seq=None, token=None, tokenizer=None):
433 "default implementation for EOF token"
434 return 'EOF'
435
436 defaultproductions = {'ATKEYWORD': ATKEYWORD,
437 'COMMENT': COMMENT,
438 'S': S,
439 'EOF': EOF
440 }
441 defaultproductions.update(productions)
442 return defaultproductions
443
444
445 -class Seq(object):
446 """
447 property seq of Base2 inheriting classes, holds a list of Item objects.
448
449 used only by Selector for now
450
451 is normally readonly, only writable during parsing
452 """
454 """
455 only way to write to a Seq is to initialize it with new items
456 each itemtuple has (value, type, line) where line is optional
457 """
458 self._seq = []
459 self._readonly = readonly
460
463
466
469
471 return iter(self._seq)
472
474 return len(self._seq)
475
def append(self, val, typ, line=None, col=None):
    """
    Wrap ``val``, ``typ``, ``line`` and ``col`` in a new Item and add it
    to the end of the sequence.

    Raises AttributeError if the Seq is readonly.
    """
    if self._readonly:
        raise AttributeError('Seq is readonly.')
    item = Item(val, typ, line, col)
    self._seq.append(item)
482
484 "if not readonly add item which must be an Item"
485 if self._readonly:
486 raise AttributeError('Seq is readonly.')
487 else:
488 self._seq.append(item)
489
def replace(self, index=-1, val=None, typ=None, line=None, col=None):
    """
    Overwrite the entry at ``index`` (default: the last one) with a new
    Item built from ``val``, ``typ``, ``line`` and ``col``.

    The old entry is discarded completely, none of its attributes are
    carried over to the new Item.

    Raises AttributeError if the Seq is readonly.
    """
    if self._readonly:
        raise AttributeError('Seq is readonly.')
    self._seq[index] = Item(val, typ, line, col)
499
501 "returns a repr same as a list of tuples of (value, type)"
502 return u'cssutils.%s.%s([\n %s])' % (self.__module__,
503 self.__class__.__name__,
504 u',\n '.join([u'(%r, %r)' % (item.type, item.value)
505 for item in self._seq]
506 ))
508 return "<cssutils.%s.%s object length=%r at 0x%x>" % (
509 self.__module__, self.__class__.__name__, len(self), id(self))
510
512 """
513 an item in the seq list of classes (successor to tuple items in old seq)
514
515 each item has attributes:
516
517 type
a semantic type like "element", "attribute"
519 value
520 the actual value which may be a string, number etc or an instance
521 of e.g. a CSSComment
522 *line*
523 **NOT IMPLEMENTED YET, may contain the line in the source later**
524 """
525 - def __init__(self, value, type, line=None, col=None):
526 self.__value = value
527 self.__type = type
528 self.__line = line
529 self.__col = col
530
531 type = property(lambda self: self.__type)
532 value = property(lambda self: self.__value)
533 line = property(lambda self: self.__line)
534 col = property(lambda self: self.__col)
535
537 return "%s.%s(value=%r, type=%r, line=%r, col=%r)" % (
538 self.__module__, self.__class__.__name__,
539 self.__value, self.__type, self.__line, self.__col)
540
543 """
544 (EXPERIMENTAL)
545 A base class used for list classes like css.SelectorList or
546 stylesheets.MediaList
547
adds list like behaviour running on inheriting class' property ``seq``
549
550 - item in x => bool
551 - len(x) => integer
552 - get, set and del x[i]
553 - for item in x
554 - append(item)
555
556 some methods must be overwritten in inheriting class
557 """
560
563
566
568 return self.seq[index]
569
571 def gen():
572 for x in self.seq:
573 yield x
574 return gen()
575
578
580 "must be overwritten"
581 raise NotImplementedError
582
584 "must be overwritten"
585 raise NotImplementedError
586
589 """
590 A dictionary like wrapper for @namespace rules used in a CSSStyleSheet.
591 Works on effective namespaces, so e.g. if::
592
593 @namespace p1 "uri";
594 @namespace p2 "uri";
595
596 only the second rule is effective and kept.
597
598 namespaces
599 a dictionary {prefix: namespaceURI} containing the effective namespaces
600 only. These are the latest set in the CSSStyleSheet.
601 parentStyleSheet
602 the parent CSSStyleSheet
603 """
604 - def __init__(self, parentStyleSheet, *args):
607
610
626
628 try:
629 return self.namespaces[prefix]
630 except KeyError, e:
631 raise xml.dom.NamespaceErr('Prefix %r not found.' % prefix)
632
635
638
654
661
669
670 namespaces = property(__getNamespaces,
671 doc=u'Holds only effective @namespace rules in self.parentStyleSheets'
672 '@namespace rules.')
673
674 - def get(self, prefix, default):
676
679
682
685
687 """
688 returns effective prefix for given namespaceURI or raises IndexError
689 if this cannot be found"""
690 for prefix, uri in self.namespaces.items():
691 if uri == namespaceURI:
692 return prefix
693 raise IndexError(u'NamespaceURI %r not found.' % namespaceURI)
694
696 return u"<cssutils.util.%s object parentStyleSheet=%r at 0x%x>" % (
697 self.__class__.__name__, str(self.parentStyleSheet), id(self))
698
701 """
702 namespaces used in objects like Selector as long as they are not connected
703 to a CSSStyleSheet
704 """
706 self.__namespaces = dict(*args)
707
710
711 namespaces = property(lambda self: self.__namespaces,
712 doc=u'Dict Wrapper for self.sheets @namespace rules.')
713
715 return u"<cssutils.util.%s object namespaces=%r at 0x%x>" % (
716 self.__class__.__name__, self.namespaces, id(self))
717
719 return u"cssutils.util.%s(%r)" % (self.__class__.__name__,
720 self.namespaces)
721
724 """Retrieve data from ``url``. cssutils default implementation of fetch
725 URL function.
726
727 Returns ``(encoding, string)`` or ``None``
728 """
729 try:
730 res = urllib2.urlopen(url)
731 except OSError, e:
732
733 cssutils.log.warn(e, error=OSError)
734 except (OSError, ValueError), e:
735
736 cssutils.log.warn(u'ValueError, %s' % e.message, error=ValueError)
737 except urllib2.HTTPError, e:
738
739 cssutils.log.warn(u'HTTPError opening url=%r: %s %s' %
740 (url, e.code, e.msg), error=e)
741 except urllib2.URLError, e:
742
743 cssutils.log.warn(u'URLError, %s' % e.reason, error=e)
744 else:
745 if res:
746 mimeType, encoding = encutils.getHTTPInfo(res)
747 if mimeType != u'text/css':
748 cssutils.log.error(u'Expected "text/css" mime type for url=%s but found: %r' %
749 (url, mimeType), error=ValueError)
750 return encoding, res.read()
751
752 -def _readUrl(url, fetcher=None, overrideEncoding=None, parentEncoding=None):
753 """
754 Read cssText from url and decode it using all relevant methods (HTTP
755 header, BOM, @charset). Returns
756
757 - encoding used to decode text (which is needed to set encoding of
758 stylesheet properly)
759 - type of encoding (how it was retrieved, see list below)
760 - decodedCssText
761
762 ``fetcher``
763 see cssutils.registerFetchUrl for details
764 ``overrideEncoding``
765 If given this encoding is used and all other encoding information is
766 ignored (HTTP, BOM etc)
767 ``parentEncoding``
768 Encoding of parent stylesheet (while e.g. reading @import references sheets)
769 or document if available.
770
771 Priority or encoding information
772 --------------------------------
773 **cssutils only**: 0. overrideEncoding
774
775 1. An HTTP "charset" parameter in a "Content-Type" field (or similar parameters in other protocols)
776 2. BOM and/or @charset (see below)
777 3. <link charset=""> or other metadata from the linking mechanism (if any)
778 4. charset of referring style sheet or document (if any)
779 5. Assume UTF-8
780
781 """
782 enctype = None
783
784 if not fetcher:
785 fetcher = _defaultFetcher
786 r = fetcher(url)
787 if r and len(r) == 2 and r[1] is not None:
788 httpEncoding, content = r
789
790 if overrideEncoding:
791 enctype = 0
792 encoding = overrideEncoding
793 elif httpEncoding:
794 enctype = 1
795 encoding = httpEncoding
796 else:
797
798 contentEncoding, explicit = cssutils.codec.detectencoding_str(content)
799 if explicit:
800 enctype = 2
801 encoding = contentEncoding
802 elif parentEncoding:
803 enctype = 4
804
805 encoding = parentEncoding
806 else:
807 enctype = 5
808 encoding = 'utf-8'
809
810 try:
811
812 decodedCssText = codecs.lookup("css")[1](content, encoding=encoding)[0]
813 except UnicodeDecodeError, e:
814 cssutils.log.warn(e, neverraise=True)
815 decodedCssText = None
816
817 return encoding, enctype, decodedCssText
818 else:
819 return None, None, None
820