""" robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
"""
import urlparse
import urllib

__all__ = ["RobotFileParser"]

debug = 0

def _debug(msg):
    if debug: print msg

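# For orientation, a robots.txt file as consumed by this parser consists of
# records: one or more User-agent lines followed by Allow/Disallow rules.
# The record below is an illustrative sketch, not taken from any real site:
#
#     User-agent: *
#     Disallow: /cgi-bin/
#     Allow: /cgi-bin/public/
#
# parse() turns each such record into an Entry holding its user-agents and
# their RuleLines.
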
class RobotFileParser:
    """ This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    """

    def __init__(self, url=''):
        self.entries = []
        self.default_entry = None
        self.disallow_all = False
        self.allow_all = False
        self.set_url(url)
        self.last_checked = 0

    def mtime(self):
        """Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        """
        return self.last_checked

    def modified(self):
        """Sets the time the robots.txt file was last fetched to the
        current time.

        """
        import time
        self.last_checked = time.time()

    def set_url(self, url):
        """Sets the URL referring to a robots.txt file."""
        self.url = url
        self.host, self.path = urlparse.urlparse(url)[1:3]

    def read(self):
        """Reads the robots.txt URL and feeds it to the parser."""
        opener = URLopener()
        f = opener.open(self.url)
        lines = []
        line = f.readline()
        while line:
            lines.append(line.strip())
            line = f.readline()
        self.errcode = opener.errcode
        if self.errcode == 401 or self.errcode == 403:
            self.disallow_all = True
            _debug("disallow all")
        elif self.errcode >= 400:
            self.allow_all = True
            _debug("allow all")
        elif self.errcode == 200 and lines:
            _debug("parse lines")
            self.parse(lines)

    def _add_entry(self, entry):
        if "*" in entry.useragents:
            # the default entry is considered last
            self.default_entry = entry
        else:
            self.entries.append(entry)

    def parse(self, lines):
        """parse the input lines from a robots.txt file.
           We allow that a user-agent: line is not preceded by
           one or more blank lines."""
        state = 0
        linenumber = 0
        entry = Entry()

        for line in lines:
            linenumber = linenumber + 1
            if not line:
                if state == 1:
                    _debug("line %d: warning: you should insert allow: or"
                           " disallow: directives below any user-agent: line"
                           % linenumber)
                    entry = Entry()
                    state = 0
                elif state == 2:
                    self._add_entry(entry)
                    entry = Entry()
                    state = 0
            # remove optional comment and strip line
            i = line.find('#')
            if i >= 0:
                line = line[:i]
            line = line.strip()
            if not line:
                continue
            line = line.split(':', 1)
            if len(line) == 2:
                line[0] = line[0].strip().lower()
                line[1] = urllib.unquote(line[1].strip())
                if line[0] == "user-agent":
                    if state == 2:
                        _debug("line %d: warning: you should insert a blank"
                               " line before any user-agent directive"
                               % linenumber)
                        self._add_entry(entry)
                        entry = Entry()
                    entry.useragents.append(line[1])
                    state = 1
                elif line[0] == "disallow":
                    if state == 0:
                        _debug("line %d: error: you must insert a user-agent:"
                               " directive before this line" % linenumber)
                    else:
                        entry.rulelines.append(RuleLine(line[1], False))
                        state = 2
                elif line[0] == "allow":
                    if state == 0:
                        _debug("line %d: error: you must insert a user-agent:"
                               " directive before this line" % linenumber)
                    else:
                        entry.rulelines.append(RuleLine(line[1], True))
                else:
                    _debug("line %d: warning: unknown key %s"
                           % (linenumber, line[0]))
            else:
                _debug("line %d: error: malformed line %s"
                       % (linenumber, line))
        if state == 2:
            self.entries.append(entry)
        _debug("Parsed rules:\n%s" % str(self))

    def can_fetch(self, useragent, url):
        """using the parsed robots.txt decide if useragent can fetch url"""
        _debug("Checking robots.txt allowance for:\n  user agent: %s\n  url: %s"
               % (useragent, url))
        if self.disallow_all:
            return False
        if self.allow_all:
            return True
        # search for given user agent matches
        # the first match counts
        url = urllib.quote(urlparse.urlparse(urllib.unquote(url))[2]) or "/"
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.allowance(url)
        # try the default entry last
        if self.default_entry:
            return self.default_entry.allowance(url)
        # agent not found ==> access granted
        return True

    def __str__(self):
        ret = ""
        for entry in self.entries:
            ret = ret + str(entry) + "\n"
        return ret

class RuleLine:
    """A rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path."""
    def __init__(self, path, allowance):
        if path == '' and not allowance:
            # an empty value means allow all
            allowance = True
        self.path = urllib.quote(path)
        self.allowance = allowance

    def applies_to(self, filename):
        return self.path == "*" or filename.startswith(self.path)

    def __str__(self):
        return (self.allowance and "Allow" or "Disallow") + ": " + self.path

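# RuleLine matches by URL-path prefix; for example (illustrative values):
#
#     RuleLine('/cgi-bin/', False).applies_to('/cgi-bin/search')  # -> True
#
# so '/cgi-bin/search' would be disallowed, while '/public' would not match
# this rule at all. Note that __init__ above normalizes an empty Disallow
# path into an allow-all rule.
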
class Entry:
    """An entry has one or more user-agents and zero or more rulelines"""
    def __init__(self):
        self.useragents = []
        self.rulelines = []

    def __str__(self):
        ret = ""
        for agent in self.useragents:
            ret = ret + "User-agent: " + agent + "\n"
        for line in self.rulelines:
            ret = ret + str(line) + "\n"
        return ret

    def applies_to(self, useragent):
        """check if this entry applies to the specified agent"""
        # split the name token and make it lower case
        useragent = useragent.split("/")[0].lower()
        for agent in self.useragents:
            if agent == '*':
                # we have the catch-all agent
                return True
            agent = agent.lower()
            if agent in useragent:
                return True
        return False

    def allowance(self, filename):
        """Preconditions:
        - our agent applies to this entry
        - filename is URL decoded"""
        for line in self.rulelines:
            _debug((filename, str(line), line.allowance))
            if line.applies_to(filename):
                return line.allowance
        return True

class URLopener(urllib.FancyURLopener):
    def __init__(self, *args):
        urllib.FancyURLopener.__init__(self, *args)
        self.errcode = 200

    def prompt_user_passwd(self, host, realm):
        # robots.txt must be publicly readable, so never prompt for
        # credentials; an auth failure is reported through errcode instead
        return None, None

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        self.errcode = errcode
        return urllib.FancyURLopener.http_error_default(self, url, fp,
                                                        errcode, errmsg,
                                                        headers)

def _check(a, b):
    if not b:
        ac = "access denied"
    else:
        ac = "access allowed"
    if a != b:
        print "failed"
    else:
        print "ok (%s)" % ac
    print

def _test():
    global debug
    rp = RobotFileParser()
    debug = 1

    # robots.txt that exists
    rp.set_url('http://www.musi-cal.com/robots.txt')
    rp.read()

    # the catch-all agent versus the empty agent
    _check(rp.can_fetch('*', 'http://www.musi-cal.com/'), 1)
    _check(rp.can_fetch('', 'http://www.musi-cal.com/'), 0)
    # various cherry pickers
    _check(rp.can_fetch('CherryPickerSE',
                        'http://www.musi-cal.com/cgi-bin/event-search'
                        '?city=San+Francisco'), 0)
    _check(rp.can_fetch('CherryPickerSE/1.0',
                        'http://www.musi-cal.com/cgi-bin/event-search'
                        '?city=San+Francisco'), 0)
    _check(rp.can_fetch('CherryPickerSE/1.5',
                        'http://www.musi-cal.com/cgi-bin/event-search'
                        '?city=San+Francisco'), 0)
    # agent names match case-insensitively
    _check(rp.can_fetch('ExtractorPro', 'http://www.musi-cal.com/blubba'), 0)
    _check(rp.can_fetch('extractorpro', 'http://www.musi-cal.com/blubba'), 0)
    # agent names match by substring
    _check(rp.can_fetch('toolpak/1.1', 'http://www.musi-cal.com/blubba'), 0)
    # tests for the catch-all * agent
    _check(rp.can_fetch('spam', 'http://www.musi-cal.com/search'), 0)
    _check(rp.can_fetch('spam', 'http://www.musi-cal.com/Musician/me'), 1)

    # robots.txt that does not exist
    rp.set_url('http://www.lycos.com/robots.txt')
    rp.read()
    _check(rp.can_fetch('Mozilla', 'http://www.lycos.com/search'), 1)

if __name__ == '__main__':
    _test()