B
    2*™\P)  ã               @   sä  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	m
Z
 G dd„ dƒZG dd„ deejƒZG dd	„ d	eejƒZG d
d„ deejƒZG dd„ deƒZG dd„ deejƒZG dd„ deƒZG dd„ deejƒZG dd„ deejƒZG dd„ deejƒZG dd„ deejƒZG dd„ deƒZG dd„ deejƒZG dd„ deejƒZG d d!„ d!eejƒZG d"d#„ d#eejƒZG d$d%„ d%eejƒZG d&d'„ d'eejƒZG d(d)„ d)e	ƒZG d*d+„ d+ejƒZG d,d-„ d-ejƒZ e!d.kràe "¡  dS )/é    N)Úsupport)ÚBaseHTTPRequestHandlerÚ
HTTPServerc               @   s<   e Zd ZdZdZg Zg Zdd„ Zdd„ Zdd„ Z	d	d
„ Z
dS )ÚBaseRobotTestÚ Ztest_robotparserc             C   s,   t  | j¡ ¡ }tj ¡ | _| j |¡ d S )N)	ÚioÚStringIOÚ
robots_txtÚ	readlinesÚurllibÚrobotparserÚRobotFileParserÚparserÚparse)ÚselfÚlines© r   ú@C:\ALexclude\prg\programme\Python37\Lib\test\test_robotparser.pyÚsetUp   s    zBaseRobotTest.setUpc             C   s$   t |tƒr|\}}||fS | j|fS )N)Ú
isinstanceÚtupleÚagent)r   Úurlr   r   r   r   Úget_agent_and_url   s    
zBaseRobotTest.get_agent_and_urlc          
   C   sP   xJ| j D ]@}|  |¡\}}| j||d |  | j ||¡¡ W d Q R X qW d S )N)r   r   )Úgoodr   ÚsubTestÚ
assertTruer   Ú	can_fetch)r   r   r   r   r   r   Útest_good_urls   s    zBaseRobotTest.test_good_urlsc          
   C   sP   xJ| j D ]@}|  |¡\}}| j||d |  | j ||¡¡ W d Q R X qW d S )N)r   r   )Úbadr   r   ÚassertFalser   r   )r   r   r   r   r   r   Útest_bad_urls!   s    zBaseRobotTest.test_bad_urlsN)Ú__name__Ú
__module__Ú__qualname__r	   r   r   r   r   r   r   r!   r   r   r   r   r   
   s   r   c               @   s"   e Zd ZdZddgZdddgZdS )ÚUserAgentWildcardTestz•User-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
Disallow: /tmp/ # these will soon disappear
Disallow: /foo.html
    ú/z
/test.htmlz/cyberworld/map/index.htmlz/tmp/xxxz	/foo.htmlN)r"   r#   r$   r	   r   r   r   r   r   r   r%   (   s   r%   c               @   s    e Zd ZdZdddgZdgZdS )ÚCrawlDelayAndCustomAgentTestzå# robots.txt for http://www.example.com/

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow:
    r&   z
/test.html)Zcybermapperz/cyberworld/map/index.htmlz/cyberworld/map/index.htmlN)r"   r#   r$   r	   r   r   r   r   r   r   r'   3   s   
r'   c               @   s   e Zd ZdZg ZdddgZdS )ÚRejectAllRobotsTestz(# go away
User-agent: *
Disallow: /
    z/cyberworld/map/index.htmlr&   z/tmp/N)r"   r#   r$   r	   r   r   r   r   r   r   r(   D   s   r(   c               @   s   e Zd Zdd„ ZdS )ÚBaseRequestRateTestc          
   C   s°   xª| j | j D ]š}|  |¡\}}| j||dt | jrL|  | j |¡| j¡ | jrž|  | j |¡t	j
j¡ |  | j |¡j| jj¡ |  | j |¡j| jj¡ W d Q R X qW d S )N)r   r   )r   r   r   r   Úcrawl_delayÚassertEqualr   Úrequest_rateZassertIsInstancer   r   ÚRequestRateZrequestsZseconds)r   r   r   r   r   r   Útest_request_rateP   s     


z%BaseRequestRateTest.test_request_rateN)r"   r#   r$   r.   r   r   r   r   r)   N   s   r)   c               @   s>   e Zd ZdZdZej dd¡ZdZ	dgZ
ddd	d
dddgZdS )ÚCrawlDelayAndRequestRateTestz’User-agent: figtree
Crawl-delay: 3
Request-rate: 9/30
Disallow: /tmp
Disallow: /a%3cd.html
Disallow: /a%2fb.html
Disallow: /%7ejoe/index.html
    Úfigtreeé	   é   é   )r0   z	/foo.htmlz/tmpz	/tmp.htmlz/tmp/a.htmlz/a%3cd.htmlz/a%3Cd.htmlz/a%2fb.htmlz/~joe/index.htmlN)r"   r#   r$   r	   r   r   r   r-   r,   r*   r   r   r   r   r   r   r/   g   s   	
r/   c               @   s   e Zd ZdZdZdZdS )ÚDifferentAgentTestzFigTree Robot libwww-perl/5.04N)r"   r#   r$   r   r,   r*   r   r   r   r   r4   y   s   r4   c               @   s*   e Zd ZdZdgZddddddgZd	Zd
S )ÚInvalidRequestRateTestzUser-agent: *
Disallow: /tmp/
Disallow: /a%3Cd.html
Disallow: /a/b.html
Disallow: /%7ejoe/index.html
Crawl-delay: 3
Request-rate: 9/banana
    z/tmpz/tmp/z/tmp/a.htmlz/a%3cd.htmlz/a%3Cd.htmlz	/a/b.htmlz/%7Ejoe/index.htmlr3   N)r"   r#   r$   r	   r   r   r*   r   r   r   r   r5      s
   	
r5   c               @   s   e Zd ZdZdgZg ZdS )ÚInvalidCrawlDelayTestz2User-Agent: *
Disallow: /.
Crawl-delay: pears
    z	/foo.htmlN)r"   r#   r$   r	   r   r   r   r   r   r   r6   ‘   s   r6   c               @   s    e Zd ZdZdZdgZdgZdS )ÚAnotherInvalidRequestRateTestzeUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
Request-rate: whale/banana
    Ú	Googlebotz/folder1/myfile.htmlz/folder1/anotherfile.htmlN)r"   r#   r$   r	   r   r   r   r   r   r   r   r7      s   r7   c               @   s   e Zd ZdZdZdgZdS )ÚUserAgentOrderingTestzMUser-agent: Googlebot
Disallow: /

User-agent: Googlebot-Mobile
Allow: /
    r8   z/something.jpgN)r"   r#   r$   r	   r   r   r   r   r   r   r9   ª   s   
r9   c               @   s   e Zd ZdZdS )ÚUserAgentGoogleMobileTestzGooglebot-MobileN)r"   r#   r$   r   r   r   r   r   r:   ¹   s   r:   c               @   s    e Zd ZdZdZdgZdgZdS )ÚGoogleURLOrderingTestzJUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
    Z	googlebotz/folder1/myfile.htmlz/folder1/anotherfile.htmlN)r"   r#   r$   r	   r   r   r   r   r   r   r   r;   ½   s   r;   c               @   s   e Zd ZdZdgZdgZdS )ÚDisallowQueryStringTestz2User-agent: *
Disallow: /some/path?name=value
    z
/some/pathz/some/path?name=valueN)r"   r#   r$   r	   r   r   r   r   r   r   r<   Ê   s   r<   c               @   s   e Zd ZdZdgZdgZdS )ÚUseFirstUserAgentWildcardTestzNUser-agent: *
Disallow: /some/path

User-agent: *
Disallow: /another/path
    z/another/pathz
/some/pathN)r"   r#   r$   r	   r   r   r   r   r   r   r=   Ô   s   r=   c               @   s   e Zd ZdZdgZdgZdS )ÚEmptyQueryStringTestz>User-agent: *
Allow: /some/path?
Disallow: /another/path?
    z/some/path?z/another/path?N)r"   r#   r$   r	   r   r   r   r   r   r   r>   á   s   r>   c               @   s0   e Zd ZdZej dd¡ZdZddgZ	dgZ
dS )	ÚDefaultEntryTestzOUser-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/
    r3   é   é   r&   z
/test.htmlz/cyberworld/map/index.htmlN)r"   r#   r$   r	   r   r   r-   r,   r*   r   r   r   r   r   r   r?   ì   s
   r?   c               @   s   e Zd ZdZdZdd„ ZdS )ÚStringFormattingTestzÆUser-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow: /some/path
    zzUser-agent: cybermapper
Disallow: /some/path

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/

c             C   s   |   t| jƒ| j¡ d S )N)r+   Ústrr   Úexpected_output)r   r   r   r   Útest_string_formatting  s    z+StringFormattingTest.test_string_formattingN)r"   r#   r$   r	   rD   rE   r   r   r   r   rB   ù   s   
rB   c               @   s   e Zd Zdd„ Zdd„ ZdS )ÚRobotHandlerc             C   s   |   dd¡ d S )Ni“  zForbidden access)Z
send_error)r   r   r   r   Údo_GET  s    zRobotHandler.do_GETc             G   s   d S )Nr   )r   ÚformatÚargsr   r   r   Úlog_message  s    zRobotHandler.log_messageN)r"   r#   r$   rG   rJ   r   r   r   r   rF     s   rF   c               @   s*   e Zd Zdd„ Zdd„ Zejdd„ ƒZdS )ÚPasswordProtectedSiteTestCasec             C   sB   t tjdftƒ| _tjd| jjddid| _d| j_	| j 
¡  d S )Nr   zHTTPServer servingZpoll_intervalg{®Gáz„?)ÚnameÚtargetÚkwargsT)r   r   ÚHOSTrF   ÚserverÚ	threadingZThreadZserve_foreverÚtZdaemonÚstart)r   r   r   r   r     s    z#PasswordProtectedSiteTestCase.setUpc             C   s"   | j  ¡  | j ¡  | j  ¡  d S )N)rP   ZshutdownrR   ÚjoinZserver_close)r   r   r   r   ÚtearDown,  s    

z&PasswordProtectedSiteTestCase.tearDownc             C   s\   | j j}dtj d t|d ƒ }|d }tj ¡ }| |¡ | 	¡  |  
| d|¡¡ d S )Nzhttp://ú:rA   z/robots.txtÚ*)rP   Zserver_addressr   rO   rC   r   r   r   Zset_urlÚreadr    r   )r   Zaddrr   Z
robots_urlr   r   r   r   ÚtestPasswordProtectedSite1  s    

z7PasswordProtectedSiteTestCase.testPasswordProtectedSiteN)r"   r#   r$   r   rU   r   Zreap_threadsrY   r   r   r   r   rK     s   rK   c               @   sF   e Zd ZdZd e¡Zedd„ ƒZdd„ Zdd„ Z	d	d
„ Z
dd„ ZdS )ÚNetworkTestCasezhttp://www.pythontest.net/z{}elsewhere/robots.txtc          	   C   s@   t  d¡ t  | j¡  tj | j¡| _| j 	¡  W d Q R X d S )NZnetwork)
r   ZrequiresZtransient_internetÚbase_urlr   r   r   r	   r   rX   )Úclsr   r   r   Ú
setUpClassA  s    
zNetworkTestCase.setUpClassc             C   s$   d  | j|tj |¡d sdnd¡S )Nz{}{}{}rA   r&   r   )rH   r[   ÚosÚpathÚsplitext)r   r_   r   r   r   r   H  s    zNetworkTestCase.urlc             C   sV   |   | jj¡ |   | jj¡ |  | j ¡ d¡ |   | j d¡¡ |   | j d¡¡ d S )Nr   rW   )r    r   Údisallow_allÚ	allow_allZassertGreaterÚmtimer*   r,   )r   r   r   r   Ú
test_basicM  s
    zNetworkTestCase.test_basicc             C   s˜   |   | j d|  d¡¡¡ |  | j d| j¡¡ |  | j d|  d¡¡¡ |  | j d|  d¡¡¡ |  | j d|  d¡¡¡ |   | j d| j¡¡ d S )NrW   Z	elsewhereZNutchZbrianZwebstats)r   r   r   r   r    r[   )r   r   r   r   Útest_can_fetchT  s    zNetworkTestCase.test_can_fetchc             C   sf   t j |  d¡¡}| ¡  |  |j¡ |  |j¡ |  	| 
¡ d¡ |  | d¡¡ |  | d¡¡ d S )Nzi-robot.txtr   rW   )r   r   r   r   rX   r   rb   r    ra   r+   rc   ZassertIsNoner*   r,   )r   r   r   r   r   Útest_read_404\  s    zNetworkTestCase.test_read_404N)r"   r#   r$   r[   rH   r	   Úclassmethodr]   r   rd   re   rf   r   r   r   r   rZ   <  s   
rZ   Ú__main__)#r   r^   rQ   ZunittestZurllib.robotparserr   Ztestr   Zhttp.serverr   r   r   ZTestCaser%   r'   r(   r)   r/   r4   r5   r6   r7   r9   r:   r;   r<   r=   r>   r?   rB   rF   rK   rZ   r"   Úmainr   r   r   r   Ú<module>   s:   

	)
