Pc@s dZddlZddlZddlZddlZddlZddlZddlZddlZddl Z ddl Z ddl Z ddl Z ddl Z yddlmZWn!ek rddlmZnXddlmZmZmZmZmZmZmZmZmZmZmZmZmZddlmZm Z m!Z!m"Z"e j#d Z$da&de j'dZ(dZ)d e*fd YZ+d e+efd YZ,ej-d Z.dZ/ddDdYZ0ddEdYZ1dZ2ddFdYZ3de3fdYZ4de3fdYZ5de3fdYZ6dZ7de3fdYZ8ddGd YZ9d!e9fd"YZ:d#dHd$YZ;d%e;e3fd&YZ<d'e;e3fd(YZ=d)Z>d*dId+YZ?d,e3e?fd-YZ@d.e3e?fd/YZAd0e3fd1YZBd2eBfd3YZCeDed4r|d5eBfd6YZEnd7e3fd8YZFd9e3fd:YZGd;ZHd<ZId=ZJd>e3fd?YZKd@e3fdAYZLdBeLfdCYZMdS(Js! An extensible library for opening URLs using a variety of protocols The simplest way to use this module is to call the urlopen function, which accepts a string containing a URL or a Request object (described below). It opens the URL and returns the results as file-like object; the returned object has some extra methods described below. The OpenerDirector manages a collection of Handler objects that do all the actual work. Each Handler implements a particular protocol or option. The OpenerDirector is a composite object that invokes the Handlers needed to open the requested URL. For example, the HTTPHandler performs HTTP GET and POST requests and deals with non-error returns. The HTTPRedirectHandler automatically deals with HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler deals with digest authentication. urlopen(url, data=None) -- Basic usage is the same as original urllib. pass the url and optionally data to post to an HTTP URL, and get a file-like object back. One difference is that you can also pass a Request instance instead of URL. Raises a URLError (subclass of IOError); for HTTP errors, raises an HTTPError, which can also be treated as a valid response. build_opener -- Function that creates a new OpenerDirector instance. Will install the default handlers. Accepts one or more Handlers as arguments, either instances or Handler classes that it will instantiate. If one of the argument is a subclass of the default handler, the argument will be installed instead of the default. install_opener -- Installs a new opener as the default opener. objects of interest: OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages the Handler classes, while dealing with requests and responses. Request -- An object that encapsulates the state of a request. The state can be as simple as the URL. It can also include extra HTTP headers, e.g. a User-Agent. BaseHandler -- exceptions: URLError -- A subclass of IOError, individual protocols have their own specific subclass. HTTPError -- Also a valid HTTP response, so you can treat an HTTP error as an exceptional event or valid response. internals: BaseHandler and parent _call_chain conventions Example usage: import urllib2 # set up authentication info authinfo = urllib2.HTTPBasicAuthHandler() authinfo.add_password(realm='PDQ Application', uri='https://mahler:8092/site-updates.py', user='klem', passwd='geheim$parole') proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"}) # build a new opener that adds authentication and caching FTP handlers opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler) # install it urllib2.install_opener(opener) f = urllib2.urlopen('http://www.python.org/') iN(tStringIO( tunwraptunquotet splittypet splithosttquotet addinfourlt splitporttsplittagt splitattrt ftpwrappert splitusert splitpasswdt splitvalue(t localhostt url2pathnamet getproxiest proxy_bypassicCs+tdkrtantj|||S(N(t_openertNonet build_openertopen(turltdatattimeout((s..\python\lib\urllib2.pyturlopenzs  cCs |adS(N(R(topener((s..\python\lib\urllib2.pytinstall_openerstURLErrorcBseZdZdZRS(cCs|f|_||_dS(N(targstreason(tselfR((s..\python\lib\urllib2.pyt__init__s cCs d|jS(Ns(R(R((s..\python\lib\urllib2.pyt__str__s(t__name__t __module__R R!(((s..\python\lib\urllib2.pyRs t HTTPErrorcBs)eZdZejZdZdZRS(sBRaised when HTTP error occurs, but also acts like non-error returncCsV||_||_||_||_||_|dk rR|j||||ndS(N(tcodetmsgthdrstfptfilenameRt_HTTPError__super_init(RRR%R&R'R(((s..\python\lib\urllib2.pyR s      cCsd|j|jfS(NsHTTP Error %s: %s(R%R&(R((s..\python\lib\urllib2.pyR!s(R"R#t__doc__RR R*R!(((s..\python\lib\urllib2.pyR$s  s:\d+$cCs_|j}tj|d}|dkr@|jdd}ntjd|d}|jS(sReturn request-host, as defined by RFC 2965. Variation from RFC: returned value is lowercased, for convenient comparison. ittHost(t get_full_urlturlparset get_headert _cut_port_retsubtlower(trequestRthost((s..\python\lib\urllib2.pyt request_hosts   tRequestcBseZdidedZdZdZdZdZdZ dZ dZ dZ d Z d Zd Zd Zd ZdZdZdZddZdZRS(c Cst||_t|j\|_}d|_d|_d|_d|_||_i|_ x*|j D]\}}|j ||qjWi|_ |dkrt |}n||_||_dS(N(Rt_Request__originalRRttypeR5tportt _tunnel_hostRtheaderstitemst add_headertunredirected_hdrsR6torigin_req_hostt unverifiable( RRRR<R@RAtfragmenttkeytvalue((s..\python\lib\urllib2.pyR s         cCs^|d dkrQ|d}ttd|rQt|d|t||Snt|dS(Ni t _Request__r_tget_(thasattrR7tgetattrtAttributeError(Rtattrtname((s..\python\lib\urllib2.pyt __getattr__s  cCs|jrdSdSdS(NtPOSTtGET(thas_data(R((s..\python\lib\urllib2.pyt get_methods cCs ||_dS(N(R(RR((s..\python\lib\urllib2.pytadd_datascCs |jdk S(N(RR(R((s..\python\lib\urllib2.pyROscCs|jS(N(R(R((s..\python\lib\urllib2.pytget_datascCs|jS(N(R8(R((s..\python\lib\urllib2.pyR.scCsV|jdkrOt|j\|_|_|jdkrOtd|jqOn|jS(Nsunknown url type: %s(R9RRR8t_Request__r_typet ValueError(R((s..\python\lib\urllib2.pytget_types cCsR|jdkrKt|j\|_|_|jrKt|j|_qKn|jS(N(R5RRRSt_Request__r_hostR(R((s..\python\lib\urllib2.pytget_hosts  cCs|jS(N(RV(R((s..\python\lib\urllib2.pyt get_selectorscCsJ|jdkr(|j r(|j|_n||_|j|_||_dS(Nthttps(R9R;R5R8RV(RR5R9((s..\python\lib\urllib2.pyt set_proxys   cCs|j|jkS(N(RVR8(R((s..\python\lib\urllib2.pyt has_proxy scCs|jS(N(R@(R((s..\python\lib\urllib2.pytget_origin_req_hostscCs|jS(N(RA(R((s..\python\lib\urllib2.pytis_unverifiablescCs||j|jscCs||j|jR`RbR0Rg(((s..\python\lib\urllib2.pyR7s(               tOpenerDirectorcBsSeZdZdZdZdZdejdZ ddZ dZ RS(cCsMdt}d|fg|_g|_i|_i|_i|_i|_dS(NsPython-urllib/%ss User-agent(t __version__t addheadersthandlerst handle_opent handle_errortprocess_responsetprocess_request(Rtclient_version((s..\python\lib\urllib2.pyR +s     c Cst|ds(tdt|nt}xet|D]W}|d krSq;n|jd}|| }||d}|jdr|jd|d}||d}yt|}Wntk rnX|j j |i} | |j |%s R<R@RAN(i-i.i/i3(sGETR(i-i.i/( RPtreplaceRR<R=R7R\RR$R.( RRR(R%R&R<tnewurltmt newheaders((s..\python\lib\urllib2.pyRss   c Csd|kr"|jdd}n&d|krD|jdd}ndStj|}|jsyt|}d|d>> _parse_proxy('file:/ftp.example.com/') Traceback (most recent call last): ValueError: proxy URL with no authority: 'file:/ftp.example.com/' The first three items of the returned tuple may be None. Examples of authority parsing: >>> _parse_proxy('proxy.example.com') (None, None, None, 'proxy.example.com') >>> _parse_proxy('proxy.example.com:3128') (None, None, None, 'proxy.example.com:3128') The authority component may optionally include userinfo (assumed to be username:password): >>> _parse_proxy('joe:password@proxy.example.com') (None, 'joe', 'password', 'proxy.example.com') >>> _parse_proxy('joe:password@proxy.example.com:3128') (None, 'joe', 'password', 'proxy.example.com:3128') Same examples, but with URLs instead: >>> _parse_proxy('http://proxy.example.com/') ('http', None, None, 'proxy.example.com') >>> _parse_proxy('http://proxy.example.com:3128/') ('http', None, None, 'proxy.example.com:3128') >>> _parse_proxy('http://joe:password@proxy.example.com/') ('http', 'joe', 'password', 'proxy.example.com') >>> _parse_proxy('http://joe:password@proxy.example.com:3128') ('http', 'joe', 'password', 'proxy.example.com:3128') Everything after the authority is ignored: >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128') ('ftp', 'joe', 'password', 'proxy.example.com') Test for no trailing '/' case: >>> _parse_proxy('http://joe:password@proxy.example.com') ('http', 'joe', 'password', 'proxy.example.com') Rs//sproxy URL with no authority: %riiN(RR|RRTR{R R ( tproxytschemetr_schemet authoritytendtuserinfothostporttusertpassword((s..\python\lib\urllib2.pyt _parse_proxygs2      RcBs#eZdZddZdZRS(idcCs|dkrt}nt|ds3td||_x=|jD]/\}}t|d||||jdqIWdS(Nthas_keysproxies must be a mappings%s_opencSs||||S(N((trRR9R((s..\python\lib\urllib2.pyts(RRRGtAssertionErrortproxiesR=tsetattrRu(RRR9R((s..\python\lib\urllib2.pyR s    c Cs|j}t|\}}}}|dkr9|}n|jrUt|jrUdS|r|rdt|t|f} tj| j} |j dd| nt|}|j ||||ks|dkrdS|j j |d|j SdS(Ns%s:%ssProxy-authorizationsBasic RYR(RURRR5RRtbase64t b64encodetstripR>RZRRR( RRRR9t orig_typet proxy_typeRRRt user_passtcreds((s..\python\lib\urllib2.pyRus     N(R"R#RRR Ru(((s..\python\lib\urllib2.pyRs tHTTPPasswordMgrcBs8eZdZdZdZedZdZRS(cCs i|_dS(N(tpasswd(R((s..\python\lib\urllib2.pyR scCst|tr|g}n||jkr:i|j|tparse_keqv_listtparse_http_listtget_authorizationR<RcR"RR`RRR(RRR%ttokent challengetchaltauth_valtresp((s..\python\lib\urllib2.pyR?s cCs<tjd|j|tjtdfj}|d S(Ns %s:%s:%s:%sii(thashlibtsha1R;ttimetctimeR9t hexdigest(Rtnoncetdig((s..\python\lib\urllib2.pyt get_cnoncescCsdyK|d}|d}|jd}|jdd}|jdd}Wntk r_dSX|j|\}} |dkrdS|jj||j\} } | dkrdS|jr|j|j |} nd} d| || f} d|j |j f}|d kr||j kr?|j d 7_ nd |_ ||_ d |j }|j|}d ||||||f}| || |}nD|dkr| || d|||f}ntd |d| |||j |f}|r|d|7}n| r5|d| 7}n|d|7}|r`|d||f7}n|S(NRRMtqopt algorithmtMD5topaques%s:%s:%ss%s:%sR%is%08xs%s:%s:%s:%s:%ssqop '%s' is not supported.s>username="%s", realm="%s", nonce="%s", uri="%s", response="%s"s , opaque="%s"s , digest="%s"s, algorithm="%s"s, qop=auth, nc=%s, cnonce="%s"(RcRtKeyErrortget_algorithm_implsRR R.ROtget_entity_digestRRRPRXR<R;ROR(RRRERRMRPRQRStHtKDRR#tentdigtA1tA2tncvaluetcnoncetnoncebittrespdigR((s..\python\lib\urllib2.pyRBsV    !        ( csU|j}|dkr$dn|dkr<dnfd}|fS(NRRcSstj|jS(N(RHtmd5RL(tx((s..\python\lib\urllib2.pyR stSHAcSstj|jS(N(RHRIRL(Ra((s..\python\lib\urllib2.pyR scsd||fS(Ns%s:%s((R7td(RW(s..\python\lib\urllib2.pyRs(tupper(RRQRX((RWs..\python\lib\urllib2.pyRUs     cCsdS(N(R(RRRE((s..\python\lib\urllib2.pyRVsN( R"R#RR RR!R?RORBRURV(((s..\python\lib\urllib2.pyR:s   = tHTTPDigestAuthHandlercBs#eZdZdZdZdZRS(sAn authentication protocol defined by RFC 2069 Digest authentication improves on basic authentication because it does not transmit passwords in the clear. R*icCs?tj|jd}|jd|||}|j|S(Niswww-authenticate(R/R.R!R(RRR(R%R&R<R5tretry((s..\python\lib\urllib2.pyR+ s   (R"R#R+R"RR+(((s..\python\lib\urllib2.pyRestProxyDigestAuthHandlercBseZdZdZdZRS(sProxy-AuthorizationicCs2|j}|jd|||}|j|S(Nsproxy-authenticate(RWR!R(RRR(R%R&R<R5Rf((s..\python\lib\urllib2.pyR--s    (R"R#R"RR-(((s..\python\lib\urllib2.pyRg(stAbstractHTTPHandlercBs/eZddZdZdZdZRS(icCs ||_dS(N(t _debuglevel(Rt debuglevel((s..\python\lib\urllib2.pyR 6scCs ||_dS(N(Ri(Rtlevel((s..\python\lib\urllib2.pytset_http_debuglevel9sc Cs:|j}|s!tdn|jr|j}|jds[|jddn|jds|jddt|qn|}|jrt|j \}}t |\}}n|jds|jd|nxH|j j D]:\}} |j }|j|s|j|| qqW|S(Ns no host givens Content-types!application/x-www-form-urlencodedsContent-lengths%dR-(RWRRORRRbR`RR[RRXRRRkR^( RR4R5Rtsel_hostRtseltsel_pathRKRD((s..\python\lib\urllib2.pyt do_request_<s.      c s|j}|s!tdn||d|j}|j|jt|jjtfd|jj Dddks Rt Connectioncss'|]\}}|j|fVqdS(N(ttitle(RRKR_((s..\python\lib\urllib2.pys vssProxy-AuthorizationR<t buffering(RWRRtset_debuglevelRiRR?RfR<R=R;t set_tunnelR4RPRXRt getresponseRRyRRwRtrecvt _fileobjectRR&R.tstatusR%R( Rt http_classRR5Rttunnel_headerstproxy_auth_hdrRterrR(RG((R<s..\python\lib\urllib2.pyRtYs> ,    %    (R"R#R RlRpRt(((s..\python\lib\urllib2.pyRh4s   RcBseZdZejZRS(cCs|jtj|S(N(RtRtHTTPConnection(RR((s..\python\lib\urllib2.pyt http_opens(R"R#RRhRpt http_request(((s..\python\lib\urllib2.pyRs RRcBseZdZejZRS(cCs|jtj|S(N(RtRtHTTPSConnection(RR((s..\python\lib\urllib2.pyt https_opens(R"R#RRhRpt https_request(((s..\python\lib\urllib2.pyRs tHTTPCookieProcessorcBs2eZddZdZdZeZeZRS(cCs4ddl}|dkr'|j}n||_dS(Ni(t cookielibRt CookieJart cookiejar(RRR((s..\python\lib\urllib2.pyR s  cCs|jj||S(N(Rtadd_cookie_header(RR4((s..\python\lib\urllib2.pyRscCs|jj|||S(N(Rtextract_cookies(RR4Rx((s..\python\lib\urllib2.pyRsN(R"R#RR RRRR(((s..\python\lib\urllib2.pyRs    RcBseZdZRS(cCs |j}td|dS(Nsunknown url type: %s(RUR(RRR9((s..\python\lib\urllib2.pyRs (R"R#R(((s..\python\lib\urllib2.pyRscCsmi}x`|D]X}|jdd\}}|ddkr[|ddkr[|dd!}n|||Parse list of key=value strings where keys are not duplicated.t=iit"i(R>(tltparsedteltRR((s..\python\lib\urllib2.pyR@s  cCsg}d}t}}x|D]}|r?||7}t}qn|r|dkr]t}qn|dkrrt}n||7}qn|dkr|j|d}qn|dkrt}n||7}qW|r|j|ng|D]}|j^qS(spParse lists as described by RFC 2068 Section 2. In particular, parse comma-separated lists where the elements of the list may include quoted-strings. A quoted-string could contain a comma. A non-quoted string could have quotes in the middle. Neither commas nor quotes count if they are escaped. Only double-quotes count, not single-quotes. R,s\Rt,(RhRRR(R7trestparttescapeRtcur((s..\python\lib\urllib2.pyRAs4            cCs-ytj|SWntjk r(dSXdS(N(Rt gethostbynametgaierrorR(R5((s..\python\lib\urllib2.pyt_safe_gethostbynamesRcBs)eZdZdZdZdZRS(cCsq|j}|d dkr`|dd!dkr`|jr`|jdkr`d|_|jj|S|j|SdS(Nis//iRRtftp(RXR5R9RRtopen_local_file(RRR((s..\python\lib\urllib2.pyt file_opens  , cCs|tjdkruy7ttjddtjtjdt_Wqutjk rqtjdft_quXntjS(NRi( RtnamesRRRtgethostbyname_ext gethostnameRR(R((s..\python\lib\urllib2.pyt get_names s$cCs[ddl}ddl}|j}|j}t|}ytj|}|j}|jj |j dt } |j |d} t jtd| pd|| f} |rt|\}} n| s| r(t||jkr(|rd||} n d|} tt|d| | SWntk rJ}t|nXtddS( Nitusegmtis6Content-type: %s Content-length: %d Last-modified: %s s text/plainsfile://trbsfile not on local host(t email.utilst mimetypesRWRXRR.tstattst_sizetutilst formatdatetst_mtimeRt guess_typet mimetoolstMessageRRRRRRtOSErrorR(RRtemailRR5R)t localfiletstatstsizetmodifiedtmtypeR<R:torigurlR&((s..\python\lib\urllib2.pyRs0        N(R"R#RRRRR(((s..\python\lib\urllib2.pyRs  RcBseZdZdZRS(cCsddl}ddl}|j}|s9tdnt|\}}|dkrc|j}n t|}t|\}}|rt |\}}nd}t |}|pd}|pd}yt j |}Wn"t j k r}t|nXt|j\} } | jd} tt | } | d | d} } | rg| d rg| d} ny/|j||||| |j} | rdpd}xM| D]E}t|\}}|jd kr|dkr|j}qqW| j| |\}}d}|j|jd}|r;|d|7}n|dk rd|dkrd|d|7}nt|}tj|}t|||jSWn0|jk r}td|tj dnXdS(Nisftp error: no host givenR,RiiR(tDR9tatARRcsContent-type: %s sContent-length: %d s ftp error: %si(RRRR(RcR(!tftplibRRWRRRtFTP_PORTR}R R RRRRwR RXR>tmapt connect_ftpRR R3RdtretrfileRR.RRRRt all_errorstsystexc_info(RRRRR5R:RRR&RtattrstdirstfiletfwR9RJRDR(tretrlenR<Rtsf((s..\python\lib\urllib2.pytftp_open5s\          !   cCst||||||}|S(N(R (RRRR5R:RRR((s..\python\lib\urllib2.pyRjs(R"R#RR(((s..\python\lib\urllib2.pyR4s 5tCacheFTPHandlercBs5eZdZdZdZdZdZRS(cCs1i|_i|_d|_d|_d|_dS(Nii<i(tcacheRtsoonesttdelayt max_conns(R((s..\python\lib\urllib2.pyR rs     cCs ||_dS(N(R(Rtt((s..\python\lib\urllib2.pyt setTimeoutyscCs ||_dS(N(R(RR((s..\python\lib\urllib2.pyt setMaxConns|scCs|||dj||f}||jkrJtj|j|j|Lsr              X"    o '^ H+@ <   k + 4;