Python模块学习-1


Python




0x01.urllib & urllib2

参考地址:

https://docs.python.org/2/library/urllib2.html

urllib.urlopen(url[, data[, proxies]])

打开http,并且获取信息。

#! /usr/bin/python
#-*- coding:utf-8 -*-

import urllib  

res = urllib.urlopen('http://www.google.com').read() 
print res    
print 'http header:/n ', res.info()  
print 'http status:/n ', res.getcode()  
print 'url:/n ', res.geturl()  

res.close()

urllib.urlretrieve(url[, filename[, reporthook[, data]]])

urlretrieve方法直接将远程数据下载到本地。

#! /usr/bin/python
#-*- coding:utf-8 -*-

import urllib2  

def callback(a, b, c):
    '''
    Progress hook for urlretrieve: print how much has been downloaded.

    @a: number of blocks transferred so far
    @b: size of one block, in bytes
    @c: total size of the remote file, in bytes; treated as unknown when
        it is zero or negative
    '''
    if c <= 0:
        # Without a usable total size a percentage cannot be computed
        # (dividing would raise ZeroDivisionError or give a negative value),
        # so report the number of bytes fetched instead.
        print('已下载 : %d 字节' % (a * b))
        return
    # a * b can overshoot c on the final block, hence the cap at 100.
    per = min(100.0 * a * b / c, 100)
    print('当前进度 : %.2f%%' % per)

# urlretrieve is provided by urllib (not urllib2), so import urllib here
# before it is used.
import urllib

url = 'http://homeway.me/'
local = 'homeway.html'
# callback is passed as the reporthook so progress is printed while the
# remote page is saved to the local file.
urllib.urlretrieve(url, local, callback)

urllib2是urllib的一个增强。

urllib2.urlopen(url)打开url

#! /usr/bin/python
#-*- coding:utf-8 -*-

import urllib2

res = urllib2.urlopen('http://homeway.me')
print res.info()
print res.read()
# do something
response.close()  

urllib2下载文件

#! /usr/bin/python
#-*- coding:utf-8 -*-

import urllib2

# Named `filename` rather than `file` so the builtin `file` type is not shadowed.
filename = "homeway.html"
url = "http://homeway.me/"
response = urllib2.urlopen(url)

try:
    # Open in binary mode: the response body is raw bytes and must not go
    # through platform newline translation.
    with open(filename, "wb") as f:
        f.write(response.read())
finally:
    # Release the connection whether or not the write succeeded.
    response.close()

urllib2发送POST包

#! /usr/bin/python
#-*- coding:utf-8 -*-

import urllib2 as lib2
import urllib

def main():
    req_url = 'http://homeway.me/';
    # 设置http报头
    req_headers = {
        'Host': 'homeway.me',
        'User-Agent': 'Mozilla/5.0 (X11; Debian; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Encoding': 'gzip, deflate',
        'DNT' : '1',
        'Connection': 'keep-alive',
        'Content-Type' : 'application/x-www-form-urlencoded',
    }
    # 设置发包内容
    req_data ={
        'url' : 'http://homeway.me/',
    }
    req_body = urllib.urlencode( req_data )

    # urllib2提供 Request 类,用于添加http报头
    req = lib2.Request( req_url, req_headers )
    # 添加 post 数据
    res = lib2.urlopen( req, req_body )

    print res.read()

    res.close()

if __name__ == '__main__':
    main()

urllib2发送GET包

直接get请求,添加参数即可。

# A GET request carries its parameters in the URL's query string, so no data
# argument is passed to urlopen.
url = "http://homeway.me/?user=xiaocao&pwd=123456"
response = urllib2.urlopen(url)

urllib2.HTTPError错误throw

HTTPError 是 URLError 的子类,所以捕获 HTTPError 的 except 必须写在前面,否则 HTTP 错误会先被 URLError 的 except 分支捕获。

from urllib2 import Request, urlopen, URLError, HTTPError  
req = Request(someurl)  
try:  
    response = urlopen(req)  
except HTTPError, e:  
    print 'The server couldn/'t fulfill the request.'  
    print 'Error code: ', e.code  
except URLError, e:  
    print 'We failed to reach a server.'  
    print 'Reason: ', e.reason  

urllib2.URLError错误throw

#! /usr/bin/python
#-*- coding:utf-8 -*-

from urllib2 import Request, urlopen, URLError  
req = Request( 'http://homeway.me' )  

try:  
    response = urlopen(req)  
except URLError, e:  
    if hasattr(e, 'reason'):  
        print 'We failed to reach a server.'  
        print 'Reason: ', e.reason  
    elif hasattr(e, 'code'):  
        print 'The server couldn/'t fulfill the request.'  
        print 'Error code: ', e.code  
    else:
        print 'Unkown error!'

urllib2.Request还有很多函数

...

req_body = urllib.urlencode( req_data )

req = lib2.Request( req_url, req_headers )
res = lib2.urlopen( req, req_body )

# 看名字就能知道的函数
req.add_data( {'Accept-Encoding', 'utf-8'} )
req.add_header( 'Accept-Encoding', 'utf-8' )
print 'Has_header: ',req.has_header('Accept-Encoding')
print 'Get_type: ', req.get_type()
print 'Get_host', req.get_host()
print 'Get_selector', req.get_selector()
print 'Get_header', req.get_header( 'Accept-Encoding', default=None )




httplib

httplib有两个主要类HTTPConnection和HTTPResponse

httplib.HTTPConnection ( host [ , port [ , strict [ , timeout ]]] )

HTTPConnection类的构造函数,表示一次与服务器之间的交互,即请求/响应。参数host表示服务器主机,如:www.csdn.net;port为端口号,默认值为80; 参数strict的 默认值为false, 表示在无法解析服务器返回的状态行时( status line) (比较典型的状态行如: HTTP/1.0 200 OK ),是否抛BadStatusLine 异常;可选参数timeout 表示超时时间。

HTTPConnection.connect ()

HTTPConnection.close ()

HTTPConnection.request ( method , url [ , body [ , headers ]] )

调用 request 方法会向服务器发送一次请求:method 表示请求的方法,常用的方法有 GET 和 POST;url 表示请求的资源的 url;body 表示提交到服务器的数据;headers 表示请求的 http 头。

HTTPConnection.getresponse ()

httplib.HTTPResponse

HTTPResponse.read([amt])
  
HTTPResponse.getheader(name[, default])
  
HTTPResponse.getheaders()
  
HTTPResponse.msg
  
HTTPResponse.version
  
HTTPResponse.status


  
  
看看示例吧:

#! /usr/bin/python
# -*- coding: utf-8 -*-

import httplib

def main():
    req_host = 'homeway.me'
    req_url = 'http://homeway.me';
    req_headers = {
        'Host': 'homeway.me',
        'User-Agent': 'Mozilla/5.0 (X11; Debian; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Encoding': 'gzip, deflate',
        'DNT' : '1',
        'Connection': 'keep-alive',
        'Content-Type' : 'application/x-www-form-urlencoded',
    }
    req_data ={
        'url' : 'http://homeway.me/',
    }

    conn = httplib.HTTPConnection( req_host, 80, True )
    conn.request( method ='GET', url = req_url, headers = req_headers, body='' )
    res = conn.getresponse()

    print 'version:', res.version  
    print 'reason:', res.reason  
    print 'status:', res.status  
    print 'msg:', res.msg  
    print 'headers:', res.getheaders() 

    conn.close()

if __name__ == '__main__':
    main()




本文出自 夏日小草,转载请注明出处:http://homeway.me/2014/04/04/Python-Module-Learning-1/


-by小草

2014-04-04 21:55:38

Fork me on GitHub