2007-07-06
Python练习:在线更新工具2(07-06)
更新一些地方:
1.用工具下载curl
curl比wget更强大一些,也提供了很多开发库,可以方便使用.据说可以实现多线程下载.
2.多线程
Python的多线程比较简单,需要注意的是要所有的线程都结束了,主程序才能关闭,所以要在主程序中调用子线程的join方法.
另,在执行curl的时候,没有直接调用,而是采用的start curl的方法,这样会分别开几个控制台,主程序也能很快的结束.
代码如下:
import urllib,os,re,threading
class UpdateItem:
    """One entry of the updater config file.

    An entry is a comma-separated record, e.g.:

        test.zip,20,c:/temp,http://www.xxx.com

    with two optional trailing fields (pageUrl, regstr) that are used
    when the real download URL must first be scraped from a web page.
    Fields read from the file keep their raw text (the last field may
    still carry the line's trailing newline; callers rstrip it).
    """

    def __init__(self, sequence):
        # Mandatory fields: file name, size in bytes, local directory, URL.
        self.fileName = sequence[0]
        self.fileSize = int(sequence[1])
        self.filePath = sequence[2]
        self.url = sequence[3]
        self.len = 4
        # Optional scrape fields. Require BOTH to be present: the original
        # test (`> 4`) raised IndexError on a malformed 5-field record.
        if len(sequence) >= 6:
            self.pageUrl = sequence[4]
            self.regstr = sequence[5]
            self.len = 6

    def __str__(self):
        # Serialize back into the config-file record format.
        parts = [self.fileName, str(self.fileSize), self.filePath, self.url]
        if self.len != 4:
            parts.append(self.pageUrl)
            parts.append(self.regstr)
        return ','.join(parts)

    def __len__(self):
        # Number of config fields this entry carries (4 or 6).
        return self.len
class UrlParse(threading.Thread):
    """Worker thread: check one UpdateItem for a newer remote file and,
    if the size changed, launch a detached `curl` download for it.

    NOTE(review): Python 2 code -- relies on `urllib.urlopen`, `print`
    statements, and the Windows `start` shell command.
    """

    def __init__(self, item):
        threading.Thread.__init__(self)
        # The UpdateItem this worker is responsible for.
        self.item = item

    def isNewSize(self, oldsize):
        # Probe the remote file size by opening the URL and reading the
        # Content-Length response header.  Returns False when it equals
        # *oldsize* (file unchanged); otherwise records the new size on
        # the item and returns True.
        self.socket = urllib.urlopen(self.item.url)
        fileSize = int(self.socket.headers['Content-Length'])
        if oldsize == fileSize:
            return False
        self.item.fileSize = fileSize
        return True

    def download(self):
        if len(self.item) > 4:
            ## have to parse the page first to discover the real URL
            self.socket = urllib.urlopen(self.item.pageUrl)
            try:
                content = self.socket.read()
                # regstr comes straight from the config file and may still
                # carry the line's trailing newline -- strip it.
                pattern = self.item.regstr.rstrip()
                # ':' and '/' in the configured pattern are stored
                # percent-encoded (%3A / %2F); after matching, the found
                # URL is decoded back.  NOTE(review): this implies the
                # page content itself contains percent-encoded URLs --
                # confirm against an actual target page.
                pattern = pattern.replace (':','%3A')
                pattern = pattern.replace('/','%2F')
                match = re.search(pattern,content)
                # NOTE(review): raises AttributeError if the pattern does
                # not match (match is None).
                self.item.url = match.group()[:]
                self.item.url = self.item.url.replace('%3A',':')
                self.item.url = self.item.url.replace('%2F','/')
                print 'Get url from page==>',self.item.url
            finally:
                self.socket.close()
        if not self.isNewSize(self.item.fileSize):
            print self.item.fileName, '==>Up to Date'
            return
        path = ''.join(( self.item.filePath,'/',self.item.fileName))
        print 'Starting download file ',self.item.url
        # Windows 'start' opens a separate console, so curl runs detached
        # and this thread (and the main program) can finish quickly.
        # NOTE(review): URL and path are interpolated into a shell command
        # unquoted -- paths with spaces or hostile URLs would break this.
        os.system(' '.join(('start curl -o',path ,self.item.url)))
        ## earlier pure-python download, kept for reference:
        ## f = open(''.join(( self.item.filePath,'/',self.item.fileName)), 'wb')
        ## try:
        ## while True:
        ## data = self.socket.read(8192)
        ## if not data:
        ## break
        ## f.write(data)
        ## finally:
        ## self.socket.close()
        ## f.close()

    def run(self):
        # Thread entry point.
        self.download()
if __name__ == '__main__':
'''
'''
lists = []
threads = []
print 'loading config file...',
if os.path.exists('updater.dat'):
f = open('updater.dat')
try:
for line in f:
args = line.split(',')
lists.append(UpdateItem(args))
finally:
f.close()
print 'done'
print 'Starting job...'
for item in lists:
parse = UrlParse(item)
##parse.download()
threads.append(parse)
parse.start()
for t in threads:
t.join ()
print 'All done, Press any key to quit'
f = open('updater.dat','w')
try:
for item in lists:
f.write(str(item))
finally:
f.close()
raw_input()
else:
print 'Please make a config file.'
1.用工具下载curl
curl比wget更强大一些,也提供了很多开发库,可以方便使用.据说可以实现多线程下载.
2.多线程
Python的多线程比较简单,需要注意的是要所有的线程都结束了,主程序才能关闭,所以要在主程序中调用子线程的join方法.
另,在执行curl的时候,没有直接调用,而是采用的start curl的方法,这样会分别开几个控制台,主程序也能很快的结束.
代码如下:
import urllib,os,re,threading
class UpdateItem:
    """One entry of the updater config file.

    An entry is a comma-separated record, e.g.:

        test.zip,20,c:/temp,http://www.xxx.com

    with two optional trailing fields (pageUrl, regstr) that are used
    when the real download URL must first be scraped from a web page.
    Fields read from the file keep their raw text (the last field may
    still carry the line's trailing newline; callers rstrip it).
    """

    def __init__(self, sequence):
        # Mandatory fields: file name, size in bytes, local directory, URL.
        self.fileName = sequence[0]
        self.fileSize = int(sequence[1])
        self.filePath = sequence[2]
        self.url = sequence[3]
        self.len = 4
        # Optional scrape fields. Require BOTH to be present: the original
        # test (`> 4`) raised IndexError on a malformed 5-field record.
        if len(sequence) >= 6:
            self.pageUrl = sequence[4]
            self.regstr = sequence[5]
            self.len = 6

    def __str__(self):
        # Serialize back into the config-file record format.
        parts = [self.fileName, str(self.fileSize), self.filePath, self.url]
        if self.len != 4:
            parts.append(self.pageUrl)
            parts.append(self.regstr)
        return ','.join(parts)

    def __len__(self):
        # Number of config fields this entry carries (4 or 6).
        return self.len
class UrlParse(threading.Thread):
    """Worker thread: check one UpdateItem for a newer remote file and,
    if the size changed, launch a detached `curl` download for it.

    NOTE(review): Python 2 code -- relies on `urllib.urlopen`, `print`
    statements, and the Windows `start` shell command.
    """

    def __init__(self, item):
        threading.Thread.__init__(self)
        # The UpdateItem this worker is responsible for.
        self.item = item

    def isNewSize(self, oldsize):
        # Probe the remote file size by opening the URL and reading the
        # Content-Length response header.  Returns False when it equals
        # *oldsize* (file unchanged); otherwise records the new size on
        # the item and returns True.
        self.socket = urllib.urlopen(self.item.url)
        fileSize = int(self.socket.headers['Content-Length'])
        if oldsize == fileSize:
            return False
        self.item.fileSize = fileSize
        return True

    def download(self):
        if len(self.item) > 4:
            ## have to parse the page first to discover the real URL
            self.socket = urllib.urlopen(self.item.pageUrl)
            try:
                content = self.socket.read()
                # regstr comes straight from the config file and may still
                # carry the line's trailing newline -- strip it.
                pattern = self.item.regstr.rstrip()
                # ':' and '/' in the configured pattern are stored
                # percent-encoded (%3A / %2F); after matching, the found
                # URL is decoded back.  NOTE(review): this implies the
                # page content itself contains percent-encoded URLs --
                # confirm against an actual target page.
                pattern = pattern.replace (':','%3A')
                pattern = pattern.replace('/','%2F')
                match = re.search(pattern,content)
                # NOTE(review): raises AttributeError if the pattern does
                # not match (match is None).
                self.item.url = match.group()[:]
                self.item.url = self.item.url.replace('%3A',':')
                self.item.url = self.item.url.replace('%2F','/')
                print 'Get url from page==>',self.item.url
            finally:
                self.socket.close()
        if not self.isNewSize(self.item.fileSize):
            print self.item.fileName, '==>Up to Date'
            return
        path = ''.join(( self.item.filePath,'/',self.item.fileName))
        print 'Starting download file ',self.item.url
        # Windows 'start' opens a separate console, so curl runs detached
        # and this thread (and the main program) can finish quickly.
        # NOTE(review): URL and path are interpolated into a shell command
        # unquoted -- paths with spaces or hostile URLs would break this.
        os.system(' '.join(('start curl -o',path ,self.item.url)))
        ## earlier pure-python download, kept for reference:
        ## f = open(''.join(( self.item.filePath,'/',self.item.fileName)), 'wb')
        ## try:
        ## while True:
        ## data = self.socket.read(8192)
        ## if not data:
        ## break
        ## f.write(data)
        ## finally:
        ## self.socket.close()
        ## f.close()

    def run(self):
        # Thread entry point.
        self.download()
if __name__ == '__main__':
'''
'''
lists = []
threads = []
print 'loading config file...',
if os.path.exists('updater.dat'):
f = open('updater.dat')
try:
for line in f:
args = line.split(',')
lists.append(UpdateItem(args))
finally:
f.close()
print 'done'
print 'Starting job...'
for item in lists:
parse = UrlParse(item)
##parse.download()
threads.append(parse)
parse.start()
for t in threads:
t.join ()
print 'All done, Press any key to quit'
f = open('updater.dat','w')
try:
for item in lists:
f.write(str(item))
finally:
f.close()
raw_input()
else:
print 'Please make a config file.'