Я думал, что gevent должен использовать меньше памяти, чем потоки, но на деле он расходует больше памяти. Почему gevent использует больше памяти, чем threading?
Вот мой код. Вариант с gevent:
#import gevent.monkey
#import gevent.httplib as ghttplib
import httplib as ghttplib
import httpsqs
#gevent.monkey.patch_all()
#from urlparse import urlparse
#from gevent.pool import Pool
#import gevent
#import MySQLdb
import urllib2
#from MySQLdb.cursors import SSCursor
#import gevent_profiler
import requests
import time
from threading import Thread
#import Queue
import os
import memory
import sys
#gevent_profiler.print_percentages(True)
#gevent_profiler.time_blocking(True)
#gevent_profiler.set_stats_output('my-stats.txt')
# Browser-like User-Agent sent with every request.  The value is built from
# adjacent literals so that the space between the platform token and "Gecko/"
# is explicit; a backslash continuation inside a single literal glues the two
# halves together with no separator.
user_agent = ('Mozilla/5.0 (Windows NT 6.1; rv:10.0) '
              'Gecko/20100101 Firefox/10.0')
headers = {'User-Agent': user_agent}
# Batch sizes iterated by the benchmark loop further down.
scale = [1, 5, 10, 20, 50, 100, 200, 300]
# Output file that receives one line of memory figures per batch size.
data = open("thread.txt", 'w')
# Print our PID -- presumably so the process can be inspected from outside
# (e.g. via /proc/<pid>/status); confirm with the author's workflow.
print(os.getpid())
def get(url):
    """Fetch ``url`` using the module-level ``headers`` and a 10-second timeout.

    Returns whatever ``requests.get`` returns for the call.
    """
    return requests.get(url, timeout=10, headers=headers)
# Create the output directory on first run; leave it alone if already present.
if not os.path.exists("./urls_httpsqs"):
    os.makedirs("./urls_httpsqs")
class URLThread(Thread):
    """Worker thread that repeatedly takes a URL from a queue and fetches it.

    Parameters:
        queue: object whose ``get(queue_name)`` returns the next URL as a
            string (an ``httpsqs.Httpsqs`` instance in this script).
        queue_name: name passed to ``queue.get`` on every iteration.
        timeout: timeout forwarded to ``requests.get``.
        allow_redirects: forwarded to ``requests.get``.
    """

    def __init__(self, queue, queue_name, timeout=10, allow_redirects=True):
        super(URLThread, self).__init__()
        self.timeout = timeout
        # Cleared by stop(); checked at the top of every run() iteration.
        self.runflag = True
        self.allow_redirects = allow_redirects
        # Most recent successful response, or None if nothing was fetched yet.
        self.response = None
        self.headers = {'User-Agent': user_agent}
        self.queue_name = queue_name
        self.queue = queue

    def save_disk(self, res, pid):
        """Write ``res.content`` to ``./urls_httpsqs/<pid>``.

        The file is opened in binary mode because the response body is raw
        bytes, and through ``with`` so the handle is closed even if the write
        fails.  Not called from run() at the moment.
        """
        with open("./urls_httpsqs/%s" % pid, "wb") as datafile:
            datafile.write(res.content)

    def run(self):
        """Fetch URLs from the queue until stopped or the queue reply is not OK.

        Request failures are printed and the loop moves on to the next URL.
        """
        while self.runflag:
            url = self.queue.get(self.queue_name).strip()
            # NOTE(review): isOK() presumably rejects the "queue empty" / error
            # reply of HTTPSQS -- confirm against the httpsqs client.
            if not httpsqs.isOK(url):
                return
            try:
                self.response = requests.get(
                    url,
                    timeout=self.timeout,
                    headers=self.headers,
                    allow_redirects=self.allow_redirects,
                )
            except Exception as what:
                # Best-effort crawl: report the failure and keep going.
                print(what)

    def stop(self):
        """Ask run() to exit before its next iteration."""
        self.runflag = False
# Benchmark driver for the threading variant: for each batch size in `scale`
# it builds that many URLThread objects, flags them to stop, and records the
# figures reported by memory.resident() / memory.memory() in the `data` file.
# NOTE(review): indentation was lost in this paste, so the loop nesting cannot
# be read off the text.  Presumably everything up to data.flush() runs once per
# `num` (the posted output has one row per entry of `scale`) -- confirm against
# the original script before relying on this.
#queue = Queue.Queue(50)
queue = httpsqs.Httpsqs("125.221.225.12")
queue_name = "coroutine"
#gevent_profiler.attach()
threads = []
# Wall-clock start, used for the "it costs" line at the end.
now = time.time()
for num in scale:
for i in range(num):
threads.append(URLThread(queue,queue_name))
# The start() loop is commented out, so the threads are only constructed and
# never actually run.
#for t in threads:
#t.start()
for t in threads:
t.stop()
print memory.resident()
# Drop the references to this batch before the next one is built.
threads = []
# One output row: resident/1000000, a tab, then resident/1000000 + virtual/1000000.
data.write(str((memory.resident()/1000000)))
data.write("\t")
data.write(str((memory.resident()/1000000)+memory.memory()/1000000))
data.write("\n")
data.flush()
#sys.exit(0)
#for t in threads:
#t.join()
end = time.time()
# Final summary of the memory figures and the elapsed wall-clock time.
print "virtual memory is", memory.memory()
print "resident memory is", memory.resident()
print "stack memory is", memory.stacksize()
print "begin is",now
print "end is",end
print "it costs", end-now
Вариант с потоками:
import gevent.httplib as ghttplib
import time
import httplib as ghttplib
import httpsqs
#gevent.monkey.patch_all()
#from urlparse import urlparse
#from gevent.pool import Pool
#import gevent
import MySQLdb
import urllib2
#from MySQLdb.cursors import SSCursor
#import gevent_profiler
import requests
from threading import Thread
import multiprocessing
#import Queue
import os
import memory
#gevent_profiler.print_percentages(True)
#gevent_profiler.time_blocking(True)
#gevent_profiler.set_stats_output('my-stats.txt')
# Browser-like User-Agent sent with every request.  The value is built from
# adjacent literals so that the space between the platform token and "Gecko/"
# is explicit; a backslash continuation inside a single literal glues the two
# halves together with no separator.
user_agent = ('Mozilla/5.0 (Windows NT 6.1; rv:10.0) '
              'Gecko/20100101 Firefox/10.0')
headers = {'User-Agent': user_agent}
# Batch sizes iterated by the benchmark loop further down.
scale = [1, 5, 10, 20, 50, 100, 200, 300]
# Output file that receives one line of memory figures per batch size.
data = open("process.txt", 'w')
# Only referenced from a commented-out debug print in run().
total_mem = 0
def get(url):
    """Fetch ``url`` using the module-level ``headers`` and a 10-second timeout.

    Returns whatever ``requests.get`` returns for the call.
    """
    return requests.get(url, timeout=10, headers=headers)
# Create the output directory on first run; leave it alone if already present.
if not os.path.exists("./urls_httpsqs"):
    os.makedirs("./urls_httpsqs")
def save_disk(res, pid):
    """Write ``res.content`` to ``./urls_httpsqs/<pid>``.

    Parameters:
        res: object exposing the response body as ``content`` (a
            ``requests`` response in this script).
        pid: value used as the file name inside ``./urls_httpsqs``.

    The file is opened in binary mode because the body is raw bytes, and
    through ``with`` so the handle is closed even if the write fails.
    """
    with open("./urls_httpsqs/%s" % pid, "wb") as datafile:
        datafile.write(res.content)
def run(queue, queue_name):
    """Worker loop: take URLs from ``queue`` and fetch each one.

    Parameters:
        queue: object whose ``get(queue_name)`` returns the next URL as a
            string (an ``httpsqs.Httpsqs`` instance in this script).
        queue_name: name passed to ``queue.get`` on every iteration.

    Returns None as soon as ``httpsqs.isOK`` rejects the value read from the
    queue.  Request failures are printed and the loop continues.
    """
    while True:
        url = queue.get(queue_name).strip()
        # NOTE(review): isOK() presumably rejects the "queue empty" / error
        # reply of HTTPSQS -- confirm against the httpsqs client.
        if not httpsqs.isOK(url):
            return
        try:
            # The response is intentionally discarded; saving it to disk is
            # currently disabled.  No timeout is passed here, unlike get().
            requests.get(url)
        except Exception as what:
            # Best-effort crawl: report the failure and keep going.
            print(what)
# Benchmark driver for the multiprocessing variant: for each batch size in
# `scale` it starts that many processes running run(), terminates them, and
# records figures derived from memory.resident() / memory.memory() in `data`.
# NOTE(review): indentation was lost in this paste, so the loop nesting cannot
# be read off the text.  Presumably everything up to data.flush() runs once per
# `num`, and data.close() runs after the loop -- confirm against the original.
queue = httpsqs.Httpsqs("125.221.225.12")
queue_name = "coroutine"
#print os.getpid()
#gevent_profiler.attach()
# Wall-clock start, used for the "it costs" line at the end.
now = time.time()
record = []
for num in scale:
for i in range(num):
process = multiprocessing.Process(target=run,args=(queue,queue_name))
process.start()
record.append(process)
for i in record:
i.terminate()
# Drop the references to this batch before the next one is built.
record = []
print "done"
print memory.resident()
print num
# The single resident figure is multiplied by the batch size rather than being
# measured per child process.
print memory.resident()*num
# One output row: resident*num/1000000, a tab, then that value + virtual/1000000.
data.write(str((memory.resident()*num/1000000)))
data.write("\t")
data.write(str((memory.resident()*num/1000000)+memory.memory()/1000000))
data.write("\n")
data.flush()
#for process in record:
#process.join()
#pool.close()
#pool.join()
data.close()
end = time.time()
# Final summary of the memory figures and the elapsed wall-clock time.
print "virtual memory is", memory.memory()
print "resident memory is", memory.resident()
print "stack memory is", memory.stacksize()
print "begin is",now
print "end is",end
print "it costs", end-now
Для измерения расхода памяти я использую следующий скрипт:
import os
import sys
# PID of the process whose status file is read.  Taken from the first
# command-line argument when it is a number; when there is no argument, or the
# argument is not numeric (e.g. this module was imported by a script with its
# own arguments), fall back to the current process.
try:
    sys_pid = int(sys.argv[1])
except (IndexError, ValueError):
    sys_pid = os.getpid()
_proc_status = '/proc/%d/status' % sys_pid
# Multipliers that convert the unit suffix of a status line into bytes.
_scale = {'kB': 1024.0, 'mB': 1024.0*1024.0,
          'KB': 1024.0, 'MB': 1024.0*1024.0}
def _VmB(VmKey):
    """Private.  Return one field of the status pseudo-file, in bytes.

    Parameters:
        VmKey: field label including the colon, e.g. ``'VmRSS:'``.

    Returns:
        The field value converted to bytes as a float, or ``0.0`` when the
        status file cannot be read, the field is absent, or the line is not
        in the expected ``<key> <number> <unit>`` form.
    """
    # get pseudo file /proc/<pid>/status
    try:
        with open(_proc_status) as status_file:
            text = status_file.read()
    except (IOError, OSError):
        return 0.0  # non-Linux?
    # get VmKey line e.g. 'VmRSS:  9999  kB\n ...'
    start = text.find(VmKey)
    if start < 0:
        return 0.0  # field not reported for this process
    fields = text[start:].split(None, 3)  # whitespace
    if len(fields) < 3:
        return 0.0  # invalid format?
    # convert Vm value to bytes
    try:
        return float(fields[1]) * _scale[fields[2]]
    except (ValueError, KeyError):
        return 0.0  # non-numeric value or unknown unit
def memory(since=0.0):
    """Return the ``VmSize`` figure in bytes, reduced by ``since``."""
    vm_size = _VmB('VmSize:')
    return vm_size - since
def resident(since=0.0):
    """Return the ``VmRSS`` (resident) figure in bytes, reduced by ``since``."""
    vm_rss = _VmB('VmRSS:')
    return vm_rss - since
def stacksize(since=0.0):
    """Return the ``VmStk`` (stack) figure in bytes, reduced by ``since``."""
    vm_stack = _VmB('VmStk:')
    return vm_stack - since
# Report the three figures for the inspected process, one per line.
for label, probe in (("virtual memory is", memory),
                     ("resident memory is", resident),
                     ("stack memory is", stacksize)):
    print("%s %s" % (label, probe()))
Этот код взят из Python Cookbook.
Вывод для варианта с потоками:
8.310784 23.42912
8.347648 23.445504
8.35584 23.457792
8.368128 23.47008
8.41728 23.519232
8.503296 23.601152
8.671232 24.117248
8.843264 24.293376
Вывод для варианта с gevent:
9.019392 24.829952
9.048064 24.846336
9.056256 24.854528
9.07264 25.14944
9.1136 25.1904
9.19552 25.27232
9.330688 25.407488
9.46176 25.92768
Что я сделал не так?