# Quick interpreter-introspection and URL-quoting snippets (REPL style).
import sys
# Python 3: `import urllib` alone does not expose `urllib.parse`;
# the submodule has to be imported explicitly.
import urllib.parse

sys.version  # check the python version
locals()  # show current local symbol table
globals()  # global symbol table

# Python 3
s = "a b&c"  # example input; replace with the string you need to quote
urllib.parse.quote_plus(s)  # -> 'a+b%26c'

# Python 2 equivalent (kept as a comment because it fails on Python 3):
# urllib.quote_plus(s)
DictWriter and imap
from csv import DictWriter
from multiprocessing import Pool
import time

NUM_OF_WORKERS = 8


def process_line(line):
    """Convert one raw line of the input file into a row dict for DictWriter.

    Kept as a top-level function because ``Pool.imap`` has to pickle it to
    send it to the worker processes.

    Args:
        line: One line of text read from the input file.

    Returns:
        A dict with the keys ``'name'`` and ``'age'``.
    """
    # biz logic goes here.
    # NOTE(review): the original left the business logic out. The parsing
    # below is a placeholder that assumes each line looks like "name,age";
    # replace it with the real logic.
    name, _, age = line.strip().partition(',')
    return {
        'name': name,
        'age': age,
    }


def main():
    """Process input.csv in parallel and stream the results to output.csv."""
    headers = ['name', 'age']
    # Pool is used as a context manager so the workers are cleaned up even
    # if reading or writing fails part-way through.
    with open('input.csv', 'r') as infile, \
            open('output.csv', 'w', newline='') as output, \
            Pool(NUM_OF_WORKERS) as pool:
        writer = DictWriter(output, fieldnames=headers)
        writer.writeheader()

        s = time.time()
        # imap yields results lazily, in input order, as they become
        # available; map would block until every task is done and return
        # them all at once. The third argument is the chunksize.
        for result in pool.imap(process_line, infile, NUM_OF_WORKERS):
            writer.writerow(result)
        e = time.time()

    print(f'finished in {e-s} seconds.')


# Required for multiprocessing: worker processes may re-import this module,
# and without the guard each of them would try to start its own pool.
if __name__ == '__main__':
    main()
对前面介绍的 Python 进程/线程做一个小结:线程池可以用来解决 I/O 阻塞的问题,而多进程可以用来绕过 GIL 对 CPU 的限制(因为每个进程都有自己独立的 GIL)。所以你可以开一个包含 N 个进程(N 小于等于 CPU 核数)的进程池,然后在每一个进程中启动一个线程池,所有的线程池都订阅同一个 Queue(跨进程共享时需要使用 multiprocessing.Queue),来实现真正的多核并行。