Pubfig DataBase 下载Python脚本

来源:互联网 发布:js实现饼状图 编辑:程序博客网 时间:2024/06/05 04:25

Original Source: http://www.cs.columbia.edu/CAVE/databases/pubfig/

Useful data:

dev_urls.txt

eval_urls.txt


Download Python Script:

Notice: Python 2.7 is supported.

# Download the PubFig evaluation-set images listed in eval_urls.txt.
# Runs unchanged on Python 2.7 and Python 3.
import contextlib
import os
import socket
import sys

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib2 import urlopen  # Python 2.7

val_urls = "eval_urls.txt"
dev_urls = "dev_urls.txt"

# Number of entries in eval_urls.txt when the author downloaded the set.
# It is only the denominator of the progress display, so a slightly
# different file still downloads completely.
_sum = 42461


def GetPic(filename, url):
    """Download ``url`` and store the response body in ``filename``.

    The body is read completely before the output file is opened, so a
    failed or timed-out download never leaves an empty file behind.

    Args:
        filename: Path of the file to write (binary mode).
        url: Address of the picture to fetch.

    Returns:
        True when the picture was saved, False when the link is dead.
        Failures are reported on stdout and never raised, so one bad
        link cannot abort a long download run.
    """
    try:
        with contextlib.closing(urlopen(url)) as response:
            data = response.read()
        with open(filename, "wb") as fo:
            fo.write(data)
    except Exception:
        # Deliberately broad (HTTP, socket and URL-format errors all mean
        # "dead link"), but not a bare except: Ctrl-C must still stop the run.
        print(url + " dead link.")
        return False
    return True


def main(url_file=val_urls, total=_sum, skip_entries=0, out_dir="."):
    """Download every picture listed in ``url_file``.

    Each data line is tab separated; column 0 is used as the directory
    name and column 2 as the URL.  Pictures are stored as
    ``<out_dir>/<column 0>/<n>.jpg`` where ``n`` counts the lines seen
    for that directory name from the start of the file.  The numbering is
    therefore the same on every run, and files that already exist are not
    fetched again, which makes an interrupted run resumable.

    Args:
        url_file: Path of the URL listing (two header lines, then data).
        total: Expected number of data lines, used for the progress display.
        skip_entries: Number of leading data lines to pass over without
            downloading (they still advance the per-directory numbering).
        out_dir: Directory under which the per-person folders are created.

    Returns:
        The number of dead links encountered.
    """
    socket.setdefaulttimeout(10)  # do not hang forever on unresponsive hosts
    next_index = {}  # directory name -> index of its next picture
    dead = 0
    with open(url_file, "r") as fo_read:
        fo_read.readline()  # Database info
        fo_read.readline()  # Data format
        for process, line in enumerate(fo_read, 1):
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) < 3:
                continue  # blank or malformed line
            person, url = fields[0], fields[2]
            index = next_index.get(person, 0)
            next_index[person] = index + 1
            if process <= skip_entries:
                continue
            dirname = os.path.join(out_dir, person)
            if not os.path.isdir(dirname):
                os.makedirs(dirname)
            target = os.path.join(dirname, "%d.jpg" % index)
            if not os.path.exists(target) and not GetPic(target, url):
                dead += 1
            print("%.2f %%" % (100.0 * process / total))
    print("Done.")
    print("%d dead links." % dead)
    return dead


if __name__ == "__main__":
    main()
# Download the PubFig development-set images listed in dev_urls.txt.
# Runs unchanged on Python 2.7 and Python 3.
import contextlib
import os
import socket
import sys

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib2 import urlopen  # Python 2.7

val_urls = "eval_urls.txt"
dev_urls = "dev_urls.txt"

# Number of entries in dev_urls.txt when the author downloaded the set.
# It is only the denominator of the progress display, so a slightly
# different file still downloads completely.
_sum = 16336


def GetPic(filename, url):
    """Download ``url`` and store the response body in ``filename``.

    The body is read completely before the output file is opened, so a
    failed or timed-out download never leaves an empty file behind.

    Args:
        filename: Path of the file to write (binary mode).
        url: Address of the picture to fetch.

    Returns:
        True when the picture was saved, False when the link is dead.
        Failures are reported on stdout and never raised, so one bad
        link cannot abort a long download run.
    """
    try:
        with contextlib.closing(urlopen(url)) as response:
            data = response.read()
        with open(filename, "wb") as fo:
            fo.write(data)
    except Exception:
        # Deliberately broad (HTTP, socket and URL-format errors all mean
        # "dead link"), but not a bare except: Ctrl-C must still stop the run.
        print(url + " dead link.")
        return False
    return True


def main(url_file=dev_urls, total=_sum, skip_entries=0, out_dir="."):
    """Download every picture listed in ``url_file``.

    Each data line is tab separated; column 0 is used as the directory
    name and column 2 as the URL.  Pictures are stored as
    ``<out_dir>/<column 0>/<n>.jpg`` where ``n`` counts the lines seen
    for that directory name from the start of the file.  The numbering is
    therefore the same on every run, and files that already exist are not
    fetched again, which makes an interrupted run resumable.

    Args:
        url_file: Path of the URL listing (two header lines, then data).
        total: Expected number of data lines, used for the progress display.
        skip_entries: Number of leading data lines to pass over without
            downloading (they still advance the per-directory numbering).
        out_dir: Directory under which the per-person folders are created.

    Returns:
        The number of dead links encountered.
    """
    socket.setdefaulttimeout(10)  # do not hang forever on unresponsive hosts
    next_index = {}  # directory name -> index of its next picture
    dead = 0
    with open(url_file, "r") as fo_read:
        fo_read.readline()  # Database info
        fo_read.readline()  # Data format
        for process, line in enumerate(fo_read, 1):
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) < 3:
                continue  # blank or malformed line
            person, url = fields[0], fields[2]
            index = next_index.get(person, 0)
            next_index[person] = index + 1
            if process <= skip_entries:
                continue
            dirname = os.path.join(out_dir, person)
            if not os.path.isdir(dirname):
                os.makedirs(dirname)
            target = os.path.join(dirname, "%d.jpg" % index)
            if not os.path.exists(target) and not GetPic(target, url):
                dead += 1
            print("%.2f %%" % (100.0 * process / total))
    print("Done.")
    print("%d dead links." % dead)
    return dead


if __name__ == "__main__":
    main()

_sum为笔者下载时该集合图片数目,可能会有变动


Screening Data Script:

# sudo pip install pillow

# Delete every downloaded file that Pillow cannot verify as an image and
# report how many pictures remain under train/ and val/.
# Runs unchanged on Python 2.7 and Python 3.
import os
import sys

try:
    from PIL import Image
except ImportError:
    # Reported by main() / IsValidImage(); must never be mistaken for
    # "every picture is broken", which would delete the whole data set.
    Image = None

DATA_TRAIN_BASE = "train/"
DATA_TEST_BASE = "val/"


def IsValidImage(pathfile):
    """Return True if Pillow can open and verify the file at ``pathfile``.

    The file is opened here and handed to Pillow as a file object, so the
    handle is closed even when verification fails.

    Raises:
        RuntimeError: if Pillow is not installed.
    """
    if Image is None:
        raise RuntimeError("Pillow is not installed (sudo pip install pillow)")
    try:
        with open(pathfile, "rb") as fh:
            Image.open(fh).verify()
    except Exception:
        # Any failure to open or verify marks the file as unusable; a bare
        # except would also swallow Ctrl-C.
        return False
    return True


def ScreenFolder(base, is_valid=IsValidImage):
    """Remove invalid pictures from every sub-folder of ``base``.

    Args:
        base: Directory whose immediate sub-directories hold the pictures.
        is_valid: Callable taking a file path and returning True to keep
            the file; defaults to ``IsValidImage``.

    Returns:
        The number of pictures that were kept.
    """
    kept = 0
    for folder in sorted(os.listdir(base)):
        subfolder = os.path.join(base, folder)
        if not os.path.isdir(subfolder):
            continue  # stray file next to the per-person folders
        for pic in sorted(os.listdir(subfolder)):
            pic_path = os.path.join(subfolder, pic)
            if not os.path.isfile(pic_path):
                continue  # os.remove() cannot delete a nested directory
            if is_valid(pic_path):
                kept += 1
            else:
                os.remove(pic_path)
                print(pic_path + " removed.")
    return kept


def main():
    """Screen the validation and training trees and print the totals."""
    if Image is None:
        sys.exit("Pillow is required: sudo pip install pillow")
    valid_pic_num_test = ScreenFolder(DATA_TEST_BASE)
    valid_pic_num_train = ScreenFolder(DATA_TRAIN_BASE)
    print(str(valid_pic_num_train) + " training pictures.")
    print(str(valid_pic_num_test) + " testing pictures.")
    print("Done.")


if __name__ == "__main__":
    main()

Want more Face Recognition DataBase:

Here.


0 0