读取jpg文件的exif信息

来源:互联网 发布:淘宝店铺模板什么格式 编辑:程序博客网 时间:2024/05/21 07:49

照片有点多,准备按拍摄时间重新整理一下,于是用 Python 写了一个只读取自己感兴趣的 EXIF 信息的脚本,看看速度怎么样。



以前用.net写过一个,用的现成的exif类库:大约开启1~5个线程来分析所有目标图片文件,读取exif信息,放入一个队列中;大约开启20个左右的线程来进行重命名、拷贝和删除原始文件。


这个程序运行起来,速度会越来越慢,一直没找到关键原因在哪,觉得可能有的原因:

  1. 瓶颈在硬盘的IO上,copy操作比较多,感觉应该是主要原因,但是无法解释为什么运行速度会越来越慢
  2. 硬件问题,老笔记本Thinkpad R60
  3. .net本身比较慢,程序里加了强制垃圾回收,占用内存空间比较稳定,cpu占用率也不是特别高,线程比较多,但是基本互相之间没有什么资源竞争
  4. 由于照片都是单反拍的,文件大小比较大,exif信息比较多,用exifLib会读取整个图片信息并格式化所有exif信息,但是感觉对程序的性能影响比较小

刚试了一下这个 Python 脚本:没有使用多线程,速度还挺快的。测试文件大约有 2G,2 秒以内就能完成;全部 80 多 G 的照片整理下来大约需要不到 4 分钟。



# http://www.codeproject.com/Articles/43665/ExifLibrary-for-NET# http://www.exiv2.org/tags.html# http://www.awaresystems.be/imaging/tiff/tifftags.htmlimport osimport structimport randomimport datetimeimport sysimport tracebackclass exiftags:datetime = 0x0132datetime_original = 0x9003datetime_digited = 0x9004exifpointer = 0x8769class log:visited = 0class jpg:def __init__(self, file_path):self.__file_path = file_pathself.__fo = Noneself.__endian = '>'self.__baseoffset = Noneself.exif = {}def __del__(self):if self.__fo is not None:self.__fo.close()def __getfo(self):if self.__fo is None:self.__fo = open(self.__file_path, 'rb')return self.__fodef __isjpg(self):arr = self.__getfo().read(2)if (arr is None) or (len(arr) < 2):return Falseif (ord(arr[0]) == 0xff) and (ord(arr[1]) == 0xd8):return Truereturn Falsedef __read_app0_section(self):pos = self.__getfo().tell()arr = self.__getfo().read(2)if (ord(arr[0]) == 0xff) and (ord(arr[1]) == 0xe0):arr = self.__getfo().read(2)size = struct.unpack('>H', arr)[0] # big-endianpos = self.__getfo().tell()self.__getfo().seek(pos + size - 2, 0) # skip app0 sectionelse:self.__getfo().seek(pos, 0)def __read_app1_section(self):pos = self.__getfo().tell()arr = self.__getfo().read(2)if (ord(arr[0]) == 0xff) and (ord(arr[1]) == 0xe1):arr = self.__getfo().read(2)size = struct.unpack('>H', arr)[0]arr = self.__getfo().read(6)# no exifif arr != '\x45\x78\x69\x66\x00\x00':print("NOT EXIF!")return# base positionself.__baseoffset = self.__getfo().tell()# get little/bigdianarr = self.__getfo().read(2)if (ord(arr[0]) == 0x49) and (ord(arr[1]) == 0x49):self.__endian = '<'elif (ord(arr[0]) == 0x4d) and (ord(arr[1]) == 0x4d):self.__endian = '>'else:print("Failed to get big-/little-endian")raise IOError# TIFF marker, should always be [0x002A]self.__getfo().read(2)arr = self.__getfo().read(4)# Read 0th IFDnextifd = struct.unpack(self.__endian + 'L', arr)[0]if nextifd != 0:exifpointer = {exiftags.exifpointer:None}self.__getfo().seek(self.__baseoffset + 
nextifd, 0)self.__read_IFD(exifpointer)else:print("Read 0th ifd failed...")return# Read EXIF IFDif exifpointer[exiftags.exifpointer] is None:print("Read EXIF IFD offset failed...")returnnextifd = struct.unpack(self.__endian + 'L', exifpointer[exiftags.exifpointer])[0]if nextifd != 0:self.__getfo().seek(self.__baseoffset + nextifd, 0)self.__read_IFD(self.exif)else:print("exif pointer is 0")else:self.__getfo().seek(pos, 0)def __read_IFD(self,tags):# get IFD field countarr = self.__getfo().read(2)fieldcount = struct.unpack(self.__endian + 'H', arr)[0]# process filedsfor i in range(0, fieldcount):self.__read_IFD_Field(tags)def __read_IFD_Field(self,tags):arr = self.__getfo().read(2)tagid = struct.unpack(self.__endian + 'H', arr)[0]arr = self.__getfo().read(2)type = struct.unpack(self.__endian + 'H', arr)[0]arr = self.__getfo().read(4) count = struct.unpack(self.__endian + 'L', arr)[0]# Byte length of field dataif type == 1:n = countelif (type == 2) or (type == 7):n = countelif (type == 3):n = 2 * countelif (type == 4) or (type == 9):n = 4 * countelif (type == 5) or (type == 10):n = 8 * count# Get value or offsetvalue = self.__getfo().read(4)if tagid not in tags:return# offsetif n > 4:pos = self.__getfo().tell()value = struct.unpack(self.__endian + 'L', value)[0]self.__getfo().seek(self.__baseoffset + value, 0)value = self.__getfo().read(n)self.__getfo().seek(pos, 0)tags[tagid] = valuedef getEXIF(self, tags):try:self.exif = tagsif not self.__isjpg():print("file " + self.__file_path + " is not jpg file")returnself.__read_app0_section()self.__read_app1_section()finally:self.__getfo().seek(0, 0)def testjpg1():tags = {exiftags.datetime:''}j = jpg('/tmp/1.jpg')j.getEXIF(tags)for k in tags:print(hex(k) + "=" + j.exif[k])del jdef visitjpg(destdir, dirname, names):for name in names:if name.find('.jpg') < 0 and name.find('.JPG') < 0:continuetags = {exiftags.datetime_original:None}try:log.visited += 1origpath = os.path.join(dirname, name)j = jpg(origpath)j.getEXIF(tags)if 
j.exif[exiftags.datetime_original] is None:print("failed to get exif of: " + dirname + "/" + name)continue# the exif read from jpg has \0(NULL bytes) at the end of the string, trim itstrdt = j.exif[exiftags.datetime_original]del jwhile strdt[-1] == "\0":strdt = strdt[0:-1]dt = datetime.datetime.strptime(strdt, '%Y:%m:%d %H:%M:%S')# Get date aggregate folderdtdir = os.path.join(destdir, dt.date().isoformat())if not os.path.exists(dtdir):os.mkdir(dtdir)if not os.path.isdir(dtdir):print("failed to initialize dir: " + dtdir)continuenewpath = os.path.join(dtdir, dt.date().isoformat() +"_" + dt.time().isoformat().replace(":","-") + ".jpg")while os.path.exists(newpath):newpath = os.path.splitext(newpath)[0] + "_" + str(random.randint(0,100)) + os.path.splitext(newpath)[1]os.rename(origpath, newpath) # use shutil.move if src and dest is on difference file systemexcept IOError as e:#print("failed to rename: " + dirname + "/" + name + ' due to: ' + str(e))#traceback.print_exc(file=sys.stdout)print("IOError: failed to rename: " + dirname + "/" + name)traceback.print_exc(file=sys.stdout)except:print("Filed while rename: " + dirname + "/" + name)traceback.print_exc(file=sys.stdout)raisedef mgmjpg(srcdir, destdir):if (not os.path.exists(srcdir)) or (not os.path.isdir(srcdir)):print("src: " + str(srcdir) + " is not a directory")returnstart = datetime.datetime.now()log.visited = 0if not os.path.exists(destdir):os.mkdir(destdir)else:if (not os.path.isdir(destdir)):print("dest: " + str(destdir) + " is not a directory")returnos.path.walk(srcdir, visitjpg, destdir)print("visited %s files" % log.visited)print("started when: %s, finished when: %s, cost: %s" % (str(start), str(datetime.datetime.now()), str(datetime.datetime.now()-start)))def testexif():j = jpg('/Volumes/DATA/Pictures/Dudu1/2013-01-03/2013-01-03_19-53-22_47.jpg')tags = {exiftags.datetime:None, exiftags.datetime_original:None, exiftags.datetime_digited:None}j.getEXIF(tags)print("datetime: %s" % 
j.exif[exiftags.datetime])print("original: %s" % j.exif[exiftags.datetime_original])print("digited: %s" % j.exif[exiftags.datetime_digited])if __name__ == '__main__':#mgmjpg('/root/src', '/tmp')mgmjpg('/Volumes/DATA/Pictures/Dudu1', '/Volumes/DATA/Pictures/Dudu')


0 0
原创粉丝点击