Python 学习笔记2

来源：互联网发布：sparksql hive java 编辑：程序博客网时间：2024/05/17 05:16

1、前言

Python 学习笔记，记录平时易忘和零碎的细节。本文为代码块的整理~

2、常用代码块

// 调试时候同时输出变量名和变量值

def print_debug(param_dict):
    """Print every variable name in ``param_dict`` together with its value.

    Call it as ``print_debug(dict(name=name))`` so the keyword supplies the
    variable's name.  Each entry is printed on its own line in the form
    ``参数名 <name>: <value>``; an empty dict prints nothing.

    :param param_dict: mapping of variable name -> variable value.
    """
    # Iterate instead of indexing keys()[0] / values()[0]: indexing only works
    # on Python 2 lists, reports a single entry and fails on an empty dict.
    for name, value in param_dict.items():
        print("参数名 %s: %s" % (name, value))


smooth = True
print_debug(dict(smooth=smooth))

// python mysql 中的格式化占位符用法
参考：http://stackoverflow.com/questions/28583706/python-warning-truncated-incorrect-double-value

# Build one "%s" placeholder per id.  The ids themselves are passed to
# execute() as parameters, so they are never interpolated into the SQL text.
# An empty id list is skipped: it would leave a placeholder with no value to
# bind, and "in ()" is not a valid query anyway.
if foo_ids:
    param_list = ','.join(['%s'] * len(foo_ids))
    sql = """select bar_id from foo where foo_id in (%s)""" % param_list
    cursor.execute(sql, foo_ids)

// enumerate 用法

# Named "items" rather than "list" so the built-in list type is not shadowed.
items = ['a1', 'b2', 'c3', 'd4', 'e5']
# enumerate() yields (index, element) pairs, so items[i] is the same as s.
for i, s in enumerate(items):
    print("%s == %s == %s" % (i, s, items[i]))
# Output:
#     0 == a1 == a1
#     1 == b2 == b2
#     2 == c3 == c3
#     3 == d4 == d4
#     4 == e5 == e5

// sys.getsizeof()局限，python非内置数据类型的对象无法用sys.getsizeof()获得真实的大小

import sysfor x in (None, 1, 1L, 1.2, 'c', [], (), {}, set()):  print "{0:20s}\t{1:d}".format(type(x).__name__, sys.getsizeof(x))

http://bbs.csdn.net/topics/380050584
http://www.cnblogs.com/kaituorensheng/p/5491705.html

// 获取控制台输入

# raw_input() (Python 2) returns the typed line as a string, without the newline.
s = raw_input('please input a string:\n')
char_count = len(s)
print('the string has %d characters.' % char_count)

// 列表元素，进行字符串拼接

L = [1, 2, 3, 4, 5]
# join() only accepts strings, so every number is converted with str() first.
s1 = ','.join(map(str, L))
print(s1)

// python isinstance 判断某个对象是否是某个类型

ls = [1, 2, 3]
# isinstance(obj, cls) is True when obj is an instance of cls (or a subclass).
is_list = isinstance(ls, list)
print(is_list)

// python 随机函数random模块

import random

ls = [1, 2, 3, 4, 5]
# shuffle(): reorders the given list in place.
random.shuffle(ls)
print(ls)
# choice(): picks one element at random from the sequence.
picked = random.choice(ls)
print(picked)

// python 生成 10 - 20之间的随机小数

import random

# uniform(a, b) returns a random float between a and b.
value = random.uniform(10, 20)
print(value)

// 随机抽样

import random

import numpy as np

# sample(population, k) picks k elements without replacement.
small_pick = random.sample(range(6), 3)
print(small_pick)
large_pick = random.sample(np.arange(100), 3)
print(large_pick)

// python 字符串和字典进行转化

import json

# Option 1: eval() evaluates the string as a Python expression.
# WARNING: eval() executes arbitrary code -- use it only on trusted literals;
# for JSON text coming from outside, use json.loads() instead.
a = '{"name":"yct","age":10}'
dic_a = eval(a)

# Option 2: round-trip through the json module.
value = {"name":"yct","age":10}
store_value = json.dumps(value)
dic_value = json.loads(store_value)

# Note: dumps() converts a dict to a str and loads() converts a str back to a
# dict.  dump() and load() do the same job combined with file I/O, so they
# take a file object as an extra argument.

// python 对list进行去重

ids = [1, 4, 3, 3, 4, 2, 3, 4, 5, 6, 1]
# A set keeps a single copy of each value; the order of the resulting list
# is not guaranteed to match the original.
unique_ids = set(ids)
ids = list(unique_ids)

// python 除法保留小数

a, b = 1, 2
# Converting one operand to float forces true division (in Python 2,
# int / int would truncate to 0).
quotient = a / float(b)
print(quotient)

//python自加，自乘

x += 1  #自加x *= 2  #自乘# python中没有x++ 和x-- 操作

// 时间 time 包整理

from time import ctime, time

# ctime() returns the current local time as a readable string.
print('系统当前时间' + ctime())

start = time()
# <your program goes here>
elapsed = time() - start
# time() returns a float, so it must be converted with str() before it can be
# concatenated with the surrounding text.
print('程序运行了' + str(elapsed) + '秒')

// python str用法

string = "hello world"
print("%s %s" % ('取最后一个字符', string[-1]))
print("%s %s" % ('统计o出现次数', string.count('o')))
# The end index of a slice is exclusive, so [-4:-1] stops before the last character.
print("%s %s" % ('取倒数第4个到倒数第1个字符', string[-4:-1]))
print('abcdefg'.find('cde'))

strs = '1203_2203_83.txt'
print(strs.rfind("_"))   # search from the end: index of the last "_", -1 if absent
print(strs.rindex("_"))  # search from the end: index of the last "_", raises ValueError if absent
print(strs.find("_"))    # index of the first "_", -1 if absent
print(strs.index("_"))   # index of the first "_", raises ValueError if absent

// python split用法
str.split(str="", num=string.count(str))
str – 分隔符,num – 分割次数

line = 'aa bb  ccc  dddd'
# No separator: any run of whitespace counts as one delimiter.
by_whitespace = line.split()
# Explicit ' ': every single space is a delimiter, so consecutive spaces
# produce empty strings in the result.
by_single_space = line.split(' ')
print(by_whitespace)
print(by_single_space)

//两个list同时遍历，zip比这个例子更一般化。当参数长度不同时，zip会以最短序列的长度为准来截断所得到的元组。

ls1 = [1, 2, 3]
ls2 = [4, 5, 6]
# zip() pairs up the elements that share the same position in both lists.
for x, y in zip(ls1, ls2):
    total = x + y
    print("%s %s %s %s %s" % (x, '+', y, '=', total))

// range用法，步长和数字

# range(start, stop, step): stop is exclusive; a negative step counts down.
# Python 2 prints, in order:
#     [5, 3, 1]
#     [5, 4, 3, 2, 1, 0]
#     [1, 2, 3, 4]
#     [1, 3]
for start, stop, step in ((5, -1, -2), (5, -1, -1), (1, 5, 1), (1, 5, 2)):
    print(range(start, stop, step))

// all()函数的使用。
使用示例：if not all((var_1, var_2))
内部实现等价于：

def all(iterable):
    """Return True when no element of ``iterable`` is falsy.

    Pure-Python equivalent of the built-in ``all``: it stops at the first
    falsy element and returns True for an empty iterable.
    """
    for item in iterable:
        if item:
            continue
        # The first falsy element decides the result.
        return False
    return True

============= python 使用小技巧 =============
// list 复制

如果只是复制内容（浅拷贝）的话，用 b = a[:]；如果希望 b 和 a 引用同一个对象的话，用 b = a。

// str1是一个字符串，则: str1[-1]直接定位到该字符串末尾字符。

// python 注释中的 type/TODO关键字的使用

// 后面没用到的变量，但是必须出现的情况，用 _ 表示。

// 循环跳过不必要的内容：

for _ in li[1:]:  # li[1:] drops the first element, so the loop starts at the second one
    pass  # loop body goes here

// 只想循环十次：

for _ in range(10):  # "_" marks the loop variable as intentionally unused
    pass  # loop body goes here; it runs exactly ten times

// 三目运算符

a, b = 3, 2
# Conditional expression: <value if true> if <condition> else <value if false>
print("True" if a > b else "False")

// set用法，可用于去重

s = set(['spam'])
# add() inserts one element; adding a value that is already present is a no-op.
s.add('xyd')

// return 时尽量不要直接返回复杂的表达式，建议先给这个值赋一个有意义的名字再返回（这是可读性方面的约定，并非语法限制）。

// 私有变量、函数命名，前面都加一个_表示。

// 元组不能修改内容，list可以修改内容。内容不需要修改时，能用tuple尽量用tuple。

// python接受运行参数：sys.argv[0] 表示脚本名，sys.argv[1] 表示第一个命令行参数（之后依次为 sys.argv[2]、sys.argv[3]……）

============= python dict 操作 ==============
// 两个字典元素合并

# Start from a copy of dict1, then overlay dict2; on duplicate keys the value
# from dict2 wins.  Neither input dict is modified.
dictMerged1 = dict(dict1)
dictMerged1.update(dict2)

// 判断字典中是否包含某一键

# Preferred: the `in` operator on the dict itself.  It replaces the deprecated
# dic.has_key('name'), which no longer exists in Python 3.
print('name' in dic)
# Also works, but in Python 2 keys() first builds a list of every key.
print('name' in dic.keys())

// 生成字典的一种方法。在Python 2中，相关的内置map函数（写作 map(None, keys, vals)）也能用类似方式把序列的元素配对起来，但是如果参数长度不同，则会为较短的序列用None补齐。而python3的map不再支持这种用法，需要补齐时可以改用 itertools.zip_longest。

keys = ['spam', 'eggs', 'toast', 'eggs']
vals = [1, 3, 5, 6]
# Pair each key with the value at the same position.  'eggs' appears twice,
# so the later pairing (6) overwrites the earlier one (3).
pairs = zip(keys, vals)
mapping = dict(pairs)
print(mapping)

// 生成字典的一种方法。

# Index the items by their "id" field so each one can be looked up directly.
template_data_map = dict((item["id"], item) for item in template_data)

// 对字典的遍历

# Iterating a dict yields its keys.
for k in dic:
    print(dic[k])

# iteritems() (Python 2) yields (key, value) pairs.
for key, value in dic.iteritems():
    print("%s %s" % (key, value))

// 有序字典

import collections

# OrderedDict remembers the order in which keys were inserted.
d = collections.OrderedDict()

============= python list 操作 ==============
// 一种生成 list 的方法

def _login_name(accept_user_obj):
    """Return the "login_name" entry of one user object."""
    return accept_user_obj["login_name"]


# Apply the extractor to every user object.
map(_login_name, accept_user_objs)

// 一种生成 list 的方法。用列表推导取代map,filter函数。即根据一份列表制作另一份列表。

a = [1, 2, 3, 4, 5]
# Build a new list holding the square of every element of a.
squares = [x * x for x in a]
print(squares)

//从一个list生成其他list，并进行条件过滤

words = ['xyd', 'hyz', 'ssss', 'ewde']
# Keep only the words shorter than four characters.
short_words = [w for w in words if len(w) < 4]
print(short_words)

// 关于 list 的各种操作

ls = ['the', 'the', 'thw', 'tie', 'ahe']
print ls.index('the')    # index of the first occurrence of the value
print ls.count('the')    # number of times the value occurs
ls.insert(2, 'xyd')      # insert the value at the given index
print ls
print ls.pop()           # remove the last element and return that element (not its index)
print ls

结果如下图：
这里写图片描述

// 关于list切片

ls = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']

# Single elements: negative indexes count from the end.
print(ls[-1])     # h
print(ls[1])      # b

# Slices: ls[start:stop] includes start and excludes stop.
print(ls[:])      # ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
print(ls[:5])     # ['a', 'b', 'c', 'd', 'e']
print(ls[:-1])    # ['a', 'b', 'c', 'd', 'e', 'f', 'g']
print(ls[4:])     # ['e', 'f', 'g', 'h']
print(ls[-3:])    # ['f', 'g', 'h']
print(ls[2:5])    # ['c', 'd', 'e']
print(ls[2:-1])   # ['c', 'd', 'e', 'f', 'g']
print(ls[-3:-1])  # ['f', 'g']

3、 python 常用代码段

3.1 单例模式装饰器

def singleton(cls, *args, **kwargs):
    """Class decorator that makes ``cls`` produce a single shared instance.

    The decorated name becomes a factory function: the first call creates the
    instance, every later call returns that same object.

    :param cls: the class being decorated.
    :param args: default positional constructor arguments, used when the
        first call passes none of its own.
    :param kwargs: default keyword constructor arguments, used likewise.
    :return: a function returning the one instance of ``cls``.

    No locking is done around the first construction.
    """
    instances = {}

    def _singleton(*call_args, **call_kwargs):
        if cls not in instances:
            # Constructor arguments given on the first call take precedence;
            # without them the decoration-time defaults are used.
            if call_args or call_kwargs:
                instances[cls] = cls(*call_args, **call_kwargs)
            else:
                instances[cls] = cls(*args, **kwargs)
        # Arguments passed on later calls are ignored: the instance exists.
        return instances[cls]

    return _singleton


@singleton
class MyClass(object):
    """Placeholder example class; put the real members here."""
    pass

3.2 超时装饰器

#!/usr/bin/python
# -*- coding: utf-8 -*-
import signal
import functools


class TimeoutException(Exception):
    """Raised inside the decorated call when it exceeds its time limit."""
    pass


def timeout(seconds, error_message="timeout error, func exec too long"):
    """Build a decorator that aborts the wrapped call after ``seconds``.

    The limit is enforced with ``signal.alarm`` and a ``SIGALRM`` handler, so
    it is only available where that signal exists (Unix) and, because Python
    installs signal handlers from the main thread only, only when the
    decorated function is called from the main thread.

    :param seconds: whole number of seconds the call may run.
    :param error_message: message carried by the raised TimeoutException.
    :return: a decorator usable on plain functions and on methods.
    :raises TimeoutException: from the decorated call when the alarm fires.
    """
    def decorated(func):
        def _handle_timeout(signum, frame):
            raise TimeoutException(error_message)

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Remember whatever handler was installed so it can be put back.
            previous_handler = signal.signal(signal.SIGALRM, _handle_timeout)
            signal.alarm(seconds)
            try:
                return func(*args, **kwargs)
            finally:
                # Always cancel the pending alarm, even when func raised.
                signal.alarm(0)
                # previous_handler is None when the old handler was not
                # installed from Python; that value cannot be re-installed.
                if previous_handler is not None:
                    signal.signal(signal.SIGALRM, previous_handler)
        return wrapper
    return decorated

3.3 文件操作类

# coding:utf-8
from os.path import join, getsize
import shutil
import os


class FileUtil(object):
    """Static helpers for common file and directory chores."""

    @staticmethod
    def _to_text(file_path):
        """Decode a UTF-8 byte-string path to text; pass text paths through."""
        if isinstance(file_path, bytes):
            return file_path.decode("utf8")
        return file_path

    @staticmethod
    def clear_all_files(path):
        """
        Empty a directory by deleting it with everything inside and then
        creating it again.

        :param path: directory to empty; it must already exist.
        """
        shutil.rmtree(path)
        os.mkdir(path)

    @staticmethod
    def get_dir_size(path):
        """
        Walk the directory tree and return the total size, in bytes, of
        every file found under it.

        :param path: root directory of the walk.
        :return: sum of the file sizes (0 when there are no files).
        """
        return sum(
            getsize(join(root, name))
            for root, _dirs, files in os.walk(path)
            for name in files
        )

    @staticmethod
    def get_all_file_path(path):
        """
        Return the paths of the entries directly inside a directory
        (first level only, no recursion).

        :param path: directory to list.
        :return: list of ``path`` joined with each entry name.
        """
        # join() avoids a doubled separator when path already ends with one.
        return [join(path, file_name) for file_name in os.listdir(path)]

    @staticmethod
    def get_all_file_name(path):
        """
        Return the names of the entries directly inside a directory
        (first level only, no recursion).

        :param path: directory to list.
        :return: list of entry names as given by ``os.listdir``.
        """
        return os.listdir(path)

    @staticmethod
    def write_file(file_path, context, method='a'):
        """
        Write data to a file.

        :param file_path: target path; a UTF-8 byte string is decoded first.
        :param method: mode passed to ``open``; 'a' (the default) appends,
            'wb' overwrites or creates the file.
        :param context: the content to write.
        """
        # The with-statement closes the file; no explicit close() is needed.
        with open(FileUtil._to_text(file_path), method) as fo:
            fo.write(context)

    @staticmethod
    def read_file(file_path):
        """
        Read a file and return its lines.

        :param file_path: source path; a UTF-8 byte string is decoded first.
        :return: list of lines, each with surrounding whitespace stripped.
        """
        with open(FileUtil._to_text(file_path), "r") as in_file:
            return [line.strip() for line in in_file]


if __name__ == '__main__':
    print(FileUtil.get_all_file_path('/Users/haizhi/xyd'))

3.4 邮件发送类

内容有点多，单独写一个页面了。
参考：python 发送邮件

3.5 日期函数使用小结

内容有点多，单独写一个页面了。
参考：python 日期操作

3.6 python hdfs使用小结

# coding:utf-8
import posixpath

import pyhdfs
import os


class HDFSService(object):
    """Thin wrapper around ``pyhdfs.HdfsClient`` for common file operations."""

    def __init__(self, hosts='localhost:50070'):
        """
        :param hosts: NameNode address(es) handed to ``pyhdfs.HdfsClient``;
            defaults to the local NameNode on port 50070.
        """
        self.client = pyhdfs.HdfsClient(hosts=hosts)
        # HDFS paths always use "/", whatever the local OS separator is.
        self.sep = posixpath.sep

    def file_if_exist(self, hdfs_path):
        """Return the client's ``exists`` result for ``hdfs_path``."""
        return self.client.exists(hdfs_path)

    def upload_to_hdfs(self, local_path, hdfs_path):
        """Copy the local file ``local_path`` to ``hdfs_path`` on HDFS."""
        self.client.copy_from_local(local_path, hdfs_path)

    def download_from_hdfs(self, hdfs_path, local_path):
        """Copy ``hdfs_path`` from HDFS to the local file ``local_path``."""
        self.client.copy_to_local(hdfs_path, local_path)

    def delete_from_hdfs(self, hdfs_path):
        """Ask the client to delete ``hdfs_path``."""
        self.client.delete(hdfs_path)

    def get_all_file_path(self, hdfs_path):
        """
        Return the full HDFS paths of the entries listed under ``hdfs_path``.

        :param hdfs_path: HDFS directory to list.
        :return: list of ``hdfs_path`` joined with each listed name.
        """
        # posixpath.join keeps "/" as the separator on every platform and
        # does not double it when hdfs_path already ends with one.
        return [
            posixpath.join(hdfs_path, file_name)
            for file_name in self.client.listdir(hdfs_path)
        ]

3.7 python objgraph包使用

objgraph 用于诊断python内存问题。mac系统下需进行如下准备工作：

brew install graphviz
pip install graphviz
pip install objgraph

import objgraph

# Three small objects that reference each other.
x = [1, 2, 3]
y = [x, {'key1': x}]
z = [y, (x, y)]

# Render the reference graph that starts at z into ref_topo.png.
objgraph.show_refs([z], filename='ref_topo.png')

输出的图片如下：
这里写图片描述

3.8 python ConfigParser使用

新建一个config.conf文件，内容如下：

[redis]
host=bd-004
port=6379
db=0

[log_search]
max_log_count = 50000

[hdfs]
host = 127.0.0.1
port = 50070

新建一个Python文件

import ConfigParser

cf = ConfigParser.ConfigParser()
cf.read("./config.conf")

secs = cf.sections()              # names of all sections in the file
opts = cf.options("hdfs")         # option names inside the [hdfs] section
kvs = cf.items("hdfs")            # (name, value) pairs of the [hdfs] section
in_path = cf.get("hdfs", "host")  # value of a single option

for value in (secs, opts, kvs, in_path):
    print(value)

这里写图片描述

0 0