Programming Tips

Some small utilities frequently used at work:

  • Data grouping
  • SQL => DataFrame
  • DataFrame => SQL
  • [{}, {}] => csv
  • Convert %20 => (space)
  • Add a new column to a DataFrame

Data grouping

import itertools

def grouper(iterable, n):
    """
    Group an iterable into chunks of size n.
    i = [1, 2, 3, 4, 5]
    grouper(i, 2) => (1, 2), (3, 4), (5,)
    """
    it = iter(iterable)
    while True:
        chunk = tuple(itertools.islice(it, n))
        if not chunk:
            return
        yield chunk
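
A quick usage check of grouper (list() is only used here to materialize the generator):

chunks = list(grouper([1, 2, 3, 4, 5], 2))
print(chunks)   # [(1, 2), (3, 4), (5,)]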

SQL result set (rs) => pandas DataFrame

Method 1: fetchall + keys

from pandas import DataFrame

def to_pandas(rs):
    res = rs.fetchall()
    if not res:
        return DataFrame([])
    df = DataFrame(res)
    df.columns = rs.keys()
    return df
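
rs above is assumed to be a SQLAlchemy 1.x result proxy (anything exposing fetchall() and keys()); a minimal sketch of how it might be obtained (in SQLAlchemy 2.x the raw string would need to be wrapped in sqlalchemy.text()):

from sqlalchemy import create_engine

engine = create_engine("mysql+pymysql://root:root@localhost:3306/mysql")
with engine.connect() as con:
    rs = con.execute("select * from test")   # result proxy with fetchall()/keys()
    df = to_pandas(rs)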

Method 2: pd.read_sql / pd.to_sql

import pandas as pd
from sqlalchemy import create_engine


class Mysql(object):
    def __init__(self):
        self._table = 'test'
        self.engine = create_engine(
            "mysql+pymysql://root:root@localhost:3306/mysql")
        self.con = self.engine.connect()

    def __del__(self):
        print('-----del------')
        self.con.close()

    def select(self, sql):
        try:
            df = pd.read_sql(sql, self.con)
        except Exception:
            # re-raise unchanged; `raise (Exception, e)` is invalid in Python 3
            raise
        return df


c = Mysql()
df = c.select('select * from test')

# write back while the connection is still open, then drop the wrapper
df.to_sql(name='test', con=c.con, if_exists='append',
          index=False, chunksize=500)

del c
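
For large tables, both pd.read_sql and to_sql accept a chunksize; a minimal sketch reusing the connection string above (the target table test_copy is a placeholder):

import pandas as pd
from sqlalchemy import create_engine

engine = create_engine("mysql+pymysql://root:root@localhost:3306/mysql")

# read_sql with chunksize yields DataFrames batch by batch instead of one big frame
for chunk in pd.read_sql("select * from test", engine, chunksize=1000):
    chunk.to_sql(name='test_copy', con=engine, if_exists='append',
                 index=False, chunksize=500)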

[{}, {}] => csv

import csv

# base_path is assumed to point at the output directory
base_path = './'

def save_to_csv_file(row_dict, out_name):
    file_path = base_path + out_name + '.csv'

    keys = row_dict[0].keys()
    with open(file_path, 'w', newline='') as f:
        dict_writer = csv.DictWriter(f, keys)
        dict_writer.writeheader()
        dict_writer.writerows(row_dict)
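
A quick usage sketch with made-up rows (all dicts are expected to share the keys of the first one):

rows = [
    {'name': 'a', 'score': 1},
    {'name': 'b', 'score': 2},
]
save_to_csv_file(rows, 'scores')   # writes <base_path>/scores.csv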

Convert %20 => (space)

import urllib.parse

# self.get_argument comes from a Tornado RequestHandler
name = self.get_argument("name", "")
name = str(name)
name = urllib.parse.unquote(name)   # '%20' => ' '
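
The reverse direction is quote; a minimal round-trip sketch:

from urllib.parse import quote, unquote

encoded = quote("hello world")   # 'hello%20world'
decoded = unquote(encoded)       # 'hello world'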

Parallelism in one line of Python: a new approach to everyday multithreading

https://segmentfault.com/a/1190000000414339
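
The pattern described in that article is map over a thread-backed Pool from multiprocessing.dummy; a minimal sketch (fetch and the URL list are made up for illustration):

from multiprocessing.dummy import Pool as ThreadPool   # threads, not processes

def fetch(url):
    # stand-in for real work such as an HTTP request
    return len(url)

urls = ['https://example.com/a', 'https://example.com/b']

pool = ThreadPool(4)
results = pool.map(fetch, urls)   # the "one line" parallel map
pool.close()
pool.join()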

Add a column to a DataFrame

Method 1: apply

import pandas as pd
from elasticsearch_dsl import Search, Q
from libs.es import es

def search_by_id(class_id):
    # alternative query kept from the original for reference:
    # search = Search(using=es, index='fireman_help_2018*', doc_type='help')\
    #     .query(Q('bool', must=[Q('terms', classroom=classrooms)]))
    search = Search(using=es, index='qos_classroom_event_2018*',
                    doc_type='dby_raw', extra=dict(size=1))\
        .query(Q('bool', must=[Q('match', class_id=class_id)]))
    hits = search.execute()
    source = {}   # stays empty when there is no hit
    for hit in hits.hits:
        source = {k: v for (k, v) in hit.to_dict().items()
                  if k in ('classroom', 'reason',
                           'reason_cn', 'class_id',
                           'scheduled_date_time')}
    return source.get('classroom')

df = pd.read_csv(
    '/Users/niufeiy/Downloads/user_enter_classroom_log_dist_ali.log')
df['room'] = df['class_id'].apply(search_by_id)
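
apply calls search_by_id once per row, so repeated class_id values trigger duplicate ES queries; a hedged sketch of caching the lookups with functools.lru_cache (assumes class_id values are hashable):

from functools import lru_cache

@lru_cache(maxsize=None)
def search_by_id_cached(class_id):
    return search_by_id(class_id)

df['room'] = df['class_id'].apply(search_by_id_cached)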

Method 2: loc

# preds: a sequence of predictions with the same length as df
df_s = df.copy()
df_s.loc[:, 'rs'] = preds
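
A self-contained sketch of the same pattern; the sample data and the preds values are made up:

import pandas as pd

df = pd.DataFrame({'class_id': [1, 2, 3]})
preds = [0.1, 0.5, 0.9]        # one prediction per row

df_s = df.copy()
df_s.loc[:, 'rs'] = preds      # new 'rs' column on the copy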