python 批量处理文件
目录
背景
批量处理文件：将每个文件中结构相同的内容整理到同一个文件中，每个源文件的内容在新文件中占一行。
读取方式为按行读取。
代码展示
import os
def eachFile(filepath):
    """Return the paths of all entries directly inside a directory.

    Args:
        filepath: Directory to list. A trailing path separator is optional.

    Returns:
        A list with one path per name returned by ``os.listdir(filepath)``,
        each joined onto ``filepath``. The listing is not filtered, so
        sub-directories are included alongside regular files.
    """
    # Pass the two components separately: os.path.join only inserts a
    # separator when one is missing, so the result is correct whether or
    # not the caller's directory ends with "/" or "\\".
    return [os.path.join(filepath, entry) for entry in os.listdir(filepath)]
# Merge every file under the input directory into "d.txt": each input file
# contributes exactly one output line.
file_path_list = eachFile("E:\\rssi\\")

# The output is opened once for the whole run instead of once per input file.
# newline="" disables newline translation so the explicit "\r\n" terminator
# is written as-is (text mode on Windows would otherwise emit "\r\r\n").
with open("d.txt", "a", encoding="utf8", newline="") as out_file:
    for filename in file_path_list:
        with open(filename, "r", encoding="utf8") as f:
            lines = f.readlines()
        # Drop the first three lines and the last line of each file; the
        # remaining lines are stripped and joined with single spaces.
        aa = [line.strip() for line in lines[3:-1]]
        out_file.write(" ".join(aa) + "\r\n")
批量处理 HTML 页面并解析
这个方法与上面的相比就显得很笨拙了
from bs4 import BeautifulSoup
import DBChinaAdministration
# NOTE(review): `import DBChinaAdministration` binds a module object, and a
# module is not callable. This probably needs
# `from DBChinaAdministration import DBChinaAdministration` -- confirm against
# that module before changing the import.
db = DBChinaAdministration()

# Saved pages to parse, in processing order. The names are kept exactly as
# they were in the original script.
html_file_names = [
    "anhui.html",
    "beijing.html",
    "chongqing.html",
    "fujiang.html",
    "gansu.html",
    "guangdong.html",
    "guangxi.html",
    "guizhou.html",
    "hainan.html",
    "hebei.html",
    "heilongjiang.html",
    "henan.html",
    "hubei.html",
    "hunan.html",
    "jiangsu.html",
    "jiangxi.html",
    "jilin.html",
    "liaoning.html",
    "neimenggu.html",
    "ningxia.html",
    "qinghai.html",
    "sanxi.html",
    "shandong.html",
    "shanghai.html",
    "shanxishen.html",
    "sichuan.html",
    "tianjin.html",
    "xinjiang.html",
    "xizang.html",
    "yunnan.html",
    "zhejiang.html",
]

for html_file_name in html_file_names:
    # Open one page at a time so each handle is closed as soon as it is read,
    # instead of keeping 31 files open for the whole run.
    with open(html_file_name, "r", encoding="utf8") as html_file:
        html_page = html_file.read()

    soup = BeautifulSoup(html_page, "html.parser")
    list_code = []
    for index, anchor in enumerate(soup.select("a")):
        # Only the <a> tags at even positions are used.
        if index % 2 != 0:
            continue
        copycode = anchor.get("onclick")
        if copycode is None:
            # No onclick attribute on this tag, so there is nothing to parse.
            continue
        # Turn the parentheses into commas and drop the single quotes so the
        # onclick text can be split into comma-separated fields.
        # Presumably it looks like func('x', 'code', 'name') -- TODO confirm.
        d_split = (
            copycode.replace("(", ",").replace(")", ",").replace("'", "").split(",")
        )
        # Fields 2 and 3 are read below; skip values that are too short rather
        # than raising IndexError (the old `if len(d_split):` was always true).
        if len(d_split) < 4:
            continue
        list_code.append(
            {"administration_code": d_split[2], "name": d_split[3]}
        )
    # One bulk insert per page.
    db.bulk_add(list_code)
使用numpy 对 csv 文件进行操作
import csv
import numpy as np
import Operation
# Lookup table consulted by get_data(): integer positions 1 through 16, each
# mapped to the name its value is stored under. Every name is the empty
# string here, and get_data() skips positions whose name is empty.
data = dict.fromkeys(range(1, 17), "")
def get_data():
    """Read ``matchrule.csv`` and build one dict per CSV column.

    The first field of every row is dropped and the remaining table is
    transposed. Each resulting column becomes a dict: for every position
    ``index`` in the column whose ``data[index]`` is a non-empty name, the
    dict stores ``instance.get_element(name, element)`` under that name.

    Returns:
        list[dict]: One dict per column, in column order. An empty list is
        returned when the CSV file contains no rows.
    """
    # newline='' is what the csv module requires of the file object so that
    # line endings embedded in quoted fields are parsed correctly.
    with open('matchrule.csv', 'rt', encoding='utf8', newline='') as c:
        rows = [row[1:] for row in csv.reader(c)]

    # An empty file used to fail with IndexError on `a[0]`; there is simply
    # nothing to convert in that case.
    if not rows:
        return []

    # Iterating the transposed array yields the table column by column, which
    # is what the previous per-column fancy indexing built by hand.
    columns = np.array(rows).T

    # NOTE(review): `import Operation` binds a module object, and a module is
    # not callable. This probably needs `from Operation import Operation` --
    # confirm against that module before changing the import.
    instance = Operation()

    # Build the objects to add: one dict per column, with every element
    # passed through instance.get_element().
    obj_data_list = []
    for column in columns:
        data_temp = {}
        # NOTE(review): positions start at 0 while `data` is keyed from 1, so
        # the first element of each column is never mapped -- confirm this is
        # intentional (e.g. a header row).
        for index, element in enumerate(column):
            key = data.get(index, "")
            if key:
                data_temp[key] = instance.get_element(key, element)
        obj_data_list.append(data_temp)
    return obj_data_list


# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    get_data()
转载自:https://blog.csdn.net/yangxiaodong88/article/details/79233203