自己写的python程序(减少工作量)
# 拼接sql
GD_STOCK_PAYMENT PAY_ID 偿还信息主键 DEBT_T_ZWGL_CHBJ CHBJ_ID 偿还本金主单ID
GD_STOCK_PAYMENT DEPT_STOCK_ID 债务信息主键 DEBT_T_ZWGL_CHBJ ZW_ID 债务ID
GD_STOCK_PAYMENT MOF_DIV_CODE 财政区划代码 DEBT_T_ZWGL_CHBJ AD_CODE 区划编码
GD_STOCK_PAYMENT AGENCY_CODE 单位代码 DEBT_T_ZWGL_CHBJ AG_CODE 单位编码
import pandas as pd
import collections
t_name = "GD_ISSUE_PLAN"
filepath = 'data\{}.csv'.format(t_name)
# 读取文件
df = pd.read_csv(filepath, sep='\t', header = None)
# 获取第一列并去重
tag_table = df[0].drop_duplicates().values[0]
# 获取源表
source_tables = df[3].drop_duplicates().dropna().values # ndarray类型
# 表名和别名放入字典
num = 0
# dic = { tag_table: "T{}".format(num) }
dic = collections.OrderedDict()
for table in source_tables:
num += 1
dic[table] = "T{}".format(num)
cols = ",".join(df.iloc[:, 1])
list1 = []
list1.append("INSERT INTO {}({})".format(tag_table, cols))
list1.append("\nSELECT")
# 目标表名 目标字段名 源表表名 源表字段名
columns = df.iloc[:, [0,1,3,4]]
source_ad_table = '' # 区划在哪个源表
source_agency_table = '' # 单位在哪个源表 ?哪个字段关联
# 拼接select部分
for row in range(0, columns.shape[0]):
if pd.isna(columns[4][row]):
if row != 0 :
list1.append("\n\t ,{} AS {}".format("''", columns[1][row]))
else:
list1.append("\n\t {} AS {}".format("''", columns[1][row]))
else:
tablename = dic[columns[3][row]]
# 处理区划
if columns[1][row] == "MOF_DIV_CODE":
num += 1
list1.append( "\n\t ,NVL(T{}.CZ_CODE, {}.AD_CODE) AS {}".format(num, tablename, columns[1][row]) )
dic["DSY_V_ELE_AD"] = "T{}".format(num)
source_ad_table = columns[3][row]
# 处理单位
elif columns[1][row] in ("AGENCY_CODE", "AGENCY_COD"):
num += 1
list1.append( "\n\t ,CASE WHEN T{}.CODE IS NULL THEN {}.AG_CODE ELSE T{}.AGENCY_CODE END AS AGENCY_CODE".format(num, tablename, num) )
dic["BDA_T_DWDZ"] = "T{}".format(num)
source_agency_table = columns[3][row]
# 处理时间
elif columns[1][row].endswith( ("TIME", "DATE") ) : #, beg=[0, len(str)]
list1.append("\n\t ,regexp_replace({}.{}, '-|:| ', '') AS {}".format(tablename, columns[4][row], columns[1][row]))
else :
if row != 0 :
list1.append("\n\t ,{}.{} AS {}".format(tablename, columns[4][row], columns[1][row]))
else:
list1.append("\n\t {}.{} AS {}".format(tablename, columns[4][row], columns[1][row]))
# 拼接from和join部分
index = 0
for key in dic:
index += 1
if index == 1:
list1.append("\n FROM {}@DSY_HBDZ_DBLINK {}".format(key, dic[key]))
else:
if key == "DSY_V_ELE_AD":
list1.append("\nLEFT JOIN {} {}\n ON case when {}.AD_CODE='4200' then '420000' when {}.AD_CODE='4290' then '4298' else {}.AD_CODE end = {}.CODE".format(key, dic[key], dic[source_ad_table],dic[source_ad_table],dic[source_ad_table], dic[key]))
elif key == "BDA_T_DWDZ":
list1.append("\nLEFT JOIN {} {}\n ON {}.AG_ID = {}.GUID".format(key, dic[key], dic[source_agency_table], dic[key]))
else:
list1.append("\nLEFT JOIN {}@DSY_HBDZ_DBLINK {}\n ON ".format(key, dic[key]))
# dic
# list1
print( ' '.join(list1) )
# df
columns
# cols
修改加一列
GD_STOCK_PAYMENT PAY_ID 偿还信息主键 DEBT_T_ZWGL_CHBJ CHBJ_ID 偿还本金主单ID CHBJ_ID
GD_STOCK_PAYMENT DEPT_STOCK_ID 债务信息主键 DEBT_T_ZWGL_CHBJ ZW_ID 债务ID ZW_ID
GD_STOCK_PAYMENT MOF_DIV_CODE 财政区划代码 DEBT_T_ZWGL_CHBJ AD_CODE 区划编码 AD_CODE
GD_STOCK_PAYMENT AGENCY_CODE 单位代码 DEBT_T_ZWGL_CHBJ AG_CODE 单位编码 AG_CODE
PAY_ID
DEPT_STOCK_ID
MOF_DIV_CODE
AGENCY_CODE
修改:
df0 = pd.read_csv(filepath, sep='\t', header = None)
df = df0.iloc[:, [0,1,2,3,4,5]].dropna(thresh=2)
df2 = df0.iloc[:, [1,4,6]] #5
dic2 = {}
list2 = []
# 赋值
for row in range(0, df2.shape[0]):
if( pd.isna(df2[4][row]) ):
pass
else:
dic2[df2[4][row]] = df2[1][row]
for row in range(0, df2.shape[0]):
if pd.isna(df2[6][row]):
pass
else:
if df2[6][row] in dic2.keys():
list2.append("\n,{} as {}".format( dic2[df2[6][row]], df2[6][row] ) )
else:
list2.append("\n,null as {}".format(df2[6][row]) )
print("".join(list2))
# df2
# dic2
# 尝试将Oracle替换为MySQL语法(未完成)
由于有些SQL函数嵌套太复杂, 暂时搁置
# import pandas as pd
# t_name = "数据"
# filepath = 'data{}.xlsx'.format(t_name)
# df = pd.read_excel(filepath, engine='openpyxl')
# df['Q_TABLE'].apply(函数, axis=1)
import re
str = """SELECT Length(BILL.AD_CODE) as AD_CODE,
sys_Guid() as guid,
to_char(sysdate, 'yyyy-MM/dd HH24:mi:ss') as test2,
to_char(sysdate, 'yyyy/MM/dd') as test3,
to_char(sysdate, 'yyyy-MM') as test4,
to_char(sysdate, 'yyyy') as test5,
to_char(1540265121) as test6,
to_date(a.xxx_date, 'YYYY-MM-DD HH24:mi:ss') as test7,
to_date(a.xxx_date, 'YYYY-MM-DD') as test8,
"""
# length(str)函数
def rep_length(text):
return re.sub(r'length\(', r'char_length(', text, flags = re.IGNORECASE)
# sys_guid()函数
def rep_guid(text):
return re.sub(r'sys_guid\(\)', r'UUID()', text, flags = re.IGNORECASE)
# to_char
def rep_tochar(text):
# to_char(sysdate, 'yyyy-MM-dd HH24:mi:ss') -> DATE_FORMAT(NOW(),'%Y-%m-%d %H:%i:%s')
text = re.sub(r'(\s+)to_char\(([\w|\.]+),\s*\'[y]{4}([/|-])[m]{2}([/|-])[d]{2} [h|\d|:|m|i|s]{10}\'\)', r'\1date_format(\2, "%Y\3%m\4%d %h:%i:%s")', text, flags = re.IGNORECASE)
text = re.sub(r'(\s+)to_char\(([\w|\.]+),\s*\'[y]{4}([/|-])[m]{2}([/|-])[d]{2}\'\)', r'\1date_format(\2, "%Y\3%m\4%d")', text, flags = re.IGNORECASE)
text = re.sub(r'(\s+)to_char\(([\w|\.]+),\s*\'[y]{4}([/|-])[m]{2}\'\)', r'\1date_format(\2, "%Y\3%m")', text, flags = re.IGNORECASE)
text = re.sub(r'(\s+)to_char\(([\w|\.]+),\s*\'[y]{4}\'\)', r'\1date_format(\2, "%Y")', text, flags = re.IGNORECASE)
# text = re.sub(r'(\s+)to_char\(([\w|\.]+),(\s+\'[y|m|d|/|-]{8,10} [h|\d|:|m|i|s]{10}\')\)', r'\1date_format(\2, "%Y-%m-%d %h:%i:%s")', text, flags = re.IGNORECASE)
# to_char(154643546466)
text = re.sub(r'(\s+)to_char\(([\w|\.]+)\)', r'\1CAST(\2 AS CHAR)', text, flags = re.IGNORECASE)
return text
# to_date
def rep_todate(text):
# to_date(a.xxx_date, 'YYYY-MM-DD') to_date('2019-01-01', 'YYYY-MM-DD')未匹配
text = re.sub(r'(\s+)to_date\(([\w|\.]+),(\s+\'[y]{4}([/|-])[m]{2}([/|-])[d]{2} [h|\d|:|m|i|s]{10}\')\)', r'\1str_to_date(\2, "%Y\3%m\4%d %h:%i:%s")', text, flags = re.IGNORECASE)
text = re.sub(r'(\s+)to_date\(([\w|\.]+),(\s+\'[y]{4}([/|-])[m]{2}\')\)', r'\1str_to_date(\2, "%Y\3%m\4%d")', text, flags = re.IGNORECASE)
return text
# nvl
def rep_nvl(text):
return re.sub(r'(\s+)nvl\(', r'\1ifnull(', text, flags = re.IGNORECASE)
# to_number
def rep_tonumber(text):
return re.sub(r'(\s+)to_number\(', r'\1ifnull(', text, flags = re.IGNORECASE)
# sysdate
def rep_sysdate(text):
return re.sub(r'sysdate', r'now() ', text, flags = re.IGNORECASE)
# concat
# ROWNUM
# full join
# decode
# 练习题
# 字母count
频次相同, 大写字母在前小写字母在后
import re
str = input("请输入字符串:")
# str="A BVYLKG computer program is said to learn from experience E with respect to some task T and some performance measure P, if its performance on T, as measured by P, improves with experience E."
filename='word_count.txt'
# 正则匹配找到所有的字母
list = re.findall(r'[a-z|A-Z]',str)
# type(list) # 查看类型
dic = {} # 定义字典(键值对)
for ch in list:
dic[ch] = dic.get(ch, 0) + 1 # 若key对应的value不存在,置为0+1
# 按两列倒序 由于大写字母在小写字母之前,转ascii码后取负解决
res = sorted(dic.items(), key=lambda kv: (kv[1], -ord(kv[0])), reverse=True)
print(res)
with open(filename, 'w', encoding='utf-8') as file:
for row in res:
file.write("(\'{}\', {})\n".format(row[0], row[1]))
"""--------------------------------------------------------------------"""
import pandas as pd
import re
filename='word_count.txt'
str="A BVYLKG computer program is said to learn from experience E with respect to some task T and some performance measure P, if its performance on T, as measured by P, improves with experience E."
# 正则匹配找到所有的字母
list1 = re.findall(r'[a-z|A-Z]',str)
list2 = [1 for x in range(0, len(list1))]
zip1 = zip(list1, list2)
df = pd.DataFrame(zip1, columns=['key', 'value'])
df1 = df.groupby('key').sum().sort_values(by=['value', 'key'], ascending=[False, True]) # 分组求和排序
df1.to_csv(filename, header=None)
# df1
编辑 (opens new window)
上次更新: 2022/04/13, 21:51:31