Skip to content
Snippets Groups Projects
Commit 7a7d5a9d authored by Ziqian
Browse files

input and name_screen

parent a8f06914
No related branches found
No related tags found
No related merge requests found
EVMLVESGGGLVMPGGSLKLSCAASAMSWVRQIPEKRLEWVATITYFPDSVKGRFTISRDNAKNTLYLRMSSLRSEDTAMYYCVRWGQGTTLTVSS
GFTFSNY
SIGGHF
HEGYGRPYFDY
\ No newline at end of file
import pandas as pd
import sys
sys.path.append('.')
def _fr_column_labels():
    """Return the ordered column labels, one per character of a joined FR string.

    The label at index ``k`` names the column that receives character ``k``
    of each (padded) framework sequence.  Numbering jumps between the
    groups, and the three insertion labels 82A/82B/82C sit between 82 and 83.
    """
    labels = [f'X_{n}' for n in range(1, 26)]       # X_1   .. X_25
    labels += [f'X_{n}' for n in range(33, 52)]     # X_33  .. X_51
    labels += [f'X_{n}' for n in range(57, 83)]     # X_57  .. X_82
    labels += ['X_82A', 'X_82B', 'X_82C']
    labels += [f'X_{n}' for n in range(83, 95)]     # X_83  .. X_94
    labels += [f'X_{n}' for n in range(103, 114)]   # X_103 .. X_113
    return labels


def screen(specie_txt, csv_file="./heavy_safe85.csv"):
    """Split the framework sequences of one species into per-position columns.

    Reads the target species name from ``specie_txt``, keeps the rows of
    ``csv_file`` whose ``species`` column equals it, concatenates the
    ``myFR1``..``myFR4`` columns of every kept row, and writes a table with
    one column per sequence position (plus a ``specie`` column) to
    ``<species>.csv`` in the current working directory.

    Args:
        specie_txt: Path of a text file containing the species name.
            Surrounding whitespace (e.g. a trailing newline) is ignored.
        csv_file: Path of the input table.  It must provide the columns
            ``species``, ``Startnum``, ``myFR1``, ``myFR2``, ``myFR3`` and
            ``myFR4``.

    Raises:
        ValueError: If no row of ``csv_file`` belongs to the target species.
        IndexError: If a joined framework sequence is shorter than the
            number of position columns.
    """
    with open(specie_txt, "r") as f:
        # Strip so that a newline at the end of the file does not become
        # part of the name and silently make the filter match nothing.
        tar_specie = f.read().strip()

    csv_df = pd.read_csv(csv_file, low_memory=False)

    # Keep only the rows of the requested species.
    df1 = csv_df[csv_df["species"] == tar_specie]
    if df1.empty:
        raise ValueError(
            f"no rows with species {tar_specie!r} found in {csv_file!r}"
        )

    startnum = df1["Startnum"].tolist()
    specie = df1["species"].tolist()

    # Framework segments of each row joined into a single string.
    seq_ori = (df1['myFR1'] + df1['myFR2'] + df1['myFR3'] + df1['myFR4']).tolist()

    # Rows whose Startnum is 'H2' get a '#' placeholder in front so that
    # their characters line up with the other rows (the original comment
    # describes this as filling in the first amino acid of FR1).
    seq = [
        '#' + fr if start == 'H2' else fr
        for fr, start in zip(seq_ori, startnum)
    ]

    # One column per sequence position, built in one go instead of being
    # inserted into the DataFrame one at a time.
    columns = {
        label: [fr[pos] for fr in seq]
        for pos, label in enumerate(_fr_column_labels())
    }
    columns['specie'] = specie
    df = pd.DataFrame(columns)

    outputpath = specie[0] + '.csv'
    df.to_csv(outputpath, sep=',', index=False, header=True)
if __name__ == "__main__":
    # Script entry point: screen the species named in the local target file.
    screen('./specie_target.txt')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment