diff --git a/projet_texte/genome.py b/projet_texte/genome.py index e308b7336cf23fde3c87e6b4b6eeb30e15d58594..1e5c40c759500bcc1e2c188101b7cefde5b03d7b 100644 --- a/projet_texte/genome.py +++ b/projet_texte/genome.py @@ -26,6 +26,7 @@ def create_tree(df): try: Path(prefix).mkdir(parents=True, exist_ok=True) except: + print(f"Erreur pour la création du dossier {prefix}") continue print("Regroupement des organismes par Kingdom") @@ -114,7 +115,7 @@ def get_organism_from_subgroup(df, kingdom, group, subgroup): #print("Regroupement terminé\n") return result -def search_nc_by_organism(kingdom, organism_name): +def search_nc_by_organism(kingdom, organism_name, df_log): ''' Renvoie tous les NC correspondant à un organisme @@ -129,9 +130,19 @@ def search_nc_by_organism(kingdom, organism_name): df = pd.read_csv(path, sep="\t", header=None) df.columns = ["1", "NC", "3", "4", "5", "Organism", "7"] df_tmp = df[df['NC'].str.contains('NC_')] - nc_organism = df_tmp[["NC", "Organism"]] - result = nc_organism.loc[nc_organism["Organism"] == organism_name, "NC"] - return result.tolist() + nc_organism = df_tmp[["NC", "Organism"]] + result = nc_organism[[organism_name in organism for organism in nc_organism['Organism']]]["NC"] + #result = nc_organism.loc[nc_organism["Organism"].str.contains(organism_name, regex=False), "NC"] + #result = nc_organism.loc[nc_organism["Organism"].str.contains(organism_name), "NC"] + df_result = result.to_frame() + + #df_result[~df_result.apply(tuple,1).isin(df_log.apply(tuple,1))] + res = pd.concat([df_result, df_log]).drop_duplicates(keep=False) + #res = res.reset_index(drop=True) + #df_gpby = res.groupby(list(res.columns)) + #idx = [x[0] for x in df_gpby.groups.values() if len(x) == 1] + #res.reindex(idx) + return res['NC'].values.tolist() def has_valid_boundary(a, b, max_length): if ((not a.isdigit()) or (int(a) > max_length)): @@ -144,7 +155,7 @@ def has_valid_boundary(a, b, max_length): return False return True -def write_available_feature(record, prefix_path, regions, kingdom): +def write_available_feature(record, prefix_path, regions, kingdom, name_organism): ''' Renvoie les régions valides @@ -155,7 +166,6 @@ def write_available_feature(record, prefix_path, regions, kingdom): Liste correspondant à l'intersection des listes features et regions ''' #print(f"Dans {prefix_path}") - name_organism = record.annotations["organism"] prefix = f"{prefix_path}{name_organism}" range_index = "" region_to_parse = [] @@ -295,8 +305,8 @@ def write_available_feature(record, prefix_path, regions, kingdom): try: with open(filename, 'a+') as external_file: print("Dans",filename) - print(name_request, file=external_file) - print(feature.extract(record.seq), file=external_file) + #print(name_request, file=external_file) + #print(feature.extract(record.seq), file=external_file) name_request = "intron {} {}: join({})".format(record.annotations["organism"], record.name, range_index) for i in range(len(reverse_location) - 1): feature_tmp = SeqFeature(FeatureLocation(reverse_location[i][1], reverse_location[i+1][0], strand=1), type="CDS") @@ -311,8 +321,8 @@ def write_available_feature(record, prefix_path, regions, kingdom): try: with open(filename, 'a+') as external_file: print("Dans",filename) - print(name_request, file=external_file) - print(feature.extract(record.seq), file=external_file) + #print(name_request, file=external_file) + #print(feature.extract(record.seq), file=external_file) name_request = "intron {} {}: complement(join({}))".format(record.annotations["organism"], record.name, range_index) for i in range(len(reverse_location) - 1): feature_tmp = SeqFeature(FeatureLocation(reverse_location[len(reverse_location)-1-i-1][1], reverse_location[len(reverse_location)-1-i][0], strand=-1), type="CDS") @@ -404,7 +414,7 @@ def read_log(): df = pd.read_csv(path, sep="\t", header=None) df.columns = ["NC", "Organism", "Kingdom"] return df - return pd.DataFrame() + return pd.DataFrame(columns= ["NC", "Organism", "Kingdom"]) def search_last_nc_index(lst_nc, nc): for i, ncs in enumerate(lst_nc): diff --git a/projet_texte/interface.py b/projet_texte/interface.py index b90a2454666fea532ca9c26d8e4e61afe87578a0..7e6e0472080c41f242e2c0f4b07b8baea39e70ad 100644 --- a/projet_texte/interface.py +++ b/projet_texte/interface.py @@ -75,7 +75,8 @@ class Redirect(): # --- fonction principale --- -ctk.set_appearance_mode("System") # Modes: system (default), light, dark +ctk.set_appearance_mode("dark") +#ctk.set_appearance_mode("System") # Modes: system (default), light, dark ctk.set_default_color_theme("blue") # Themes: blue (default), dark-blue, green class InterfaceManager(): @@ -170,16 +171,17 @@ class InterfaceManager(): # file_image = Image.open("file.png") # file_image = file_image.resize((16, 16), Image.ANTIALIAS) # file_image = ImageTk.PhotoImage(file_image) - - ###Treeview Customisation (theme colors are selected) + ###Treeview Customisation (theme colors are selected) treestyle = ttk.Style() treestyle.theme_use('default') treestyle.configure("Treeview", background="#565B5E", foreground="#DCE4EE", fieldbackground="#565B5E", - borderwidth=0) + borderwidth=0, + font = 18, + rowheight = 28) treestyle.map('Treeview', background=[('selected', "#1B73C2")], foreground=[('selected', "#DCE4EE")]) @@ -192,14 +194,13 @@ class InterfaceManager(): self.frame_1.pack_propagate(0) self.tree = ttk.Treeview(self.frame_1, show="tree") - self.tree.pack(expand=1, fill="both",side = LEFT) - - #self.tree.place(x = 0, y = 270, width=500, height=387) - - self.text = ctk.CTkTextbox(self.frame_1) + self.tree.pack(expand=1, fill="both",side = LEFT) + self.scroll = Scrollbar(self.frame_1) + self.scroll.pack(side=RIGHT, fill=Y) + self.text = tk.Text(self.frame_1, bg = "white", yscrollcommand=self.scroll.set, font=18) self.text.pack(expand=1, fill="both", side = RIGHT) - #self.text.place(x = 500, y = 270) self.text.bind('<<Modified>>', showEnd) + self.scroll.config(command=self.text.yview) self.old_stdout = sys.stdout sys.stdout = Redirect(self.text) self.myvar = StringVar() @@ -225,18 +226,20 @@ class InterfaceManager(): "Eukaryota.ids", "Viruses.ids" ] - self.button = ctk.CTkButton(self.root, text='Téléchargement fichiers', command=self.download_file) - self.button.invoke() - self.button.configure(state = DISABLED) + self.button = ctk.CTkButton(self.root, text='Démarrer', command=lambda : self.thread_function_parser(self.v, self.EtatCheckButton, self.resultat, self.root, self.button.cget("text"))) self.button.place(x = 580, y = 660) - self.button.configure + self.button.place_forget() + self.button2 = ctk.CTkButton(self.root, text='Téléchargement fichiers', command=self.download_file) + self.button2.invoke() + self.button2.configure(state = DISABLED) + self.button2.place(x = 580, y = 660) self.root.mainloop() def download_file(self): self.button.state = "disabled" t = threading.Thread(target=self.extract_tree) t.start() - self.schedule_check(t) + #self.schedule_check(t) def schedule_check(self,t): self.root.after(1000, self.check_if_done, t) @@ -280,7 +283,8 @@ class InterfaceManager(): genome.create_tree(self.df) self.create_tree() print("Génération de l'arborescence terminée") - self.button.destroy() + self.button2.place_forget() + self.button2.destroy() self.button = ctk.CTkButton(self.root, text='Démarrer', command=lambda : self.thread_function_parser(self.v, self.EtatCheckButton, self.resultat, self.root, self.button.cget("text"))) self.button.place(x = 580, y = 660) @@ -301,7 +305,7 @@ class InterfaceManager(): def progress_3(self, Progressbar, longueur): if Progressbar.get() < 1: - Progressbar.set(Progressbar.get() + longueur) + Progressbar.set(longueur) else: print("The progress completed!") @@ -409,7 +413,7 @@ class InterfaceManager(): if(not is_active): print("Mise en pause de l'acquisition") - while(self.button.text != "Stop"): + while(self.button.cget("text") != "Stop"): pass @@ -424,7 +428,7 @@ class InterfaceManager(): #print(dataframe_organism) if(not is_active): print("Mise en pause de l'acquisition") - while(self.button.text != "Stop"): + while(self.button.cget("text") != "Stop"): pass dataframe_organism = dataframe_organism.sort_values(["Kingdom", "Group", "SubGroup"]) dataframe_organism.reset_index(drop=True, inplace=True) @@ -442,28 +446,32 @@ class InterfaceManager(): # on recherche les nc par organisme nc_by_organism = [] + df_ncs_tmp = pd.DataFrame(columns= ["NC", "Organism", "Kingdom"]) + for i, organism in enumerate(organism_name): self.progress_2(self.Progressbar_2, longueur) if(not is_active): print("Mise en pause de l'acquisition") - while(self.button.text != "Stop"): + while(self.button.cget("text") != "Stop"): pass print(f"Recherche des NC pour l'organisme {organism}") - ncs = genome.search_nc_by_organism(kingdom_name, organism) + if (not df_log.empty): + df_ncs_tmp = df_log[[organism in o for o in df_log['Organism']]]["NC"].to_frame() + ncs = genome.search_nc_by_organism(kingdom_name, organism, df_ncs_tmp) if(ncs): print(f"Regroupement des NC pour l'organisme {organism}\n") + nc_by_organism.append([organism, ncs, organism_group[i], organism_subgroup[i]]) else: - print(f"NC non trouvé pour l'organisme {organism}\n") + print(f"NC non trouvé ou déjà acquis pour l'organisme {organism}\n") - nc_by_organism.append([organism, ncs, organism_group[i], organism_subgroup[i]]) - if (not df_log.empty): - df_tmp = df_log.loc[df_log["Kingdom"] == kingdom_name] - if (not df_tmp.empty): - last_nc = df_tmp.iloc[-1,0] - last_index = genome.search_last_nc_index(nc_by_organism, last_nc) - if (last_index != -1): - nc_by_organism = nc_by_organism[last_index:] + nc_by_organism[:last_index] + #if (not df_log.empty): + # df_tmp = df_log.loc[df_log["Kingdom"] == kingdom_name] + # if (not df_tmp.empty): + # last_nc = df_tmp.iloc[-1,0] + # last_index = genome.search_last_nc_index(nc_by_organism, last_nc) + # if (last_index != -1): + # nc_by_organism = nc_by_organism[last_index:] + nc_by_organism[:last_index] # Pour un NC donné, extraction et écriture des séquences dans les fichiers par régions # Récupération du gene via id @@ -471,11 +479,15 @@ class InterfaceManager(): print(f"Fin de la recherche des NC pour {kingdom_name} {group_name} {subgroup_name}".center(80,"-")) print("\n","Début d'extraction".center(80, "-")) longueur_2 = len(nc_by_organism) + if (longueur_2 == 0): + self.progress_3(self.Progressbar_3, 1) for organism in nc_by_organism: self.progress_2(self.Progressbar_3, longueur_2) prefix = "Results/{}/{}/{}/".format(kingdom_name, organism[2], organism[3]) for nc in organism[1]: - try: + if (nc in df_log['NC'].unique()): + continue + try: print(f"\nLecture de {nc}") handle = Entrez.efetch(db="nucleotide",id=nc, rettype="gbwithparts", retmode="text") record = SeqIO.read(handle, "genbank") @@ -491,10 +503,11 @@ class InterfaceManager(): else: if(not is_active): print("Mise en pause de l'acquisition") - while(self.button.text != "Stop"): + while(self.button.cget("text") != "Stop"): pass print(f"Ecriture des régions pour {record.name}") - genome.write_available_feature(record, prefix, regions, kingdom_name) + genome.write_available_feature(record, prefix, regions, kingdom_name, organism[0]) + df_log.loc[len(df_log)] = [nc, organism[0], kingdom_name] is_active = False self.button.configure(text = "Démarrer") print("\n","Fin d'extraction".center(80, "-"),"\n")