diff --git a/projet_texte/interface.py b/projet_texte/interface.py index e183995d524b35d846a5e283992591189dd50401..f4c05d8608f94a957758b3f199f4b094a1546e2c 100644 --- a/projet_texte/interface.py +++ b/projet_texte/interface.py @@ -16,9 +16,11 @@ from Bio import Entrez from Bio import SeqIO import threading # --- functions --- +is_active = True def function_parser(v, EtatCheckButton, resultat, root): #print(v.get()) + global is_active regions = Selection(EtatCheckButton,resultat) #print(regions) # p = subprocess.run("ls", shell=True, stdout=subprocess.PIPE) @@ -56,19 +58,24 @@ def function_parser(v, EtatCheckButton, resultat, root): "Viruses.ids" ] - genome.create_log_file() + #genome.create_log_file() print("Début téléchargement des génomes") genome.download_genome_files(lst_files) + if(not is_active): + print("Arrêt demandé") + sys.exit() #print("test_1") if not group_name and not subgroup_name: organism_by_kingdom = genome.regroup_organism_by_kingdom(lst_files[0],Kingdom) dataframe_organism = genome.get_dataframe_kingdom(organism_by_kingdom, kingdom_name) - print("fin du premier if") elif subgroup_name: # retourne un dataframe contenant le nom des organismes d'un sous groupe donnée dataframe_organism = genome.get_organism_from_subgroup(lst_files[0],kingdom_name,group_name,subgroup_name) #print(dataframe_organism) + if(not is_active): + print("Arrêt demandé") + sys.exit() dataframe_organism = dataframe_organism.sort_values(["Kingdom", "Group", "SubGroup"]) dataframe_organism.reset_index(drop=True, inplace=True) print("Téléchargement terminé") @@ -80,6 +87,9 @@ def function_parser(v, EtatCheckButton, resultat, root): # on recherche les nc par organisme nc_by_organism = [] for i, organism in enumerate(organism_name): + if(not is_active): + print("Arrêt demandé") + sys.exit() print(f"Dans {kingdom_name}/{organism_group[i]}/{organism_subgroup[i]}") print(f"Regroupement des NC pour l'organisme {organism}") ncs = genome.search_nc_by_organism(kingdom_name, organism) @@ 
-95,6 +105,9 @@ def function_parser(v, EtatCheckButton, resultat, root): handle.close() if(len(organism[1]) > 1): for r in record: + if(not is_active): + print("Arrêt demandé") + sys.exit() print(f"Ecriture des regions pour {r.name}") genome.write_available_feature(r.features, r, prefix, regions) else: @@ -108,10 +121,6 @@ def function_parser(v, EtatCheckButton, resultat, root): continue print("Parsing terminé") -def thread_function_parser(v, EtatCheckButton, resultat, root): - t = threading.Thread(target=function_parser, args=(v, EtatCheckButton, resultat, root)) - t.start() - def Selection(EtatCheckButton, resultat): region = [] for i in range (10): @@ -140,104 +149,121 @@ class Redirect(): # --- fonction principale --- -def interface_main(): - root = tk.Tk() - - f = font.Font(size=18) - f.configure(underline=True) - - g = font.Font(size=32) - - h = font.Font(size=24) - - root.geometry("1200x800") - root.title('Projet Bioinformatique SDSC Groupe 1') - # root.configure(bg='lightblue') - - label_1 = Label(root, text="GENOME") - label_1['font'] = g - label_1.place(x = 0, y = 0) - - label_2 = Label(root, text="Statistiques sur les genes de la base GenBank", fg="blue") - label_2['font'] = h - label_2.place(x = 0, y = 35+32) - - label = Label(root, text="Sélectionner un Kingdom :") - label['font'] = f - label.place(x = 270, y = 120) - - v = StringVar() - v.set("Eukaryota") - - R1 = Radiobutton(root, text="Eukaryota", variable=v, value="Eukaryota") - R1.place(x = 270, y = 150) - - R2 = Radiobutton(root, text="Bacteria", variable=v, value="Bacteria") - R2.place(x = 370, y = 150) - - R3 = Radiobutton(root, text="Archaea", variable=v, value="Archaea") - R3.place(x = 470, y = 150) - - R4 = Radiobutton(root, text="Viruses", variable=v, value="Viruses") - R4.place(x = 570, y = 150) - - resultat = ["CDS", "centromere", "intron", "mobile_element", "ncRNA", "rRNA", "telomere", "tRNA", "3'UTR", "5'UTR"] - - label = Label(root, text="Les choix des régions fonctionnelles :") - 
label['font'] = f - label.place(x = 270, y = 180) - - EtatCheckButton =[0 for i in range (10)] - for i in range(10) : - EtatCheckButton[i] = IntVar() - - c1 = tk.Checkbutton(root, text="CDS", variable=EtatCheckButton[0], onvalue=1, offvalue=0) - c1.place(x = 270, y = 210) - c1.select() - c2 = tk.Checkbutton(root, text="centromere", variable=EtatCheckButton[1], onvalue=1, offvalue=0) - c2.place(x = 370, y = 210) - c2.select() - c3 = tk.Checkbutton(root, text="intron", variable=EtatCheckButton[2], onvalue=1, offvalue=0) - c3.place(x = 470, y = 210) - c3.select() - c4 = tk.Checkbutton(root, text="mobile_element", variable=EtatCheckButton[3], onvalue=1, offvalue=0) - c4.place(x = 570, y = 210) - c4.select() - c5 = tk.Checkbutton(root, text="ncRNA", variable=EtatCheckButton[4], onvalue=1, offvalue=0) - c5.place(x = 700, y = 210) - c5.select() - c6 = tk.Checkbutton(root, text="rRNA", variable=EtatCheckButton[5], onvalue=1, offvalue=0) - c6.place(x = 270, y = 230) - c6.select() - c7 = tk.Checkbutton(root, text="telomere", variable=EtatCheckButton[6], onvalue=1, offvalue=0) - c7.place(x = 370, y = 230) - c7.select() - c8 = tk.Checkbutton(root, text="tRNA", variable=EtatCheckButton[7], onvalue=1, offvalue=0) - c8.place(x = 470, y = 230) - c8.select() - c9 = tk.Checkbutton(root, text="3'UTR", variable=EtatCheckButton[8], onvalue=1, offvalue=0) - c9.place(x = 570, y = 230) - c9.select() - c10 = tk.Checkbutton(root, text="5'UTR", variable=EtatCheckButton[9], onvalue=1, offvalue=0) - c10.place(x = 670, y = 230) - c10.select() - - scroll = Scrollbar(root) - scroll.pack(side=RIGHT, fill=Y) - - text = tk.Text(root, bg = "white", width=200, yscrollcommand=scroll.set) - text.place(x = 0, y = 260) - text.bind('<<Modified>>', showEnd) - - scroll.config(command=text.yview) - - old_stdout = sys.stdout - sys.stdout = Redirect(text) - button = tk.Button(root, text='Démarrer', command=lambda : thread_function_parser(v, EtatCheckButton, resultat,root)) - button.place(x = 580, y = 700) - - - 
class InterfaceManager():
    """Main window of the application; also starts and stops the parser thread.

    Instantiating the class creates the Tk root window, lays out the kingdom
    radio buttons, the functional-region check boxes, the log text area and
    the start/stop button, replaces ``sys.stdout`` with a ``Redirect``
    wrapping the text area, and then enters the Tk main loop.  The
    constructor therefore only returns once the main loop has ended.

    The worker is controlled through the module-level ``is_active`` flag,
    which ``function_parser`` checks between its processing steps.
    """

    # Delay, in milliseconds, between two liveness checks of the worker thread.
    _POLL_INTERVAL_MS = 200

    def __init__(self) -> None:
        """Build every widget, redirect stdout and run the Tk main loop."""
        global is_active

        self.root = tk.Tk()
        # Thread currently running function_parser, or None when idle.
        self._worker = None

        self.f = font.Font(size=18)
        self.f.configure(underline=True)
        self.g = font.Font(size=30)
        self.h = font.Font(size=20)

        self.root.geometry("1200x800")
        self.root.title('Projet Bioinformatique SDSC Groupe 1')

        self.label_1 = Label(self.root, text="GENOME")
        self.label_1['font'] = self.g
        self.label_1.place(x=0, y=0)

        self.label_2 = Label(
            self.root,
            text="Acquisition des régions fonctionnelles dans les génomes",
            fg="blue",
        )
        self.label_2['font'] = self.h
        self.label_2.place(x=0, y=35 + 32)

        self.label = Label(self.root, text="Sélectionner un Kingdom :")
        self.label['font'] = self.f
        self.label.place(x=270, y=120)

        # Kingdom selection: one radio button per kingdom, 100 px apart.
        # The widgets stay reachable as self.R1 .. self.R4.
        self.v = StringVar()
        self.v.set("Eukaryota")
        kingdoms = ("Eukaryota", "Bacteria", "Archaea", "Viruses")
        for index, kingdom in enumerate(kingdoms, start=1):
            radio = Radiobutton(self.root, text=kingdom, variable=self.v, value=kingdom)
            radio.place(x=270 + 100 * (index - 1), y=150)
            setattr(self, f"R{index}", radio)

        self.resultat = ["CDS", "centromere", "intron", "mobile_element", "ncRNA",
                         "rRNA", "telomere", "tRNA", "3'UTR", "5'UTR"]

        self.label_3 = Label(self.root, text="Les choix des régions fonctionnelles :")
        self.label_3['font'] = self.f
        self.label_3.place(x=270, y=180)

        # Functional regions: one check box per entry of self.resultat, all
        # ticked by default.  Positions are listed explicitly because the
        # layout is not a regular grid.  The widgets stay reachable as
        # self.c1 .. self.c10.
        positions = (
            (270, 210), (370, 210), (470, 210), (570, 210), (700, 210),
            (270, 230), (370, 230), (470, 230), (570, 230), (670, 230),
        )
        self.EtatCheckButton = [IntVar() for _ in self.resultat]
        boxes = zip(self.resultat, self.EtatCheckButton, positions)
        for index, (region, state, (x, y)) in enumerate(boxes, start=1):
            box = tk.Checkbutton(self.root, text=region, variable=state,
                                 onvalue=1, offvalue=0)
            box.place(x=x, y=y)
            box.select()
            setattr(self, f"c{index}", box)

        # Scrollable text area that receives everything written to stdout.
        self.scroll = Scrollbar(self.root)
        self.scroll.pack(side=RIGHT, fill=Y)
        self.text = tk.Text(self.root, bg="white", width=200,
                            yscrollcommand=self.scroll.set)
        self.text.place(x=0, y=260)
        self.text.bind('<<Modified>>', showEnd)
        self.scroll.config(command=self.text.yview)
        self.old_stdout = sys.stdout
        sys.stdout = Redirect(self.text)

        # The button label is read at click time; it tells the handler
        # whether the click means "start" or "stop".
        self.button = tk.Button(
            self.root,
            text='Démarrer',
            command=lambda: self.thread_function_parser(
                self.v, self.EtatCheckButton, self.resultat, self.root,
                self.button['text']),
        )
        self.button.place(x=580, y=700)

        self.root.mainloop()

        # The main loop has ended: ask a still-running worker to stop at its
        # next check of is_active, and stop sending prints to the text widget.
        is_active = False
        sys.stdout = self.old_stdout

    def thread_function_parser(self, v, EtatCheckButton, resultat, root, text):
        """Start the parser in a background thread, or ask it to stop.

        Parameters
        ----------
        v, EtatCheckButton, resultat, root
            Forwarded unchanged to ``function_parser``.
        text
            Label of the button at the time of the click.  ``"Stop"``
            requests termination; any other value starts a new run.

        A stop request clears ``is_active`` and disables the button until the
        worker thread has really ended, so a new run cannot be started while
        the previous one is still working.  A start request is ignored while
        a worker is alive.
        """
        global is_active
        worker = getattr(self, "_worker", None)
        running = worker is not None and worker.is_alive()

        if text == "Stop":
            is_active = False
            self.button['text'] = "En attente"
            self.button['state'] = DISABLED
            print("En attente de terminaison")
            if not running:
                # Nothing to wait for: give the button back immediately.
                self._reset_button()
            # Otherwise the poll scheduled at start-up re-enables the button
            # once the worker has ended.
            return

        if running:
            # Never run two parsers at once.
            return

        is_active = True
        self.text.delete('1.0', END)
        self.button['text'] = "Stop"
        self._worker = threading.Thread(
            target=function_parser, args=(v, EtatCheckButton, resultat, root))
        self._worker.start()
        self.root.after(self._POLL_INTERVAL_MS, self._poll_worker)

    def _poll_worker(self):
        """Re-arm itself while the worker is alive, then restore the button."""
        worker = getattr(self, "_worker", None)
        if worker is not None and worker.is_alive():
            self.root.after(self._POLL_INTERVAL_MS, self._poll_worker)
            return
        self._worker = None
        self._reset_button()

    def _reset_button(self):
        """Put the start/stop button back in its initial, clickable state."""
        self.button['text'] = "Démarrer"
        self.button['state'] = NORMAL

    def waithere(self, delay_ms=3000):
        """Keep the event loop running for ``delay_ms`` milliseconds.

        Kept for backward compatibility; the class itself now waits for the
        worker thread to end instead of pausing for a fixed time.
        """
        var = IntVar()
        self.root.after(delay_ms, var.set, 1)
        print("En attente de terminaison")
        self.root.wait_variable(var)
group - #kingdom_name = Kingdom[2] - #group_name = "Plants" - #subgroup_name = "Land Plants" - #prefix = "Results/{}/{}/{}".format(kingdom_name, group_name, subgroup_name) - - # Téléchargment des fichiers se trouvant dans lst_files via ftp - """ - genome.create_log_file() - print("Début téléchargement des génomes") - genome.download_genome_files(lst_files) - if not group_name and not subgroup_name: - organism_by_kingdom = genome.regroup_organism_by_kingdom(lst_files[0],Kingdom) - dataframe_organism = genome.get_dataframe_kingdom(organism_by_kingdom, kingdom_name) - elif subgroup_name: - # retourne un dataframe contenant le nom des organismes d'un sous groupe donnée - dataframe_organism = genome.get_organism_from_subgroup(lst_files[0],kingdom_name,group_name,subgroup_name) - - dataframe_organism = dataframe_organism.sort_values(["Kingdom", "Group", "SubGroup"]) - dataframe_organism.reset_index(drop=True, inplace=True) - print("Téléchargement terminé") - # df_log = genome.read_log() - print("Début extraction") - # if (not df_log.empty): - # print("Vérification des NC déjà traités") - # last_organism = df_log.iloc[-1,1] - # last_nc = df_log.iloc[-1,0] - # df_last_index = dataframe_organism[dataframe_organism['#Organism/Name'] == last_organism] - # if(not df_last_index.empty): - # dataframe_organism.drop(dataframe_organism.index[:df_last_index.index[0]], inplace=True) - organism_name = dataframe_organism["#Organism/Name"].tolist() - organism_group = dataframe_organism["Group"].tolist() - organism_subgroup = dataframe_organism["SubGroup"].tolist() - - # on recherche les nc par organisme - nc_by_organism = [] - nc_to_parse = [] - #nc_to_remove = genome.remove_treated_nc() - for i, organism in enumerate(organism_name): - #if ((i >= 1) and (organism_subgroup[i] != organism_subgroup[i-1])): - print(f"Dans {kingdom_name}/{organism_group[i]}/{organism_subgroup[i]}") - print(f"Regroupement des NC pour l'organisme {organism}") - ncs = genome.search_nc_by_organism(kingdom_name, 
organism) - #nc_to_parse = [nc for nc in ncs if nc not in nc_to_remove] - nc_by_organism.append([organism, ncs, organism_group[i], organism_subgroup[i]]) - #nc_to_parse = [] - - # Pour un NC donné, extraction et écriture des séquences dans les fichiers par régions - # Récupération du gene via id - # if (not df_log.empty): - # last_index = genome.search_last_nc_index(nc_by_organism, last_nc) - # if (last_index != -1): - # del nc_by_organism[:last_index] - # del nc_by_organism[0][1][:nc_by_organism[0][1].index(last_nc)+1] - print("Début extraction par organisme") - for organism in nc_by_organism: - prefix = "Results/{}/{}/{}/".format(kingdom_name, organism[2], organism[3]) - #print(prefix) - #for nc in organism[1]: - try: - handle = Entrez.efetch(db="nucleotide",id=','.join(organism[1]), rettype="gb", retmode="text", style="withparts") - record = SeqIO.read(handle, "genbank") - handle.close() - if(len(organism[1]) > 1): - for r in record: - #record = SeqIO.read(handle, "genbank") - print(f"Ecriture des regions pour {r.name}") - genome.write_available_feature(r.features, r, prefix, regions) - else: - print(f"Ecriture des regions pour {record.name}") - genome.write_available_feature(record.features, record, prefix, regions) - except KeyboardInterrupt: - print("Arret du programme") - sys.exit() - except (ValueError, urllib.error.HTTPError): - #print(f"Lecture du nc {record.name} impossible") - continue - #except : - # print(f"Lecture du nc {nc} impossible") - # continue - print("Parsing terminé") - """ + app = interface.InterfaceManager()