//$MENU window=main;popup=file;insertafter=delete experiment file;name=Import sequence file(s)
//$BUTTON window=main;hint=Import sequence file(s) in EMBL, GenBank or FASTA format;position=1

//*************************************************************
// Imports sequences from one or more EMBL, GenBank or FASTA files.
//
// The user selects the files, the file format and the target
// sequence experiment, then links file tags (EMBL/GenBank) or
// FASTA header field numbers to database fields. For every record
// found, a database entry is created if needed, the sequence is
// stored in the chosen experiment and the linked fields are filled.
// Sequences that could not be created are listed in a message at
// the end of the import.
//
// NOTE(review): in this copy of the script, list separators (file
// name list, experiment list, field list, "EMBL GenBank FASTA")
// appear as single spaces. "-- none --" itself contains spaces, so
// the original separator was presumably a tab or similar - confirm
// against a pristine copy of the script before use.
//*************************************************************

string filenamelist,filename,fileformat;
string experlist,expername,fieldlist,line,st,seqstring;
string key,report;
DIALOG dlg,dlg2;
integer x,ok,i,j,x0,y0,xmax,ymax,ct,seqstarted,seqended,basesstarted;
integer tagcount,keytag,currenttag;
//seqok: result of loading/creating a sequence (kept apart from the read-loop flag 'ok')
//isheader: 1 when the current FASTA line starts with ">"
integer seqok,isheader;
string tags[],taginfo[],tagfield[],tagcontent[];
string str_tagnr[];
integer tagnr[];
FILE fp;
SEQUENCE cs;

if not(FilePromptName("Select the sequence file(s) you want to import",filenamelist,1)) then stop;

//Prompt for file type & experiment name
fileformat="EMBL";
for i=1 to DbGetExperCount do
   if DbGetExperClass(i)="SEQ" then experlist=experlist+DbGetExperName(i)+" ";
//preselect the first sequence experiment of the list
expername=experlist;expername=splitstring(expername," ");
DlgAddFrame(dlg,"File format",190,20,110,95);
DlgAddList(dlg,"EMBL GenBank FASTA",fileformat,210,40,70,70,"LIST");
DlgAddFrame(dlg,"Experiment",20,20,150,190);
DlgAddList(dlg,experlist,expername,40,40,110,170,"LIST");
x=DlgShow(dlg,"Import sequence file",330,260);
if x=0 then stop;

//choices offered for every tag: no link, the entry key, or any database field
fieldlist="-- none -- Key ";
for i=1 to DbGetFieldCount do fieldlist=fieldlist+DbGetFieldName(i)+" ";


//*************************************************************
//***                    IMPORT EMBL                        ***
//*************************************************************

if fileformat="EMBL" then {

   //prompt for EMBL tag -- database field links
   tagcount=10;
   tags[1]="AC";taginfo[1]="Accession";
   tags[2]="ID";taginfo[2]="Identity";
   tags[3]="DT";taginfo[3]="Date";
   tags[4]="DE";taginfo[4]="Description";
   tags[5]="KW";taginfo[5]="Keyword";
   tags[6]="OS";taginfo[6]="Organism sp.";
   tags[7]="OC";taginfo[7]="Organism class";
   tags[8]="OG";taginfo[8]="Organelle";
   tags[9]="SV";taginfo[9]="Sequence ver.";
   tags[10]="NI";taginfo[10]="Nucl. ident.";
   for i=1 to tagcount do {
      x0=40;y0=i*30+10;
      tagfield[i]="-- none --";
      DlgAddText(dlg2,tags[i],x0,y0,30,20);
      DlgAddText(dlg2,"("+taginfo[i]+")",x0+30,y0,85,20);
      DlgAddList(dlg2,fieldlist,tagfield[i],x0+130,y0-2,115,120,"DROP");
   }
   DlgAddFrame(dlg2,"EMBL tag",20,20,140,330);
   DlgAddFrame(dlg2,"Database field",160-1,20,140,330);

   //show the dialog again until no database field is linked twice
   ok=0;
   while not(ok) do {
      x=DlgShow(dlg2,"Import EMBL file",420,390);
      if x=0 then stop;
      ok=1;
      for i=1 to tagcount do if tagfield[i]<>"-- none --" then
         for j=1 to i-1 do if ok and (tagfield[i]=tagfield[j]) then {
            ok=0;
            message("ERROR~n~nField '"+tagfield[i]+"' is specified several times.~nEach database field should be specified no more than once");
         }
   }

   //tag that supplies the entry key (0 = none); spelled "Key" as in the field list
   keytag=0;
   for i=1 to tagcount do if tagfield[i]="Key" then keytag=i;

   //read all the files
   ct=1;
   while filenamelist<>"" do {
      filename=splitstring(filenamelist," ");
      if not(FileOpenRead(fp,filename)) then {
         message("ERROR~n~nUnable to read the file '"+filename+"'");
         stop;
      }
      ok=1;seqstarted=0;seqended=0;basesstarted=0;
      for i=1 to tagcount do tagcontent[i]="";
      setbusy("Importing sequence "+str(ct,0,0));
      while ok and not(FileIsEnd(fp)) do {
         line=FileRead(fp,9999);

         //header line: 2-letter line code, three blanks, data from column 6
         if not(basesstarted) and (length(line)>5) and (substring(line,3,5)="   ") then {
            st=substring(line,1,2);
            for i=1 to tagcount do if st=tags[i] then {
               seqstarted=1;
               //repeated lines of the same tag are joined with a blank
               if tagcontent[i]<>"" then tagcontent[i]=tagcontent[i]+" ";
               tagcontent[i]=tagcontent[i]+substring(line,6,9999);
            }
         }

         //"//" terminates a record
         if substring(line,1,2)="//" then {
            seqended=1;basesstarted=0;
         }
         if basesstarted then seqstring=seqstring+line;
         //end of file also terminates the current record
         if FileIsEnd(fp) then {
            ok=0;
            seqended=1;
         }
         //the bases start on the line after the SQ line
         if substring(line,1,2)="SQ" then {
            seqstarted=1;
            basesstarted=1;
         }

         if seqended then {
            if seqstarted then {
               for i=1 to tagcount do tagcontent[i]=substring(tagcontent[i],1,79);
               //drop a trailing ";" from the accession
               if substring(tagcontent[1],length(tagcontent[1]),length(tagcontent[1]))=";" then
                  tagcontent[1]=substring(tagcontent[1],1,length(tagcontent[1])-1);
               key="";
               if keytag>0 then key=tagcontent[keytag];
               if key="" then key=DbAddEntry("");
               if not(DbIsKeyPresent(key)) then DbAddEntry(key);
               //a failure here is reported but must not stop the import of
               //the remaining records, hence the separate flag
               seqok=SeqLoad(cs,key,expername);
               if not(seqok) then seqok=SeqCreate(cs,expername,"",key);
               if seqok then {
                  SeqSet(cs,seqstring);
                  SeqSave(cs);
               }
               else report=report+"Unable to create sequence for key '"+key+"'~n";
               for i=1 to tagcount do
                  if (tagfield[i]<>"-- none --") and (tagfield[i]<>"Key") then
                     DbSetField(key,tagfield[i],tagcontent[i]);
            }
            //reset the state for the next record
            seqstarted=0;seqended=0;basesstarted=0;
            seqstring="";
            for i=1 to tagcount do tagcontent[i]="";
            ct=ct+1;
            setbusy("Importing sequence "+str(ct,0,0));
         }
      }
      FileClose(fp);
   }
   DbSaveFields;
   setbusy("");
}


//*************************************************************
//***                   IMPORT GenBank                      ***
//*************************************************************

if fileformat="GenBank" then {

   //prompt for GenBank tag -- database field links
   tagcount=8;
   tags[1]="ACCESSION";
   tags[2]="LOCUS";
   tags[3]="DEFINITION";
   tags[4]="KEYWORDS";
   tags[5]="SOURCE";
   tags[6]="ORGANISM";
   tags[7]="VERSION";
   tags[8]="NID";
   for i=1 to tagcount do {
      x0=40;y0=i*30+10;
      tagfield[i]="-- none --";
      DlgAddText(dlg2,tags[i],x0,y0,60,20);
      DlgAddList(dlg2,fieldlist,tagfield[i],x0+90,y0-2,115,120,"DROP");
   }
   DlgAddFrame(dlg2,"GenBank tag",20,20,100,300);
   DlgAddFrame(dlg2,"Database field",120-1,20,140,300);

   //show the dialog again until no database field is linked twice
   ok=0;
   while not(ok) do {
      x=DlgShow(dlg2,"Import GenBank file",420,330);
      if x=0 then stop;
      ok=1;
      for i=1 to tagcount do if tagfield[i]<>"-- none --" then
         for j=1 to i-1 do if ok and (tagfield[i]=tagfield[j]) then {
            ok=0;
            message("ERROR~n~nField '"+tagfield[i]+"' is specified several times.~nEach database field should be specified no more than once");
         }
   }

   //tag that supplies the entry key (0 = none); spelled "Key" as in the field list
   keytag=0;
   for i=1 to tagcount do if tagfield[i]="Key" then keytag=i;

   //read all the files
   ct=1;
   while filenamelist<>"" do {
      filename=splitstring(filenamelist," ");
      if not(FileOpenRead(fp,filename)) then {
         message("ERROR~n~nUnable to read the file '"+filename+"'");
         stop;
      }
      ok=1;seqstarted=0;seqended=0;basesstarted=0;
      for i=1 to tagcount do tagcontent[i]="";
      setbusy("Importing sequence "+str(ct,0,0));
      currenttag=0;
      while ok and not(FileIsEnd(fp)) do {
         line=FileRead(fp,9999);

         //header line: keyword in columns 1-12, data from column 13
         if not(basesstarted) and (length(line)>13) then {
            st=substring(line,1,12);
            //a blank keyword area marks a continuation of the current tag;
            //anything else starts a new (possibly unlinked) keyword
            if st<>"            " then currenttag=0;
            for i=1 to tagcount do if find(st,tags[i]+" ",1)>0 then {
               currenttag=i;seqstarted=1;
            }
            if currenttag>0 then {
               if tagcontent[currenttag]<>"" then tagcontent[currenttag]=tagcontent[currenttag]+" ";
               tagcontent[currenttag]=tagcontent[currenttag]+substring(line,13,9999);
            }
         }

         //"//" terminates a record
         if substring(line,1,2)="//" then {
            seqended=1;basesstarted=0;
         }
         if basesstarted then seqstring=seqstring+line;
         //end of file also terminates the current record
         if FileIsEnd(fp) then {
            ok=0;
            seqended=1;
         }
         //the bases start on the line after the ORIGIN line
         if substring(line,1,6)="ORIGIN" then {
            seqstarted=1;
            basesstarted=1;
         }

         if seqended then {
            if seqstarted then {
               for i=1 to tagcount do tagcontent[i]=substring(tagcontent[i],1,79);
               //drop a trailing ";" from the accession
               if substring(tagcontent[1],length(tagcontent[1]),length(tagcontent[1]))=";" then
                  tagcontent[1]=substring(tagcontent[1],1,length(tagcontent[1])-1);
               key="";
               if keytag>0 then key=tagcontent[keytag];
               if key="" then key=DbAddEntry("");
               if not(DbIsKeyPresent(key)) then DbAddEntry(key);
               //a failure here is reported but must not stop the import of
               //the remaining records, hence the separate flag
               seqok=SeqLoad(cs,key,expername);
               if not(seqok) then seqok=SeqCreate(cs,expername,"",key);
               if seqok then {
                  SeqSet(cs,seqstring);
                  SeqSave(cs);
               }
               else report=report+"Unable to create sequence for key '"+key+"'~n";
               for i=1 to tagcount do
                  if (tagfield[i]<>"-- none --") and (tagfield[i]<>"Key") then
                     DbSetField(key,tagfield[i],tagcontent[i]);
            }
            //reset the state for the next record
            seqstarted=0;seqended=0;basesstarted=0;
            seqstring="";currenttag=0;
            for i=1 to tagcount do tagcontent[i]="";
            ct=ct+1;
            setbusy("Importing sequence "+str(ct,0,0));
         }
      }
      FileClose(fp);
   }
   DbSaveFields;
   setbusy("");
}


//***********************************************************
//***                   Import FASTA                      ***
//***********************************************************

if fileformat="FASTA" then {

   //one edit box per database field (index 0 stands for the key),
   //laid out in columns that wrap once y0 exceeds 250
   x0=20;y0=20;xmax=20;ymax=20;
   for i=0 to DbGetFieldCount do {
      y0=y0+30;
      if y0>250 then {
         x0=x0+160;
         y0=50;
      }
      if x0>xmax then xmax=x0;
      if y0>ymax then ymax=y0;
      st="Key";if i>0 then st=DbGetFieldName(i);
      //dotted leader after the field name
      for j=1 to 100 do st=st+" .";
      DlgAddText(dlg2,st,x0,y0,100,15);
      DlgAddEdit(dlg2,str_tagnr[i],x0+105,y0,30,20);
   }
   DlgAddText(dlg2,"Please enter for every database field the corresponding FASTA field number (if any)",20,10,230,30);
   x=DlgShow(dlg2,"Import FASTA file",xmax+270,ymax+60);
   if x=0 then stop;
   for i=0 to DbGetFieldCount do tagnr[i]=val(str_tagnr[i]);

   //read all the files
   ct=1;
   while filenamelist<>"" do {
      filename=splitstring(filenamelist," ");
      if not(FileOpenRead(fp,filename)) then {
         message("ERROR~n~nUnable to read the file '"+filename+"'");
         stop;
      }
      ok=1;seqstarted=0;tagcount=0;seqstring="";
      while ok and not(FileIsEnd(fp)) do {
         line=FileRead(fp,9999);
         isheader=0;
         if substring(line,1,1)=">" then isheader=1;

         //append sequence data first, so that the last line of the file
         //is part of the sequence saved at end of file
         if not(isheader) then seqstring=seqstring+line;

         //a new header or the end of the file completes the current sequence
         if (seqstarted) and ((isheader) or (FileIsEnd(fp))) then {
            for i=1 to tagcount do tagcontent[i]=substring(tagcontent[i],1,79);
            key="";j=tagnr[0];
            if (j>0) and (j<=tagcount) then key=tagcontent[j];
            if key="" then key=DbAddEntry("");
            if not(DbIsKeyPresent(key)) then DbAddEntry(key);
            //a failure here is reported but must not stop the import of
            //the remaining sequences, hence the separate flag
            seqok=SeqLoad(cs,key,expername);
            if not(seqok) then seqok=SeqCreate(cs,expername,"",key);
            if seqok then {
               SeqSet(cs,seqstring);
               SeqSave(cs);
            }
            else report=report+"Unable to create sequence for key '"+key+"'~n";
            for i=1 to DbGetFieldCount do {
               j=tagnr[i];
               if (j>0) and (j<=tagcount) then {
                  DbSetField(key,DbGetFieldName(i),tagcontent[j]);
               }
            }
         }

         //header line: split the text after ">" into "|"-separated fields
         if isheader then {
            setbusy("Importing sequence "+str(ct,0,0));
            ct=ct+1;
            tagcount=0;
            splitstring(line,">");
            while line<>"" do {
               tagcount=tagcount+1;
               tagcontent[tagcount]=splitstring(line,"|");
            }
            seqstring="";
            seqstarted=1;
         }
      }
      FileClose(fp);
   }
   DbSaveFields;
   setbusy("");
}

//show the problems collected during the import, if any
if report<>"" then message(report);

//NOTE(review): the attachment section below is kept exactly as found in this copy
#_#_#_#_#_#_# ATTACHMENTS #_#_#_#_#_#_#_ >>ATTACHMENT V_1.0 button 96 EJEJCKAAMEAGAAAAAIAAAIAAAIAAAANFEACLAAKA IGABAANFEACLAAKAIGABAAMAMAMAMAMAMAMAMAMA MBMBMBMJMJMJMEMEMELPLPLPLNLNLNMCMCMCMIMI MIMPMPMPNHNHNHNMNMNMOAOAOAOAOAOANONONONG NGNGMKMKMKMAMAMAMAMAMAMAMAMAMAMAMAMAMAMA MAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMEMEMELBLB LBJPJPJPINININJAJAJAJGJGJGKAKAKAKKKKKKLG LGLGMEMEMENDNDNDNPNPNPOLOLOLPFPFPFPLPLPL 
PGPGPGPCPCPCOMOMOMNNNNNNMHMHMHMAMAMAMAMA MAMAMAMAMAMAMAMAMAMAMAMAMAMDMDMDICICICHL HLHLIAIAIAIFIFIFIKIKIKJBJBJBJKJKJKKFKFKF LDLDLDMDMDMDNENENEODODODPAPAPAPKPKPKPHPH PHPBPBPBOLOLOLOGOGOGOCOCOCOAOAOAMGMGMGMA MAMAMAMAMAMAMAMAMAMAMAMAMAMAMDMDMDJDJDJD HEHEHEHJHJHJHOHOHOIDIDIDIKIKIKJDJDJDJOJO JOKOKOKOMBMBMBNGNGNGOIOIOIPHPHPHPIPIPIPA PAPAOKOKOKOFOFOFOAOAOANMNMNMNJNJNJMDMDMD MAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMBMB MBLGLGLGJPJPJPJEJEJEIDIDIDICICICILILILJG JGJGKHKHKHLPLPLPNJNJNJOPOPOPPJPJPJPAPAPA OIOIOIOEOEOENJNJNJLCLCLCJFJFJFLHLHLHMAMA MAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMA MAMALMLMLMKPKPKPKLKLKLLCLCLCKOKOKOKFKFKF KFKFKFKIKIKIKNKNKNLCLCLCLFLFLFKIKIKIJFJF JFIEIEIEGLGLGLFCFCFCFHFHFHKOKOKOMAMAMAMA MAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMA MAMAMAMAMAMALKLKLKKNKNKNKHKHKHKBKBKBJKJK JKJCJCJCIKIKIKICICICHJHJHJHBHBHBGJGJGJGB GBGBFKFKFKFEFEFEFEFEFEKHKHKHMAMAMAMAMAMA MAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMA MAMAMAMAMAMAMAMAMAMALILILIKKKKKKKDKDKDJM JMJMJEJEJEILILILIDIDIDHJHJHJHAHAHAGHGHGH FPFPFPFHFHFHFDFDFDJPJPJPIOIOIOJBJBJBMAMA MAMAMAMALDLDLDILILILHPHPHPIJIJIJLBLBLBMA MAMAMAMAMAMAMAMAMAMAMAMAMAMALGLGLGKHKHKH JPJPJPJGJGJGINININIDIDIDHIHIHIGOGOGOGEGE GEFMFMFMFEFEFEJGJGJGLCLCLCIBIBIBIAIAIALF LFLFLOLOLOIGIGIGJPJPJPLLLLLLJOJOJOILILIL MAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMALDLD LDKCKCKCJJJJJJIOIOIOIEIEIEHHHHHHGMGMGMGC GCGCFIFIFIINININMAMAMAJIJIJIJHJHJHJEJEJE JLJLJLLCLCLCIAIAIALOLOLOMAMAMALPLPLPMAMA MAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMALP LPLPKPKPKPJMJMJMJBJBJBIEIEIEHGHGHGGJGJGJ FOFOFOIEIEIELPLPLPLNLNLNICICICKBKBKBJPJP JPIDIDIDKOKOKOIAIAIALPLPLPMAMAMALPLPLPLP LPLPMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMA MAMAMALPLPLPKNKNKNJEJEJEIFIFIFHFHFHFGGGG GGHLHLHLLPLPLPMAMAMAKHKHKHICICICIPIPIPIP IPIPICICICKEKEKEIBIBIBKLKLKLMAMAMAKHKHKH IIIIIIMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMA MAMAMAMAMAMAMANONONONBNBNBKOKOKOJDJDJDHO HOHOJGJGJGMAMAMAMAMAMAINININJOJOJOMAMAMA MAMAMAJNJNJNIOIOIOKJKJKJIBIBIBIHIHIHIAIA IAKIKIKIMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMA MAMAMAMAMAMAMAMANPNPNPNONONOMAMAMAKCKCKC 
IDIDIDJGJGJGMAMAMAMAMAMALJLJLJLNLNLNMAMA MAMAMAMALNLNLNLJLJLJMAMAMALMLMLMLDLDLDLL LLLLMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMA MAMAMAMAMAMAMAMAMANPNPNPNONONOMAMAMAKCKC KCIDIDIDJGJGJGMAMAMAMAMAMALLLLLLJDJDJDIA IAIAIBIBIBJIJIJILOLOLOJBJBJBIFIFIFIFIFIF IFIFIFIKIKIKMAMAMAMAMAMAMAMAMAMAMAMAMAMA MAMAMAMAMAMAMAMAMAMANPNPNPNONONOMAMAMAKC KCKCIDIDIDJGJGJGMAMAMAMAMAMAJDJDJDJCJCJC LILILILDLDLDIEIEIEKKKKKKLKLKLKLFLFLFHPHP HPLALALALJLJLJMAMAMAMAMAMAMAMAMAMAMAMAMA MAMAMAMAMAMAMAMAMAMAMANPNPNPNONONOMAMAMA KCKCKCIDIDIDJGJGJGMAMAMALOLOLOIAIAIALCLC LCLPLPLPLJLJLJLILILILMLMLMMAMAMALMLMLMHP HPHPLGLGLGMAMAMAMAMAMAMAMAMAMAMAMAMAMAMA MAMAMAMAMAMAMAMAMAMAMAMANPNPNPNONONOMAMA MAKCKCKCIDIDIDJGJGJGMAMAMALNLNLNHPHPHPLD LDLDLMLMLMIFIFIFIBIBIBJMJMJMMAMAMALMLMLM HPHPHPLGLGLGMAMAMAMAMAMAMAMAMAMAMAMAMAMA MAMAMAMAMAMAMAMAMAMAMAMAMANPNPNPNONONOMA MAMAKCKCKCIDIDIDJGJGJGMAMAMAMAMAMAIOIOIO JJJJJJLPLPLPLOLOLOIPIPIPJMJMJMMAMAMALMLM LMHPHPHPLGLGLGMAMAMAMAMAMAMAMAMAMAMAMAMA MAMAMAMAMAMAMAMAMAMAMAMAMAMANANANANLNLNL MAMAMAKCKCKCIBIBIBJLJLJLMAMAMAMAMAMALFLF LFIIIIIIIFIFIFIDIDIDIGIGIGKOKOKOMAMAMALM LMLMHPHPHPLGLGLGMAMAMAMAMAMAMAMAMAMAMAMA MAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMAMALILI LILELELEKPKPKPLHLHLHMAMAMAMAMAMAMAMAMAMA MAMALPLPLPLFLFLFLFLFLFLPLPLPMAMAMAMAMAMA LPLPLPLJLJLJLOLOLOMAMAMAAPAAPOAAAEAAABAA AAAAAAAAAAAAAAABAEAAABAAAAAABLAAAAAAABAB AEAAABAAAAAABFAAAAAAACABADAAADAAAAAAAIAA AAAAADABADAAABAAAAAAABAAAAAAAGABADAAABAA AAAAACAAAAAAAOABACAAABAAAAAAAOAAAAAABBAB AEAAABAAAAAABPAAAAAABCABADAAABAAAAAAABAA AAAABFABADAAABAAAAAAADAAAAAABGABAEAAABAA AAAABFAAAAAABHABAEAAABAAAAAAKFAGAAAABKAB AFAAABAAAAAAAPAAAAAABLABAFAAABAAAAAABHAA AAAACIABADAAABAAAAAAADAAAAAAAAAAAAAA