% Splits deCODEme_scan.csv into a separate csv file for each chromosome:
% cacio1.csv ... cacio22.csv, cacioX.csv, cacioY.csv and cacioM.csv.
%
% Expected row format (six comma-separated fields, chromosome in field 3):
%   rs4477212,A/G,1,72017,+,AA
% Rows are expected to be ordered: numbered chromosomes first, then X,
% then Y, then M (the X/Y/M blocks are located by their first occurrence).
%
% The script takes some time to run, as the input is a big file of strings.
% Note: pay attention to whether the file has a header line or not.

inputFile = 'deCODEme_scan.csv';
hasHeader = false;   % set to true if the csv file starts with a header line

% ---- read the six columns of the csv file as strings --------------------
disp('Reading deCODEme_scan.csv')
fi = fopen(inputFile);
if fi == -1
    error('Cannot open %s for reading', inputFile);
end
decodeme = textscan(fi, '%s %s %s %s %s %s', 'delimiter', ',', ...
                    'headerLines', double(hasHeader));
fclose(fi);

% decodeme{k} is a column cell vector of strings. A truncated last row can
% leave the columns with unequal lengths, so trim them to the common length.
ndecodeme = min(cellfun(@numel, decodeme));
if ndecodeme == 0
    error('No data rows were read from %s', inputFile);
end
cols = cellfun(@(c) c(1:ndecodeme), decodeme, 'UniformOutput', false);
disp('deCODEme.csv read');

chrom = cols{3};

% Rebuild every output line once, vectorised. strcat on cell inputs works
% element-wise and keeps whitespace and empty fields intact.
csvLines = strcat(cols{1}, ',', cols{2}, ',', cols{3}, ',', ...
                  cols{4}, ',', cols{5}, ',', cols{6});

% ---- find where chr X, Y and M begin ------------------------------------
% Each marker is searched from the start of the previous one that was found.
markers = {'X', 'Y', 'M'};
starts = nan(1, 3);
from = 1;
for m = 1:3
    hit = find(strcmp(chrom(from:end), markers{m}), 1);
    if ~isempty(hit)
        starts(m) = from + hit - 1;
        from = starts(m);
    end
end
% A missing marker yields an empty range: its block "starts" where the
% next present block starts (or just past the end of the data).
if isnan(starts(3)), starts(3) = ndecodeme + 1; end
if isnan(starts(2)), starts(2) = starts(3); end
if isnan(starts(1)), starts(1) = starts(2); end
initX = starts(1);
initY = starts(2);
initM = starts(3);

% ---- decide which rows go to which file ---------------------------------
disp('Start extracting')

% Numeric chromosome for every row before the X block; rows whose
% chromosome field is not a number become NaN and match no chromosome.
chromosome = str2double(chrom(1:initX-1));

nJobs = 25;
jobSuffix = cell(1, nJobs);   % file name suffix: cacio<suffix>.csv
jobLabel  = cell(1, nJobs);   % text used in the progress messages
jobRows   = cell(1, nJobs);   % row indices written to the file
for c = 1:22
    jobSuffix{c} = num2str(c);
    jobLabel{c}  = ['chr' num2str(c)];
    jobRows{c}   = find(chromosome == c);
end
jobSuffix(23:25) = {'X', 'Y', 'M'};
jobLabel(23:25)  = {'chr X', 'chr Y', 'mtDNA'};
jobRows(23:25)   = {initX:initY-1, initY:initM-1, initM:ndecodeme};

% ---- write one file per chromosome --------------------------------------
for j = 1:nJobs
    disp(['Extracting ' jobLabel{j}])
    outName = ['cacio' jobSuffix{j} '.csv'];
    fo = fopen(outName, 'w');
    if fo == -1
        error('Cannot open %s for writing', outName);
    end
    rows = jobRows{j};
    % Guard the empty case: fprintf with no data arguments would still
    % emit the literal part of the format; an empty file is wanted instead.
    if ~isempty(rows)
        fprintf(fo, '%s\n', csvLines{rows});
    end
    fclose(fo);
    disp(['Done - extracted ' jobLabel{j}])
end