Skip to content

Commit

Permalink
Merge pull request #14 from SysBioChalmers/devel
Browse files Browse the repository at this point in the history
Pre-release,  v0.1
  • Loading branch information
haowang-bioinfo authored Aug 24, 2018
2 parents 03b011c + 18dc1b6 commit fadd53f
Show file tree
Hide file tree
Showing 126 changed files with 508,490 additions and 279,977 deletions.
26 changes: 23 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,23 @@
# OS generated files #
######################
.DS_Store
# Windows default autosave extension
*.asv
Thumbs.db

# OSX / *nix default autosave extension
*.*~

# Compiled MEX binaries (all platforms)
*.mex*

# OS hidden extensions
.DS_Store

# specific extensions
*.pyc
*.mps

# Windows executable files
*.exe
*.exe.*

# Windows dynamic library files
*.dll
Binary file added ComplementaryData/BiGG/BiGGMets.mat
Binary file not shown.
Binary file added ComplementaryData/BiGG/BiGGRxns.mat
Binary file not shown.
Binary file added ComplementaryData/BiGG/Recon1_rxns.mat
Binary file not shown.
58 changes: 58 additions & 0 deletions ComplementaryData/BiGG/getMetsFromBiGG.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
%
% FILE NAME: getMetsFromBiGG.m
%
% DATE CREATED: 2018-04-24
%
%
% PROGRAMMER: Hao Wang
% Department of Biology and Biological Engineering
% Chalmers University of Technology
%
%
% PURPOSE: Generate data structure for BiGG metabolites
%
% INPUT:  bigg_models_metabolites_20180424.txt (read from dataDir)
% OUTPUT: BiGGMets.mat (written to dataDir), holding the structure BiGGMets
%

% Folder holding the BiGG text dump; the MAT-file is written to the same
% folder. Full paths are used instead of cd() so that running this script
% does not change the caller's working directory.
dataDir='/Users/haowa/Box Sync/HMR3/BiGG';
if exist(dataDir,'dir')~=7
    error('getMetsFromBiGG:missingFolder','Data folder not found: %s',dataDir);
end

% Load the text format BiGG metabolites, after fixing errors with M01870
T=readtable(fullfile(dataDir,'bigg_models_metabolites_20180424.txt'),'ReadVariableNames',1);
BiGGMets=table2struct(T,'ToScalar',true);

% Rename the fields according to RAVEN specification
BiGGMets.mets=BiGGMets.bigg_id;
BiGGMets.metNames=BiGGMets.name;
BiGGMets.universalID=BiGGMets.universal_bigg_id;

% Add oldids field as cell array based on info from old_bigg_ids
% (entries default to the empty string)
num=numel(BiGGMets.bigg_id);
BiGGMets.oldids=cell(num,1);
BiGGMets.oldids(:)={''};

% Add MNX field: collect every MNXM identifier found in database_links
metMNXID=regexp(BiGGMets.database_links,'MNXM\d+','match');
BiGGMets.metMNXID=cell(num,1);
BiGGMets.metMNXID(:)={''};

count=0;  % number of metabolites matching more than one MNX id
for i=1:num
    if ~isempty(BiGGMets.old_bigg_ids{i})
        % Split the '; '-separated list into a column cell array, then
        % remove existing BiGG ids and universal ids from oldids
        BiGGMets.oldids{i}=setdiff(transpose(strsplit(BiGGMets.old_bigg_ids{i},'; ')),BiGGMets.mets{i});
        BiGGMets.oldids{i}=setdiff(BiGGMets.oldids{i},BiGGMets.universalID{i});
    end
    if ~isempty(metMNXID{i})
        if numel(metMNXID{i})==1
            % A single match is stored as a plain character vector
            BiGGMets.metMNXID{i}=metMNXID{i}{1};
        else
            % Several matches are kept together as a cell array
            BiGGMets.metMNXID{i}=metMNXID{i};
            count=count+1;
        end
    end
end
%count=0

% Report how many metabolites have an MNX association
% (recorded as 9915 by the original author)
fprintf('Metabolites with MNX association: %d\n',nnz(~cellfun(@isempty,BiGGMets.metMNXID)));

% Remove the fields that were copied to RAVEN-style names above
BiGGMets=rmfield(BiGGMets,{'bigg_id','name','universal_bigg_id','old_bigg_ids'});

save(fullfile(dataDir,'BiGGMets.mat'),'BiGGMets');

59 changes: 59 additions & 0 deletions ComplementaryData/BiGG/getRxnsFromBiGG.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
%
% FILE NAME: getRxnsFromBiGG.m
%
% DATE CREATED: 2018-01-17
% MODIFIED: 2018-04-24
%
%
% PROGRAMMER: Hao Wang
% Department of Biology and Biological Engineering
% Chalmers University of Technology
%
%
% PURPOSE: Generate Matlab structure for BiGG reactions
%
% INPUT:  bigg_models_reactions_20180424.txt (read from dataDir)
% OUTPUT: BiGGRxns.mat (written to dataDir), holding the structure BiGGRxns
%

% Folder holding the BiGG text dump; the MAT-file is written to the same
% folder. Full paths are used instead of cd() so that running this script
% does not change the caller's working directory.
dataDir='/Users/haowa/Box Sync/HMR3/BiGG';
if exist(dataDir,'dir')~=7
    error('getRxnsFromBiGG:missingFolder','Data folder not found: %s',dataDir);
end

% Load the text format BiGG reactions
T=readtable(fullfile(dataDir,'bigg_models_reactions_20180424.txt'),'ReadVariableNames',1);
BiGGRxns=table2struct(T,'ToScalar',true);

% Rename the fields according to RAVEN specification
BiGGRxns.rxns=BiGGRxns.bigg_id;
BiGGRxns.rxnNames=BiGGRxns.name;
BiGGRxns.rxnEquations=BiGGRxns.reaction_string;

% Add oldids field as cell array based on info from old_bigg_ids
% (entries default to the empty string)
num=numel(BiGGRxns.bigg_id);
BiGGRxns.oldids=cell(num,1);
BiGGRxns.oldids(:)={''};

% Add MNX field: collect every MNXR identifier found in database_links
rxnMNXID=regexp(BiGGRxns.database_links,'MNXR\d+','match');
BiGGRxns.rxnMNXID=cell(num,1);
BiGGRxns.rxnMNXID(:)={''};

count=0;  % number of reactions matching more than one MNX id
for i=1:num
    oldIdString=BiGGRxns.old_bigg_ids{i};
    % Ignore old_bigg_ids that are empty or identical to bigg_id. The
    % emptiness guard mirrors getMetsFromBiGG.m and keeps the '' default
    % instead of storing the result of splitting an empty string.
    if ~isempty(oldIdString) && ~isequal(BiGGRxns.rxns{i},oldIdString)
        % Split the '; '-separated list into a column cell array and
        % remove existing BiGG ids from oldids
        BiGGRxns.oldids{i}=setdiff(transpose(strsplit(oldIdString,'; ')),BiGGRxns.rxns{i});
    end
    if ~isempty(rxnMNXID{i})
        if numel(rxnMNXID{i})==1
            % A single match is stored as a plain character vector
            BiGGRxns.rxnMNXID{i}=rxnMNXID{i}{1};
        else
            % Several matches are kept together as a cell array
            BiGGRxns.rxnMNXID{i}=rxnMNXID{i};
            count=count+1;
        end
    end
end
%count=0

% Report how many reactions have an MNX association
% (recorded as 15904 by the original author)
fprintf('Reactions with MNX association: %d\n',nnz(~cellfun(@isempty,BiGGRxns.rxnMNXID)));

% Remove the fields that were copied to RAVEN-style names above
BiGGRxns=rmfield(BiGGRxns,{'bigg_id','name','reaction_string','old_bigg_ids'});

save(fullfile(dataDir,'BiGGRxns.mat'),'BiGGRxns');

Binary file added ComplementaryData/Ensembl/Ensembl2NCBI.mat
Binary file not shown.
Binary file added ComplementaryData/Ensembl/Ensembl2Uniprot.mat
Binary file not shown.
46 changes: 46 additions & 0 deletions ComplementaryData/Ensembl/assocEnsemblGenes.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
%
% FILE NAME: assocEnsemblGenes.m
%
% DATE CREATED: 2018-05-03
% UPDATED: 2018-05-31
%
% PROGRAMMER: Hao Wang
% Department of Biology and Biological Engineering
% Chalmers University of Technology
%
%
% PURPOSE: Get external associations of Ensembl genes
%
% INPUT:  stableGeneID2NCBIgeneID.txt, stableGeneID2UniprotID.txt
%         (both read from ensemblDir)
% OUTPUT: Ensembl2NCBI.mat, Ensembl2Uniprot.mat (written to ensemblDir)
%

% Folder holding the Ensembl association tables; the MAT-files are written
% to the same folder. Full paths are used instead of cd() so that running
% this script does not change the caller's working directory.
ensemblDir='/Users/haowa/Box Sync/HMR3/Ensembl';
if exist(ensemblDir,'dir')~=7
    error('assocEnsemblGenes:missingFolder','Data folder not found: %s',ensemblDir);
end

T=readtable(fullfile(ensemblDir,'stableGeneID2NCBIgeneID.txt'),'ReadVariableNames',1);
T=table2struct(T,'ToScalar',true);

% Format NCBI Gene IDs: convert each numeric element to a string
NCBIGeneID=arrayfun(@num2str,T.NCBIGeneID,'UniformOutput',false);
% Clear elements without NCBI Gene ID associations (NaN prints as 'NaN')
NCBIGeneID(strcmp('NaN',NCBIGeneID))={''};

% Generate data structure for NCBIGeneID
Ensembl2NCBI.genes=T.GeneStableID;
Ensembl2NCBI.NCBIGeneID=NCBIGeneID;

% Manual curation 2018-06-13
% remove repetitive association "ENSG00000174876-278" based on NCBI.
% The original script deleted hard-coded row 34825; the row is now located
% by content so the curation survives changes in the input row order.
% NOTE(review): the label is interpreted as gene ENSG00000174876 paired
% with NCBI Gene ID 278 - confirm this is the row originally removed.
dupRows=strcmp('ENSG00000174876',Ensembl2NCBI.genes) & strcmp('278',Ensembl2NCBI.NCBIGeneID);
if any(dupRows)
    Ensembl2NCBI.genes(dupRows)=[];
    Ensembl2NCBI.NCBIGeneID(dupRows)=[];
else
    warning('assocEnsemblGenes:curationSkipped', ...
        'Association ENSG00000174876-278 not found; nothing was removed.');
end
save(fullfile(ensemblDir,'Ensembl2NCBI.mat'),'Ensembl2NCBI');

T=readtable(fullfile(ensemblDir,'stableGeneID2UniprotID.txt'),'ReadVariableNames',1);
T=table2struct(T,'ToScalar',true);

% Generate data structure for UniprotID
Ensembl2Uniprot.genes=T.GeneStableID;
% NOTE(review): the original assignments read "Ensembl2Uniprot.ID=;" (the
% right-hand side is missing, which is a syntax error). The ID column is
% therefore taken to be the only column other than GeneStableID; if the
% table has several candidates the script stops so the intended column can
% be selected explicitly.
idColumns=setdiff(fieldnames(T),{'GeneStableID'},'stable');
if numel(idColumns)~=1
    error('assocEnsemblGenes:ambiguousIdColumn', ...
        'Expected exactly one UniProt ID column besides GeneStableID, found %d: %s', ...
        numel(idColumns),strjoin(idColumns(:)',', '));
end
Ensembl2Uniprot.ID=T.(idColumns{1});
save(fullfile(ensemblDir,'Ensembl2Uniprot.mat'),'Ensembl2Uniprot');

53 changes: 53 additions & 0 deletions ComplementaryData/HMR2/HMR2Curation.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
%
% FILE NAME: HMR2Curation.m
%
% DATE CREATED: 2018-01-16
%
%
% PROGRAMMER: Hao Wang
% Department of Biology and Biological Engineering
% Chalmers University of Technology
%
%
% PURPOSE: Curating HMR2 database model toward HMR3
%
% INPUT:  HMRdatabase2_00.mat, HMRdatabase2_00.xlsx (current folder)
% OUTPUT: HMRdatabase2_01.mat (current folder), holding the model ihuman
%

% Load the original Matlab file of HMR2 database (provides 'ihuman')
load('HMRdatabase2_00.mat');

% Import HMR2 reactions from the Excel file 'RXNS' sheet, leaving out
% the following five rows that represent reaction classes
% 7709: 'Exchange reactions';
% 8171: 'Fake reactions';
% 8176: 'Biomass reactions';
% 8177: 'HMR_biomass_Renalcancer';
% 8183: 'Included for connectivity for INIT'
T=readtable('HMRdatabase2_00.xlsx','Sheet','RXNS','ReadVariableNames',1);
HMR2=table2struct(T,'ToScalar',true);

% Adding reaction identifiers from external databases
% additional empty spaces were also removed during this process
% The columns are copied positionally, so the reaction lists must match
% exactly. Stop here on a mismatch: continuing would let the indexed
% assignments below create incomplete fields and save a corrupt model.
if ~isequal(ihuman.rxns,HMR2.RXNID)
    error('HMR2Curation:rxnMismatch', ...
        'Reaction ids in HMRdatabase2_00.mat and the RXNS sheet of HMRdatabase2_00.xlsx differ.');
end
ihuman.rxnKEGGID=HMR2.KEGGID;               %KEGG
ihuman.rxnEHMNID=HMR2.EHMNID;               %EHMN
ihuman.rxnBiGGID=HMR2.BIGGDATABASEID;       %Recon
ihuman.rxnHepatoNET1ID=HMR2.HEPATONET1ID;   %Hepatonet1
ihuman.rxnREACTOMEID=HMR2.REACTOMEID;       %Reactome
ihuman.rxnReferences=HMR2.REFERENCES;       %References

% Some errors were spotted and fixed:
% The reaction HMR_2190 was associated to four REACTOME reactions.
% The first one (REACT_22270) is correct, the other three
% (REACT_22097; REACT_22133; REACT_22219) should be wrong because
% they are empty ids by searching reactome.org, and thus removed
index=find(strcmp('HMR_2190',ihuman.rxns));
if numel(index)~=1
    error('HMR2Curation:rxnLookup','Expected exactly one reaction HMR_2190, found %d.',numel(index));
end
ihuman.rxnREACTOMEID{index}='REACT_22270';

% The KEGG id of HMR_7709 was associated to 'R0302' that should be a typo
% It was manually checked and corrected to R03027
index=find(strcmp('HMR_7709',ihuman.rxns));
if numel(index)~=1
    error('HMR2Curation:rxnLookup','Expected exactly one reaction HMR_7709, found %d.',numel(index));
end
ihuman.rxnKEGGID{index}='R03027';

% Save as version 2.0.1
save('HMRdatabase2_01.mat','ihuman');   %===2018-01-16
Binary file added ComplementaryData/HMR2/HMRdatabase2_00.mat
Binary file not shown.
Binary file added ComplementaryData/HMR2/HMRdatabase2_00.xlsx
Binary file not shown.
Binary file added ComplementaryData/HMR2/HMRdatabase2_01.mat
Binary file not shown.
Binary file added ComplementaryData/HMR2/HMRdatabase2_02.mat
Binary file not shown.
Loading

0 comments on commit fadd53f

Please sign in to comment.