From 616ceaf3268ec45a19378cb08371c6a2708da2b8 Mon Sep 17 00:00:00 2001 From: Mario Fink Date: Wed, 20 Jan 2021 12:24:45 +0100 Subject: [PATCH] tdm_reaper: process submatrices, localcolumns --- lib/tdm_datamodel.hpp | 7 +- lib/tdm_reaper.cpp | 116 ++++++++++++++- lib/tdm_reaper.hpp | 326 +----------------------------------------- src/main.cpp | 3 +- 4 files changed, 125 insertions(+), 327 deletions(-) diff --git a/lib/tdm_datamodel.hpp b/lib/tdm_datamodel.hpp index 202150c..d0c77c8 100644 --- a/lib/tdm_datamodel.hpp +++ b/lib/tdm_datamodel.hpp @@ -292,7 +292,7 @@ struct localcolumn { std::string values_; // -> refers to usi:data -> _sequence - const std::string get_info(int width = 20) + const std::string get_info(int width = 25) { std::stringstream ss; ss<process_root(showlog); this->process_channelgroups(showlog); this->process_channels(showlog); + this->process_submatrices(showlog); + this->process_localcolumns(showlog); } void tdm_reaper::process_include(bool showlog) @@ -181,7 +183,7 @@ void tdm_reaper::process_channelgroups(bool showlog) } else { - throw std::runtime_error("tdm_channelgroup without root id"); + throw std::runtime_error("tdm_channelgroup with out/multiple root id(s)"); } tdmchannelgroup.channels_ = this->extract_ids(group.child_value("channels")); tdmchannelgroup.submatrices_ = this->extract_ids(group.child_value("submatrices")); @@ -214,8 +216,12 @@ void tdm_reaper::process_channels(bool showlog) tdmchannel.description_ = channel.child_value("description"); tdmchannel.unit_string_ = channel.child_value("unit_string"); tdmchannel.datatype_ = channel.child_value("datatype"); - tdmchannel.minimum_ = std::stod(channel.child_value("minimum")); - tdmchannel.maximum_ = std::stod(channel.child_value("maximum")); + std::string chmin = channel.child_value("minimum"); + chmin = chmin.empty() ? std::string("0.0") : chmin; + tdmchannel.minimum_ = std::stod(chmin); + std::string chmax = channel.child_value("maximum"); + chmax = chmax.empty() ? std::string("0.0") : chmax; + tdmchannel.maximum_ = std::stod(chmax); std::vector cg = this->extract_ids(channel.child_value("group")); if ( cg.size() == 1 ) { @@ -223,7 +229,7 @@ void tdm_reaper::process_channels(bool showlog) } else { - throw std::runtime_error("tdm_channel without group id"); + throw std::runtime_error("tdm_channel with out/multiple group id(s)"); } tdmchannel.local_columns_ = this->extract_ids(channel.child_value("local_columns")); @@ -236,5 +242,107 @@ void tdm_reaper::process_channels(bool showlog) if ( showlog ) std::cout<<"number of channels: "< + pugi::xml_node tdmdata = xml_doc_.child("usi:tdm").child("usi:data"); + + // find all its elements + for ( pugi::xml_node subm = tdmdata.child("submatrix"); subm; + subm = subm.next_sibling("submatrix") ) + { + // declare new submatrix + submatrix submat; + + // extract properties + submat.id_ = subm.attribute("id").value(); + submat.name_ = subm.child_value("name"); + submat.description_ = subm.child_value("description"); + std::vector mid = this->extract_ids(subm.child_value("measurement")); + if ( mid.size() == 1 ) + { + submat.measurement_ = mid.at(0); + } + else + { + throw std::runtime_error("submatrix with out/multiple measurement id(s)"); + } + submat.local_columns_ = this->extract_ids(subm.child_value("local_columns")); + std::string numrows = subm.child_value("number_of_rows"); + numrows = numrows.empty() ? std::string("0") : numrows; + submat.number_of_rows_ = std::stoul(numrows); + + // add submatrix to map + submatrices_.insert( std::pair(submat.id_,submat) ); + + if ( showlog ) std::cout< + pugi::xml_node tdmdata = xml_doc_.child("usi:tdm").child("usi:data"); + + // find all its elements + for ( pugi::xml_node loccol = tdmdata.child("localcolumn"); loccol; + loccol = loccol.next_sibling("localcolumn") ) + { + // declare new localcolumn + localcolumn locc; + + // extract properties + locc.id_ = loccol.attribute("id").value(); + locc.name_ = loccol.child_value("name"); + locc.description_ = loccol.child_value("description"); + std::vector mq = this->extract_ids(loccol.child_value("measurement_quantity")); + if ( mq.size() == 1 ) + { + locc.measurement_quantity_ = mq.at(0); + } + else + { + throw std::runtime_error("localcolumn with out/multiple measurement quantity id(s)"); + } + std::vector sm = this->extract_ids(loccol.child_value("submatrix")); + if ( sm.size() == 1 ) + { + locc.submatrix_ = sm.at(0); + } + else + { + throw std::runtime_error("localcolumn with out/multiple submatrix id(s)"); + } + std::string lcmin = loccol.child_value("minimum"); + lcmin = lcmin.empty() ? std::string("0.0") : lcmin; + locc.minimum_ = std::stod(lcmin); + std::string lcmax = loccol.child_value("maximum"); + lcmax = lcmax.empty() ? std::string("0.0") : lcmax; + locc.maximum_ = std::stod(lcmax); + locc.sequence_representation_ = loccol.child_value("sequence_representation"); + // TODO + // .... loccal.child_value("generation_parameters"); + + std::vector vl = this->extract_ids(loccol.child_value("values")); + if ( vl.size() == 1 ) + { + locc.values_ = vl.at(0); + } + else + { + throw std::runtime_error("localcolumn with out/multiple values id(s)"); + } + + // add localcolumn to map + localcolumns_.insert( std::pair(locc.id_,locc) ); + + if ( showlog ) std::cout< tdmchannels_; - // // number/names/ids of channels, channelgroups and channels's assignment to groups - // int num_channels_, num_groups_; - // std::vector channel_id_, inc_id_, units_, channel_name_; - // std::vector group_id_, group_name_; - // std::vector> group_timestamp_; - // std::vector num_channels_group_; - // std::vector channels_group_; - // std::vector channel_ext_; - // - // // neglect empty groups - // bool neglect_empty_groups_; - // int num_empty_groups_; - // - // // minimum/maximum value in particular channel (is provided in .tdm file as float) - // std::vector> minmax_; - // - // // use xpointers and ids to assign channels to byteoffsets - // std::map xml_local_columns_, xml_values_, xml_double_sequence_; - // - // // byteoffset, length and datatype of channels - // std::vector byteoffset_; - // std::vector length_; - // std::vector type_; - // std::vector external_id_; - // - // // NI datatypes ( ) - // std::map datatypes_; - // - // // .tdm-file eventually contains some meta information (about measurement) - // std::map root_info_; - // std::map meta_info_; - // + // submatrices and local_columns + std::map submatrices_; + std::map localcolumns_; + // // binary data container // std::vector tdxbuf_; @@ -129,293 +101,9 @@ public: void process_channelgroups(bool showlog); void process_channels(bool showlog); - // void parse_structure(); - // - // void list_channels(std::ostream& gout = std::cout, int width = 15, int maxshow = 50); - // void list_groups(std::ostream& gout = std::cout, int width = 15, int maxshow = 50); - // - // void show_structure(); - // - // // count number of occurences of substring in string - // int count_occ_string(std::string s, std::string sub) - // { - // int num_occs = 0; - // std::string::size_type pos = 0; - // - // while ( ( pos = s.find(sub,pos) ) != std::string::npos ) - // { - // num_occs++; - // pos += sub.length(); - // } - // - // return num_occs; - // } - // - // // obtain substring of 'entirestr' in between starting and stopping delimiter - // std::string get_str_between(std::string entirestr, std::string startlim, std::string stoplim) - // { - // std::size_t apos = entirestr.find(startlim); - // std::size_t bpos = entirestr.find_last_of(stoplim); - // assert( apos != std::string::npos && bpos != std::string::npos ); - // return entirestr.substr(apos+startlim.length(),bpos-(apos+startlim.length())); - // } - // - // void print_hash_local(const char* filename, int width = 20) - // { - // std::ofstream fout(filename); - // - // std::map::iterator it; - // int count = 0; - // for ( it = xml_local_columns_.begin(); it != xml_local_columns_.end(); it++ ) - // { - // count++; - // fout<first; - // fout<second; - // fout<<"\n"; - // } - // fout.close(); - // } - // - // void print_hash_values(const char* filename, int width = 20) - // { - // std::ofstream fout(filename); - // - // std::map::iterator it; - // int count = 0; - // for ( it = xml_values_.begin(); it != xml_values_.end(); it++ ) - // { - // count++; - // fout<first; - // fout<second; - // fout<<"\n"; - // } - // fout.close(); - // } - // - // void print_hash_double(const char* filename, int width = 20) - // { - // std::ofstream fout(filename); - // - // std::map::iterator it; - // int count = 0; - // for ( it = xml_double_sequence_.begin(); it != xml_double_sequence_.end(); it++ ) - // { - // count++; - // fout<first; - // fout<second; - // fout<<"\n"; - // } - // fout.close(); - // } - // - // void print_extid(const char* filename, int width = 20) - // { - // std::ofstream fout(filename); - // - // int count = 0; - // for ( auto extid: channel_ext_ ) - // { - // count++; - // fout<= 0 && groupid < num_groups_ ); - // - // return num_channels_group_[groupid]; - // } - // - // const std::string& channel_name(int channelid) - // { - // assert( channelid >= 0 && channelid < num_channels_ ); - // - // return channel_name_[channelid]; - // } - // - // // obtain overall channel id from combined group and group-specific channel id - // int obtain_channel_id(int groupid, int channelid) - // { - // assert( groupid >= 0 && groupid < num_groups_ ); - // assert( channelid >= 0 && channelid < num_channels_group_[groupid] ); - // - // // find cummulative number of channels - // int numsum = 0; - // for ( int i = 0; i < groupid; i++ ) - // { - // numsum += num_channels_group_[i]; - // } - // assert( (numsum + channelid) >= 0 ); - // assert( (numsum + channelid) <= num_channels_ ); - // - // return numsum+channelid; - // } - // - // const std::string& channel_name(int groupid, int channelid) - // { - // return channel_name_[obtain_channel_id(groupid,channelid)]; - // } - // - // const std::string& group_name(int groupid) - // { - // assert( groupid >= 0 && groupid < num_groups_ ); - // - // return group_name_[groupid]; - // } - // - // const std::string& channel_unit(int groupid, int channelid) - // { - // return units_[obtain_channel_id(groupid,channelid)]; - // } - // - // int channel_exists(int groupid, std::string channel_name) - // { - // assert( groupid >= 0 && groupid < num_groups_ ); - // - // int channelid = -1; - // for ( int i = 0; i < num_channels_group_[groupid]; i++) - // { - // if ( comparestrings(channel_name_[obtain_channel_id(groupid,i)],channel_name) ) - // { - // channelid = i; - // } - // } - // return channelid; - // } - // - // bool comparestrings(std::string s1, std::string s2, bool case_sensitive = false) - // { - // if ( case_sensitive ) - // { - // return ( s1.compare(s2) == 0 ); - // } - // else - // { - // std::transform( s1.begin(), s1.end(), s1.begin(), ::tolower); - // std::transform( s2.begin(), s2.end(), s2.begin(), ::tolower); - // return ( s1.compare(s2) == 0 ); - // } - // } - // - // // get time-stamp of channel-group in .tdm file given in unix format - // static std::string unix_timestamp(std::string unixts) - // { - // // average year of Gregorian calender - // const double avgdaysofyear = 365.0 + 1./4 - 1./100 + 1./400 - // - 8./24561; // gauge timestamp according to DIADEM result - // - // // convert string to long int = number of seconds since 0000/01/01 00:00 - // long int ts = atol(unixts.c_str()); - // assert( ts >= 0 ); - // - // // use STL to convert timestamp (epoch usually starts on 01.01.1970) - // std::time_t tstime = ts - 1970*avgdaysofyear*86400; - // - // // get rid of linebreak character and return the result - // return strtok(std::ctime(&tstime),"\n"); - // } - // - // std::string time_stamp(int groupid, bool startstop = true) - // { - // assert( groupid >= 0 && groupid < num_groups_ ); - // - // return startstop ? unix_timestamp(group_timestamp_[groupid].first) - // : unix_timestamp(group_timestamp_[groupid].second); - // } - // - // void list_datatypes(); - // - // // convert array of chars to single integer or floating point double - // int convert_int(std::vector bych); - // double convert_double(std::vector bych); - // - // // disassemble single integer or double into array of chars - // std::vector convert_int(int number); - // std::vector convert_double(double number); - // - // // convert entire channel, i.e. expert of .tdx binary file - // // std::vector convert_channel(int byteoffset, int length, int typesize); - // std::vector convert_channel(int channelid); - // - // // obtain channel from overall channel id... - // std::vector get_channel(int channelid); - // // ...or from group id and group-specific channel id - // std::vector channel(int groupid, int channelid) - // { - // return get_channel(obtain_channel_id(groupid,channelid)); - // } - // - // int channel_length(int groupid, int channelid) - // { - // return length_[channel_ext_[obtain_channel_id(groupid,channelid)]]; - // } - // - // double get_min(int groupid, int channelid) - // { - // return minmax_[obtain_channel_id(groupid,channelid)].first; - // } - // double get_max(int groupid, int channelid) - // { - // return minmax_[obtain_channel_id(groupid,channelid)].second; - // } - // - // void print_channel(int channelid, const char* filename, int width = 15); - // - // // obtain any meta information about .tdm-file if available - // std::string get_meta(std::string attribute_name) - // { - // // check if key "attribute_name" actually exits - // std::map::iterator positer = meta_info_.find(attribute_name); - // bool ispresent = ( positer == meta_info_.end() ) ? false : true; - // - // return ispresent ? meta_info_[attribute_name] : "key does not exist"; - // } - // - // // prepare meta information file including all available meta-data - // void print_meta(const char* filename, std::string sep = ",") - // { - // // open file - // std::ofstream fout(filename); - // - // for ( const auto& it : root_info_ ) - // { - // fout< channels); - // void set_groups(std::vector groups); - // void set_assigment(std::vector assignment); - // void set_channel(int i, std::vector data); - + // process submatrices and localcolumns + void process_submatrices(bool showlog); + void process_localcolumns(bool showlog); }; #endif diff --git a/src/main.cpp b/src/main.cpp index af8681d..685b665 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -181,7 +181,8 @@ int main(int argc, char* argv[]) try { jack.submit_files(cfgopts.at("tdm"),cfgopts.at("tdx"),true); } catch (const std::exception& e) { - throw std::runtime_error("failed to load and parse tdm/tdx files"); + throw std::runtime_error( std::string("failed to load and parse tdm/tdx files: ") + + e.what() ); } // print list of groups or channels to stdout