tdm data model: process tdm_root finished

This commit is contained in:
Mario Fink 2021-01-19 20:30:12 +01:00
parent c0f5346f58
commit 48ec660d2d
3 changed files with 402 additions and 334 deletions

View File

@ -4,8 +4,29 @@
// -------------------------------------------------------------------------- //
// default constructor: performs no work of its own; a (tdm,tdx) pair of files
// can be handed over afterwards by means of submit_files()
tdm_reaper::tdm_reaper() = default;
// decoding constructor: stores the paths of the .tdm and .tdx file and
// directly processes the tdm data model
//   tdmfile  path of the .tdm file
//   tdxfile  path of the .tdx file
//   showlog  passed on to process_tdm()
tdm_reaper::tdm_reaper(std::string tdmfile, std::string tdxfile, bool showlog)
  : tdmfile_(tdmfile),
    tdxfile_(tdxfile)
{
  process_tdm(showlog);
}
void tdm_reaper::submit_files(std::string tdmfile, std::string tdxfile, bool showlog)
{
// save files
tdmfile_ = tdmfile;
tdxfile_ = tdxfile;
// start processing tdm data model
this->process_tdm(showlog);
}
void tdm_reaper::process_tdm(bool showlog)
{
// check both tdm, tdx files
std::filesystem::path ptdm(tdmfile_), ptdx(tdxfile_);
@ -43,6 +64,7 @@ tdm_reaper::tdm_reaper(std::string tdmfile, std::string tdxfile, bool showlog):
// process elements of XML
this->process_include(showlog);
this->process_root(showlog);
}
@ -55,6 +77,9 @@ void tdm_reaper::process_include(bool showlog)
std::string endianness(tdmincl.child("file").attribute("byteOrder").value());
endianness_ = endianness.compare("littleEndian") == 0 ? true : false;
// check referenced .tdx file
std::string urltdx(tdmincl.child("file").attribute("url").value());
// obtain machine's endianness
int num = 1;
machine_endianness_ = ( *(char*)&num == 1 );
@ -64,12 +89,10 @@ void tdm_reaper::process_include(bool showlog)
{
std::cout<<"\n";
std::cout<<"endianess: "<<(endianness_?"little":"big")<<"\n"
<<"machine endianness: "<<(machine_endianness_?"little":"big")<<"\n\n";
<<"machine endianness: "<<(machine_endianness_?"little":"big")<<"\n"
<<"url: "<<urltdx<<"\n\n";
}
// check for existence of attributes before using
// pugi::xml_attribute attr;
// list block of massdata
for (pugi::xml_node anode: tdmincl.child("file").children())
{
@ -107,27 +130,52 @@ void tdm_reaper::process_include(bool showlog)
if ( showlog ) std::cout<<tdxblock.get_info()<<"\n";
}
if ( showlog ) std::cout<<"number of blocks: "<<tdx_blocks_.size()<<"\n";
if ( showlog ) std::cout<<"number of blocks: "<<tdx_blocks_.size()<<"\n\n";
}
// extract the <tdm_root> element of the tdm data model into tdmroot_
//   showlog  if true, the collected root information is printed to stdout
void tdm_reaper::process_root(bool showlog)
{
  // get XML node
  pugi::xml_node tdmdataroot = xml_doc_.child("usi:tdm").child("usi:data")
                                       .child("tdm_root");

  // extract properties
  tdmroot_.id_ = tdmdataroot.attribute("id").value();
  tdmroot_.name_ = tdmdataroot.child_value("name");
  tdmroot_.description_ = tdmdataroot.child_value("description");
  tdmroot_.title_ = tdmdataroot.child_value("title");
  tdmroot_.author_ = tdmdataroot.child_value("author");
  tdmroot_.timestamp_ = tdmdataroot.child_value("datetime");

  // forget the group identifiers of any previous run: submit_files() may be
  // called more than once on the same instance and would otherwise keep
  // appending to the list
  tdmroot_.channelgroups_.clear();

  // collect group identifiers by means of regex pattern "usi[0-9]+"
  const std::string chnlgrps = tdmdataroot.child_value("channelgroups");
  const std::regex regid("(usi[0-9]+)");
  const std::sregex_iterator end;
  for ( std::sregex_iterator pos(chnlgrps.begin(), chnlgrps.end(), regid); pos != end; ++pos )
  {
    tdmroot_.channelgroups_.push_back(pos->str());
  }

  if ( showlog ) std::cout<<tdmroot_.get_info()<<"\n";
}
// pugi::xml_node xmlusiincl = xml_doc_.child("usi:tdm").child("usi:include");
// pugi::xml_node xmlusidata = xml_doc_.child("usi:tdm").child("usi:data");
// pugi::xml_node xmltdmroot = xml_doc_.child("usi:tdm").child("usi:data").child("tdm_root");
// stub with an empty body: calling it currently has no effect
// NOTE(review): the class declares this method as
// (int channelid, const char* filename, int width) - parameter names differ here
void tdm_reaper::print_channel(int idx, char const* name, int width)
{
}
// stub with an empty body: nothing is written to 'out' yet
// NOTE(review): the class declares the two int parameters as 'width' and
// 'maxshow'; presumably 'g' and 'c' are meant to be those - confirm
void tdm_reaper::list_groups(std::ostream& out, int g, int c)
{
}
// stub with an empty body: nothing is written to 'out' yet
// NOTE(review): the class declares the two int parameters as 'width' and
// 'maxshow'; presumably 'g' and 'c' are meant to be those - confirm
void tdm_reaper::list_channels(std::ostream& out, int g, int c)
{
}
// void tdm_reaper::print_channel(int idx, char const* name, int width)
// {
//
// }
//
// void tdm_reaper::list_groups(std::ostream& out, int g, int c)
// {
//
// }
//
// void tdm_reaper::list_channels(std::ostream& out, int g, int c)
// {
//
// }
// -------------------------------------------------------------------------- //

View File

@ -16,6 +16,7 @@
#include <chrono>
#include <sstream>
#include <filesystem>
#include <regex>
#include "pugixml.hpp"
#include "tdm_datamodel.hpp"
@ -44,331 +45,345 @@ class tdm_reaper
// tdm root
tdm_root tdmroot_;
// number/names/ids of channels and channel groups, and the assignment of channels to groups
int num_channels_, num_groups_;
std::vector<std::string> channel_id_, inc_id_, units_, channel_name_;
std::vector<std::string> group_id_, group_name_;
std::vector<std::pair<std::string,std::string>> group_timestamp_;
std::vector<int> num_channels_group_;
std::vector<int> channels_group_;
std::vector<int> channel_ext_;
// neglect empty groups
bool neglect_empty_groups_;
int num_empty_groups_;
// minimum/maximum value in particular channel (is provided in .tdm file as float)
std::vector<std::pair<double,double>> minmax_;
// use xpointers and ids to assign channels to byteoffsets
std::map<std::string,std::string> xml_local_columns_, xml_values_, xml_double_sequence_;
// byteoffset, length and datatype of channels
std::vector<int> byteoffset_;
std::vector<int> length_;
std::vector<std::string> type_;
std::vector<std::string> external_id_;
// NI datatypes ( )
std::map<std::string, int> datatypes_;
// .tdm-file may contain some meta information (about the measurement)
std::map<std::string,std::string> root_info_;
std::map<std::string,std::string> meta_info_;
// binary data container
std::vector<unsigned char> tdxbuf_;
// // number/names/ids of channels, channelgroups and channels's assignment to groups
// int num_channels_, num_groups_;
// std::vector<std::string> channel_id_, inc_id_, units_, channel_name_;
// std::vector<std::string> group_id_, group_name_;
// std::vector<std::pair<std::string,std::string>> group_timestamp_;
// std::vector<int> num_channels_group_;
// std::vector<int> channels_group_;
// std::vector<int> channel_ext_;
//
// // neglect empty groups
// bool neglect_empty_groups_;
// int num_empty_groups_;
//
// // minimum/maximum value in particular channel (is provided in .tdm file as float)
// std::vector<std::pair<double,double>> minmax_;
//
// // use xpointers and ids to assign channels to byteoffsets
// std::map<std::string,std::string> xml_local_columns_, xml_values_, xml_double_sequence_;
//
// // byteoffset, length and datatype of channels
// std::vector<int> byteoffset_;
// std::vector<int> length_;
// std::vector<std::string> type_;
// std::vector<std::string> external_id_;
//
// // NI datatypes ( )
// std::map<std::string, int> datatypes_;
//
// // .tdm-file eventually contains some meta information (about measurement)
// std::map<std::string,std::string> root_info_;
// std::map<std::string,std::string> meta_info_;
//
// // binary data container
// std::vector<unsigned char> tdxbuf_;
public:
// decoding
tdm_reaper(std::string tdmfile, std::string tdxfile = std::string(""), bool showlog = false);
// encoding
tdm_reaper(std::vector<std::string> csvfile);
// decoding
tdm_reaper();
tdm_reaper(std::string tdmfile, std::string tdxfile = std::string(""), bool showlog = false);
// provide (tdm,tdx) files
void submit_files(std::string tdmfile, std::string tdxfile = std::string(""), bool showlog = false);
// process TDM data model in tdm file
void process_tdm(bool showlog);
// process <usi:include> element
void process_include(bool showlog);
void parse_structure();
// extract tdm_root
void process_root(bool showlog);
void list_channels(std::ostream& gout = std::cout, int width = 15, int maxshow = 50);
void list_groups(std::ostream& gout = std::cout, int width = 15, int maxshow = 50);
// process/list all channels and groups
void process_channels(bool showlog);
void process_groups(bool showlog);
void show_structure();
// count number of non-overlapping occurrences of substring 'sub' in string 's'
//   s    string to be searched
//   sub  substring to look for
// returns the number of occurrences; an empty 'sub' yields 0
int count_occ_string(std::string s, std::string sub)
{
  // an empty pattern is found at every position without the search ever
  // advancing, so it has to be rejected to avoid an endless loop
  if ( sub.empty() ) return 0;

  int num_occs = 0;
  for ( std::string::size_type pos = s.find(sub);
        pos != std::string::npos;
        pos = s.find(sub,pos+sub.length()) )
  {
    ++num_occs;
  }

  return num_occs;
}
// obtain substring of 'entirestr' in between the first occurrence of the
// starting delimiter and the last occurrence of the stopping delimiter
//   entirestr  string to extract from
//   startlim   starting delimiter (not part of the result)
//   stoplim    stopping delimiter (not part of the result)
// returns the enclosed text, or an empty string if the last stopping delimiter
// does not lie behind the starting delimiter
std::string get_str_between(std::string entirestr, std::string startlim, std::string stoplim)
{
  const std::size_t apos = entirestr.find(startlim);
  // rfind looks for 'stoplim' as a whole, whereas find_last_of would accept
  // any single character contained in it
  const std::size_t bpos = entirestr.rfind(stoplim);
  assert( apos != std::string::npos && bpos != std::string::npos );

  const std::size_t first = apos + startlim.length();
  // avoid the unsigned underflow of 'bpos-first' for overlapping delimiters
  if ( bpos < first ) return std::string();

  return entirestr.substr(first,bpos-first);
}
// write all entries of xml_local_columns_ to file 'filename', one row per
// entry: running number (starting at 1), key and value, each padded to 'width'
void print_hash_local(const char* filename, int width = 20)
{
  std::ofstream fout(filename);

  int row = 0;
  for ( const auto& entry : xml_local_columns_ )
  {
    ++row;
    fout<<std::setw(width)<<row
        <<std::setw(width)<<entry.first
        <<std::setw(width)<<entry.second
        <<"\n";
  }

  fout.close();
}
// write all entries of xml_values_ to file 'filename', one row per entry:
// running number (starting at 1), key and value, each padded to 'width'
void print_hash_values(const char* filename, int width = 20)
{
  std::ofstream fout(filename);

  int row = 0;
  for ( const auto& entry : xml_values_ )
  {
    ++row;
    fout<<std::setw(width)<<row
        <<std::setw(width)<<entry.first
        <<std::setw(width)<<entry.second
        <<"\n";
  }

  fout.close();
}
// write all entries of xml_double_sequence_ to file 'filename', one row per
// entry: running number (starting at 1), key and value, each padded to 'width'
void print_hash_double(const char* filename, int width = 20)
{
  std::ofstream fout(filename);

  int row = 0;
  for ( const auto& entry : xml_double_sequence_ )
  {
    ++row;
    fout<<std::setw(width)<<row
        <<std::setw(width)<<entry.first
        <<std::setw(width)<<entry.second
        <<"\n";
  }

  fout.close();
}
// write all elements of channel_ext_ to file 'filename', one row per element:
// running number (starting at 1) and the element, each padded to 'width'
void print_extid(const char* filename, int width = 20)
{
  std::ofstream fout(filename);

  int row = 0;
  for ( const auto extid : channel_ext_ )
  {
    ++row;
    fout<<std::setw(width)<<row
        <<std::setw(width)<<extid
        <<"\n";
  }

  fout.close();
}
// provide overall number of channels (reference to member num_channels_)
const int& num_channels() { return num_channels_; }
// provide number of groups (reference to member num_groups_)
const int& num_groups() { return num_groups_; }
// get number of channels in specific group
//   groupid  index of the group, must satisfy 0 <= groupid < num_groups_
const int& no_channels(int groupid)
{
  // only checked in builds with assertions enabled
  assert( groupid >= 0 && groupid < num_groups_ );
  const int& nchannels = num_channels_group_[groupid];
  return nchannels;
}
// get name of channel given by its overall channel id
//   channelid  overall index of the channel, must satisfy 0 <= channelid < num_channels_
const std::string& channel_name(int channelid)
{
  // only checked in builds with assertions enabled
  assert( channelid >= 0 && channelid < num_channels_ );
  const std::string& name = channel_name_[channelid];
  return name;
}
// obtain overall channel id from combined group and group-specific channel id
//   groupid    index of the group, 0 <= groupid < num_groups_
//   channelid  index of the channel within that group,
//              0 <= channelid < num_channels_group_[groupid]
// returns the number of channels in all preceding groups plus 'channelid'
int obtain_channel_id(int groupid, int channelid)
{
  assert( groupid >= 0 && groupid < num_groups_ );
  assert( channelid >= 0 && channelid < num_channels_group_[groupid] );

  // find cumulative number of channels in all groups preceding 'groupid'
  int numsum = 0;
  for ( int i = 0; i < groupid; i++ )
  {
    numsum += num_channels_group_[i];
  }

  const int overallid = numsum + channelid;
  assert( overallid >= 0 );
  // the result serves as an index into the per-channel containers, hence it
  // has to be strictly smaller than the number of channels
  assert( overallid < num_channels_ );

  return overallid;
}
// get name of channel given by group id and group-specific channel id
const std::string& channel_name(int groupid, int channelid)
{
  const int overallid = obtain_channel_id(groupid,channelid);
  return channel_name_[overallid];
}
// get name of group
//   groupid  index of the group, must satisfy 0 <= groupid < num_groups_
const std::string& group_name(int groupid)
{
  // only checked in builds with assertions enabled
  assert( groupid >= 0 && groupid < num_groups_ );
  const std::string& name = group_name_[groupid];
  return name;
}
// get entry of units_ for channel given by group id and group-specific channel id
const std::string& channel_unit(int groupid, int channelid)
{
  const int overallid = obtain_channel_id(groupid,channelid);
  return units_[overallid];
}
// look for a channel called 'channel_name' within a group; names are compared
// by comparestrings() with its default setting, i.e. ignoring case
//   groupid       index of the group, must satisfy 0 <= groupid < num_groups_
//   channel_name  name to look for
// returns the group-specific index of the last matching channel, -1 if there is none
int channel_exists(int groupid, std::string channel_name)
{
  assert( groupid >= 0 && groupid < num_groups_ );

  int found = -1;
  const int nchannels = num_channels_group_[groupid];
  for ( int idx = 0; idx < nchannels; ++idx )
  {
    const std::string& candidate = channel_name_[obtain_channel_id(groupid,idx)];
    // no early exit: a later match overrides an earlier one
    if ( comparestrings(candidate,channel_name) ) found = idx;
  }

  return found;
}
// check two strings for equality
//   s1, s2          strings to compare
//   case_sensitive  if false (default), both strings are converted to lower
//                   case before the comparison
// returns true if the strings are considered equal
bool comparestrings(std::string s1, std::string s2, bool case_sensitive = false)
{
  if ( !case_sensitive )
  {
    // tolower is only defined for values representable as unsigned char (and
    // EOF), so a plain char with a negative value must not be passed directly
    const auto lower = [](unsigned char c) { return static_cast<char>(::tolower(c)); };
    std::transform( s1.begin(), s1.end(), s1.begin(), lower);
    std::transform( s2.begin(), s2.end(), s2.begin(), lower);
  }

  return ( s1.compare(s2) == 0 );
}
// get time-stamp of channel-group in .tdm file given in unix format
//   unixts  decimal number of seconds since 0000/01/01 00:00
// returns the textual form produced by std::ctime without its trailing
// linebreak, or an empty string if std::ctime cannot convert the time
static std::string unix_timestamp(std::string unixts)
{
  // average year of Gregorian calendar
  const double avgdaysofyear = 365.0 + 1./4 - 1./100 + 1./400
                             - 8./24561; // gauge timestamp according to DIADEM result

  // convert string to integer = number of seconds since 0000/01/01 00:00;
  // long long is used because such a count exceeds the range of a 32-bit long
  const long long ts = std::atoll(unixts.c_str());
  assert( ts >= 0 );

  // use STL to convert timestamp (epoch usually starts on 01.01.1970)
  std::time_t tstime = ts - 1970*avgdaysofyear*86400;

  // std::ctime yields a null pointer if the conversion fails
  const char* rawtext = std::ctime(&tstime);
  if ( rawtext == nullptr ) return std::string();

  // get rid of linebreak character and return the result
  std::string text(rawtext);
  const std::string::size_type linebreak = text.find('\n');
  if ( linebreak != std::string::npos ) text.erase(linebreak);
  return text;
}
// get one of the two time-stamps of a group as text, see unix_timestamp()
//   groupid    index of the group, must satisfy 0 <= groupid < num_groups_
//   startstop  true (default): first element of the group's time-stamp pair,
//              false: second element (presumably start and stop time - confirm)
std::string time_stamp(int groupid, bool startstop = true)
{
  assert( groupid >= 0 && groupid < num_groups_ );

  const std::pair<std::string,std::string>& stamps = group_timestamp_[groupid];
  if ( startstop ) return unix_timestamp(stamps.first);
  return unix_timestamp(stamps.second);
}
void list_datatypes();
// convert array of chars to single integer or floating point double
int convert_int(std::vector<unsigned char> bych);
double convert_double(std::vector<unsigned char> bych);
// disassemble single integer or double into array of chars
std::vector<unsigned char> convert_int(int number);
std::vector<unsigned char> convert_double(double number);
// convert entire channel, i.e. an excerpt of the .tdx binary file
// std::vector<double> convert_channel(int byteoffset, int length, int typesize);
std::vector<double> convert_channel(int channelid);
// obtain channel from overall channel id...
std::vector<double> get_channel(int channelid);
// ...or from group id and group-specific channel id
// forwards to get_channel() after translating (groupid,channelid) into the
// overall channel id
std::vector<double> channel(int groupid, int channelid)
{
  const int overallid = obtain_channel_id(groupid,channelid);
  return get_channel(overallid);
}
// get entry of length_ belonging to a channel: the overall channel id is
// mapped by channel_ext_ onto the index used for length_
int channel_length(int groupid, int channelid)
{
  const int overallid = obtain_channel_id(groupid,channelid);
  const int extidx = channel_ext_[overallid];
  return length_[extidx];
}
// get first element of the channel's minmax_ pair (the minimum)
double get_min(int groupid, int channelid)
{
  const std::pair<double,double>& range = minmax_[obtain_channel_id(groupid,channelid)];
  return range.first;
}
// get second element of the channel's minmax_ pair (the maximum)
double get_max(int groupid, int channelid)
{
  const std::pair<double,double>& range = minmax_[obtain_channel_id(groupid,channelid)];
  return range.second;
}
void print_channel(int channelid, const char* filename, int width = 15);
// obtain any meta information about .tdm-file if available
//   attribute_name  key to look up in meta_info_
// returns the stored value, or the text "key does not exist" for an unknown key
std::string get_meta(std::string attribute_name)
{
  // check if key "attribute_name" actually exists
  const auto hit = meta_info_.find(attribute_name);
  if ( hit == meta_info_.end() ) return "key does not exist";

  return hit->second;
}
// prepare meta information file including all available meta-data:
// all entries of root_info_, a line holding only the separator, then all
// entries of meta_info_; every entry is written as key<sep>value
//   filename  file to write to
//   sep       separator between key and value
void print_meta(const char* filename, std::string sep = ",")
{
  std::ofstream fout(filename);

  // writes one "key<sep>value" line per entry of the given map
  const auto dump = [&fout,&sep](const std::map<std::string,std::string>& info)
  {
    for ( const auto& entry : info )
    {
      fout<<entry.first<<sep<<entry.second<<"\n";
    }
  };

  dump(root_info_);
  fout<<sep<<"\n";
  dump(meta_info_);

  fout.close();
}
// void parse_structure();
//
// void list_channels(std::ostream& gout = std::cout, int width = 15, int maxshow = 50);
// void list_groups(std::ostream& gout = std::cout, int width = 15, int maxshow = 50);
//
// void show_structure();
//
// // count number of occurences of substring in string
// int count_occ_string(std::string s, std::string sub)
// {
// int num_occs = 0;
// std::string::size_type pos = 0;
//
// while ( ( pos = s.find(sub,pos) ) != std::string::npos )
// {
// num_occs++;
// pos += sub.length();
// }
//
// return num_occs;
// }
//
// // obtain substring of 'entirestr' in between starting and stopping delimiter
// std::string get_str_between(std::string entirestr, std::string startlim, std::string stoplim)
// {
// std::size_t apos = entirestr.find(startlim);
// std::size_t bpos = entirestr.find_last_of(stoplim);
// assert( apos != std::string::npos && bpos != std::string::npos );
// return entirestr.substr(apos+startlim.length(),bpos-(apos+startlim.length()));
// }
//
// void print_hash_local(const char* filename, int width = 20)
// {
// std::ofstream fout(filename);
//
// std::map<std::string,std::string>::iterator it;
// int count = 0;
// for ( it = xml_local_columns_.begin(); it != xml_local_columns_.end(); it++ )
// {
// count++;
// fout<<std::setw(width)<<count;
// fout<<std::setw(width)<<it->first;
// fout<<std::setw(width)<<it->second;
// fout<<"\n";
// }
// fout.close();
// }
//
// void print_hash_values(const char* filename, int width = 20)
// {
// std::ofstream fout(filename);
//
// std::map<std::string,std::string>::iterator it;
// int count = 0;
// for ( it = xml_values_.begin(); it != xml_values_.end(); it++ )
// {
// count++;
// fout<<std::setw(width)<<count;
// fout<<std::setw(width)<<it->first;
// fout<<std::setw(width)<<it->second;
// fout<<"\n";
// }
// fout.close();
// }
//
// void print_hash_double(const char* filename, int width = 20)
// {
// std::ofstream fout(filename);
//
// std::map<std::string,std::string>::iterator it;
// int count = 0;
// for ( it = xml_double_sequence_.begin(); it != xml_double_sequence_.end(); it++ )
// {
// count++;
// fout<<std::setw(width)<<count;
// fout<<std::setw(width)<<it->first;
// fout<<std::setw(width)<<it->second;
// fout<<"\n";
// }
// fout.close();
// }
//
// void print_extid(const char* filename, int width = 20)
// {
// std::ofstream fout(filename);
//
// int count = 0;
// for ( auto extid: channel_ext_ )
// {
// count++;
// fout<<std::setw(width)<<count;
// fout<<std::setw(width)<<extid;
// fout<<"\n";
// }
// fout.close();
// }
//
// // provide number of channels and group
// const int& num_channels()
// {
// return num_channels_;
// }
// const int& num_groups()
// {
// return num_groups_;
// }
//
// // get number of channels in specific group
// const int& no_channels(int groupid)
// {
// assert( groupid >= 0 && groupid < num_groups_ );
//
// return num_channels_group_[groupid];
// }
//
// const std::string& channel_name(int channelid)
// {
// assert( channelid >= 0 && channelid < num_channels_ );
//
// return channel_name_[channelid];
// }
//
// // obtain overall channel id from combined group and group-specific channel id
// int obtain_channel_id(int groupid, int channelid)
// {
// assert( groupid >= 0 && groupid < num_groups_ );
// assert( channelid >= 0 && channelid < num_channels_group_[groupid] );
//
// // find cummulative number of channels
// int numsum = 0;
// for ( int i = 0; i < groupid; i++ )
// {
// numsum += num_channels_group_[i];
// }
// assert( (numsum + channelid) >= 0 );
// assert( (numsum + channelid) <= num_channels_ );
//
// return numsum+channelid;
// }
//
// const std::string& channel_name(int groupid, int channelid)
// {
// return channel_name_[obtain_channel_id(groupid,channelid)];
// }
//
// const std::string& group_name(int groupid)
// {
// assert( groupid >= 0 && groupid < num_groups_ );
//
// return group_name_[groupid];
// }
//
// const std::string& channel_unit(int groupid, int channelid)
// {
// return units_[obtain_channel_id(groupid,channelid)];
// }
//
// int channel_exists(int groupid, std::string channel_name)
// {
// assert( groupid >= 0 && groupid < num_groups_ );
//
// int channelid = -1;
// for ( int i = 0; i < num_channels_group_[groupid]; i++)
// {
// if ( comparestrings(channel_name_[obtain_channel_id(groupid,i)],channel_name) )
// {
// channelid = i;
// }
// }
// return channelid;
// }
//
// bool comparestrings(std::string s1, std::string s2, bool case_sensitive = false)
// {
// if ( case_sensitive )
// {
// return ( s1.compare(s2) == 0 );
// }
// else
// {
// std::transform( s1.begin(), s1.end(), s1.begin(), ::tolower);
// std::transform( s2.begin(), s2.end(), s2.begin(), ::tolower);
// return ( s1.compare(s2) == 0 );
// }
// }
//
// // get time-stamp of channel-group in .tdm file given in unix format
// static std::string unix_timestamp(std::string unixts)
// {
// // average year of Gregorian calender
// const double avgdaysofyear = 365.0 + 1./4 - 1./100 + 1./400
// - 8./24561; // gauge timestamp according to DIADEM result
//
// // convert string to long int = number of seconds since 0000/01/01 00:00
// long int ts = atol(unixts.c_str());
// assert( ts >= 0 );
//
// // use STL to convert timestamp (epoch usually starts on 01.01.1970)
// std::time_t tstime = ts - 1970*avgdaysofyear*86400;
//
// // get rid of linebreak character and return the result
// return strtok(std::ctime(&tstime),"\n");
// }
//
// std::string time_stamp(int groupid, bool startstop = true)
// {
// assert( groupid >= 0 && groupid < num_groups_ );
//
// return startstop ? unix_timestamp(group_timestamp_[groupid].first)
// : unix_timestamp(group_timestamp_[groupid].second);
// }
//
// void list_datatypes();
//
// // convert array of chars to single integer or floating point double
// int convert_int(std::vector<unsigned char> bych);
// double convert_double(std::vector<unsigned char> bych);
//
// // disassemble single integer or double into array of chars
// std::vector<unsigned char> convert_int(int number);
// std::vector<unsigned char> convert_double(double number);
//
// // convert entire channel, i.e. expert of .tdx binary file
// // std::vector<double> convert_channel(int byteoffset, int length, int typesize);
// std::vector<double> convert_channel(int channelid);
//
// // obtain channel from overall channel id...
// std::vector<double> get_channel(int channelid);
// // ...or from group id and group-specific channel id
// std::vector<double> channel(int groupid, int channelid)
// {
// return get_channel(obtain_channel_id(groupid,channelid));
// }
//
// int channel_length(int groupid, int channelid)
// {
// return length_[channel_ext_[obtain_channel_id(groupid,channelid)]];
// }
//
// double get_min(int groupid, int channelid)
// {
// return minmax_[obtain_channel_id(groupid,channelid)].first;
// }
// double get_max(int groupid, int channelid)
// {
// return minmax_[obtain_channel_id(groupid,channelid)].second;
// }
//
// void print_channel(int channelid, const char* filename, int width = 15);
//
// // obtain any meta information about .tdm-file if available
// std::string get_meta(std::string attribute_name)
// {
// // check if key "attribute_name" actually exits
// std::map<std::string,std::string>::iterator positer = meta_info_.find(attribute_name);
// bool ispresent = ( positer == meta_info_.end() ) ? false : true;
//
// return ispresent ? meta_info_[attribute_name] : "key does not exist";
// }
//
// // prepare meta information file including all available meta-data
// void print_meta(const char* filename, std::string sep = ",")
// {
// // open file
// std::ofstream fout(filename);
//
// for ( const auto& it : root_info_ )
// {
// fout<<it.first<<sep<<it.second<<"\n";
// }
// fout<<sep<<"\n";
// for ( const auto& it : meta_info_ )
// {
// fout<<it.first<<sep<<it.second<<"\n";
// }
//
// // close down file
// fout.close();
// }
// TODO add elements/methods to build .tdm and write .tdx files for your own data
// by constructing xml document tree and write data to binary .tdx

View File

@ -175,7 +175,12 @@ int main(int argc, char* argv[])
// bool listchannels = cfgopts.count("listchannels") == 1 ? true : false;
// declare and initialize tdm_ripper instance
tdm_reaper jack(cfgopts.at("tdm"),cfgopts.at("tdx"),true);
tdm_reaper jack;
try {
jack.submit_files(cfgopts.at("tdm"),cfgopts.at("tdx"),true);
} catch (const std::exception& e) {
throw std::runtime_error("failed to load and parse tdm/tdx files");
}
// print list of groups or channels to stdout
// if ( listgroups ) jack.list_groups();