optimize searching xml tree with hash tables

This commit is contained in:
Mario Fink 2019-04-30 19:01:19 +02:00
parent b16539264f
commit 2841137f8e
4 changed files with 128 additions and 35 deletions

View File

@ -105,11 +105,45 @@ void tdm_ripper::parse_structure()
}
}
// extract basic information about available channels
// obtain list of xpointers and ids to assign channels
for (pugi::xml_node anode: subtreedata.children())
{
if ( std::string(anode.name()).compare("tdm_channel") == 0 )
{
std::string id(anode.attribute("id").value());
std::string val = get_str_between(anode.child_value("local_columns"),"\"","\"");
xml_local_columns_.insert(std::pair<std::string,std::string>(id,val));
}
if ( std::string(anode.name()).compare("localcolumn") == 0 )
{
std::string id(anode.attribute("id").value());
std::string val = get_str_between(anode.child_value("values"),"\"","\"");
xml_values_.insert(std::pair<std::string,std::string>(id,val));
}
if ( std::string(anode.name()).compare("double_sequence") == 0 )
{
std::string id(anode.attribute("id").value());
std::string val = anode.child("values").attribute("external").value();
xml_double_sequence_.insert(std::pair<std::string,std::string>(id,val));
}
}
std::cout<<"number of pairs in\n";
std::cout<<std::setw(25)<<std::left<<"xml_local_columns_:"<<xml_local_columns_.size()<<"\n";
std::cout<<std::setw(25)<<std::left<<"xml_values_:"<<xml_values_.size()<<"\n";
std::cout<<std::setw(25)<<std::left<<"xml_double_sequence_:"<<xml_double_sequence_.size()<<"\n";
std::cout<<std::right<<"\n\n";
// extract basic information about available channels
// int prog = 0;
for (pugi::xml_node anode: subtreedata.children())
{
if ( std::string(anode.name()).compare("tdm_channel") == 0 )
{
// prog++;
// std::cout<<"processing channel "<<prog<<"\n";
channel_id_.push_back(anode.attribute("id").value());
channel_name_.push_back(anode.child_value("name"));
std::string groupid(anode.child_value("group"));
@ -128,29 +162,34 @@ void tdm_ripper::parse_structure()
minmax_.push_back(minmaxchan);
// get correct assignment of channels to byteoffset, length and datatype
std::string locol = get_str_between(anode.child_value("local_columns"),"\"","\"");
std::string locolval;
for (pugi::xml_node anode: subtreedata.children())
{
if ( std::string(anode.name()).compare("localcolumn") == 0
&& std::string(anode.attribute("id").value()).compare(locol) == 0 )
{
locolval = get_str_between(anode.child_value("values"),"\"","\"");
}
}
// std::string locol = get_str_between(anode.child_value("local_columns"),"\"","\"");
// std::string locolval;
// locolval = local_columns_val_[locol];
// for (pugi::xml_node anode: subtreedata.children())
// {
// if ( std::string(anode.name()).compare("localcolumn") == 0
// && std::string(anode.attribute("id").value()).compare(locol) == 0 )
// {
// locolval = get_str_between(anode.child_value("values"),"\"","\"");
// }
// }
std::string locolvalext;
for (pugi::xml_node anode: subtreedata.children())
{
if ( std::string(anode.name()).compare("double_sequence") == 0
&& std::string(anode.attribute("id").value()).compare(locolval) == 0 )
{
locolvalext = anode.child("values").attribute("external").value();
}
}
// locolvalext = double_sequence_id_[locolval];
// for (pugi::xml_node anode: subtreedata.children())
// {
// if ( std::string(anode.name()).compare("double_sequence") == 0
// && std::string(anode.attribute("id").value()).compare(locolval) == 0 )
// {
// locolvalext = anode.child("values").attribute("external").value();
// }
// }
locolvalext = xml_double_sequence_[xml_values_[xml_local_columns_[anode.attribute("id").value()]]];
// save external id of channel and get corresponding channel index
inc_id_.push_back(locolvalext);
int extid = 0;
int extid = 1;
for ( int i = 0; i < (int)external_id_.size(); i++ )
{
if ( external_id_[i].compare(locolvalext) == 0 ) extid = i+1;
@ -159,6 +198,9 @@ void tdm_ripper::parse_structure()
}
}
// std::string keyinit("usi23258");
// std::cout<<"xml test "<<xml_double_sequence_[xml_values_[xml_local_columns_[keyinit]]]<<"\n\n";
// check consistency of number of channelgroups
int numgroups = count_occ_string(subtreedata.child("tdm_root").child_value("channelgroups"),"id");
if ( 0*numgroups == 0 ) assert( numgroups == num_groups_ );
@ -217,9 +259,9 @@ void tdm_ripper::list_channels(std::ostream& gout, int width, int maxshow)
gout<<std::setw(width)<<channel_id_[i];
gout<<std::setw(width)<<inc_id_[i];
gout<<std::setw(2*width)<<channel_name_[i];
gout<<std::setw(width)<<byteoffset_[i];
gout<<std::setw(width)<<length_[i];
gout<<std::setw(width)<<type_[i];
gout<<std::setw(width)<<byteoffset_[channel_ext_[i]-1];
gout<<std::setw(width)<<length_[channel_ext_[i]-1];
gout<<std::setw(width)<<type_[channel_ext_[i]-1];
gout<<std::setw(width)<<units_[i];
gout<<std::setw(width)<<minmax_[i].first;
gout<<std::setw(width)<<minmax_[i].second;
@ -227,8 +269,6 @@ void tdm_ripper::list_channels(std::ostream& gout, int width, int maxshow)
gout<<std::setw(width)<<group_id_[channels_group_[i]-1];
gout<<std::setw(width)<<group_name_[channels_group_[i]-1];
gout<<std::setw(width)<<num_channels_group_[channels_group_[i]-1];
gout<<std::setw(width)<<minmax_[i].first;
gout<<std::setw(width)<<minmax_[i].second;
gout<<"\n";
}
gout<<"\n\n";

View File

@ -35,6 +35,9 @@ class tdm_ripper
// minimum/maximum value in particular channel (is provided in .tdm file as float)
std::vector<std::pair<double,double>> minmax_;
// lookup tables (element id -> referenced id/xpointer target) used to assign channels to byteoffsets
std::map<std::string,std::string> xml_local_columns_, xml_values_, xml_double_sequence_;
// byteoffset, length and datatype of channels
std::vector<int> byteoffset_;
std::vector<int> length_;
@ -57,7 +60,7 @@ public:
void parse_structure();
void list_channels(std::ostream& gout = std::cout, int width = 15, int maxshow = 300);
void list_channels(std::ostream& gout = std::cout, int width = 15, int maxshow = 50);
void show_structure();
@ -85,6 +88,57 @@ public:
return entirestr.substr(apos+startlim.length(),bpos-(apos+startlim.length()));
}
// Write the contents of xml_local_columns_ to the text file 'filename'.
// One line is emitted per map entry, in the map's iteration order:
//   <running 1-based index> <key> <mapped value>
// with each of the three fields padded via std::setw(width).
void print_hash_local(const char* filename, int width = 20)
{
  std::ofstream table_out(filename);
  int row = 0;
  for ( const auto& entry : xml_local_columns_ )
  {
    ++row;
    table_out<<std::setw(width)<<row
             <<std::setw(width)<<entry.first
             <<std::setw(width)<<entry.second
             <<"\n";
  }
  table_out.close();
}
// Write the contents of xml_values_ to the text file 'filename'.
// One line is emitted per map entry, in the map's iteration order:
//   <running 1-based index> <key> <mapped value>
// with each of the three fields padded via std::setw(width).
void print_hash_values(const char* filename, int width = 20)
{
  std::ofstream table_out(filename);
  int row = 0;
  for ( const auto& entry : xml_values_ )
  {
    ++row;
    table_out<<std::setw(width)<<row
             <<std::setw(width)<<entry.first
             <<std::setw(width)<<entry.second
             <<"\n";
  }
  table_out.close();
}
// Write the contents of xml_double_sequence_ to the text file 'filename'.
// One line is emitted per map entry, in the map's iteration order:
//   <running 1-based index> <key> <mapped value>
// with each of the three fields padded via std::setw(width).
void print_hash_double(const char* filename, int width = 20)
{
  std::ofstream table_out(filename);
  int row = 0;
  for ( const auto& entry : xml_double_sequence_ )
  {
    ++row;
    table_out<<std::setw(width)<<row
             <<std::setw(width)<<entry.first
             <<std::setw(width)<<entry.second
             <<"\n";
  }
  table_out.close();
}
// provide number of channels and group
const int& num_channels()
{

View File

@ -11,20 +11,16 @@ int main(int argc, char* argv[])
tdm_ripper ripper(argv[1]);
// ripper.list_datatypes();
// ripper.show_structure();
// int sn = -76476;
// std::vector<unsigned char> bych = ripper.convert_int(sn);
// std::cout<<"length of vector "<<bych.size()<<"\n\n";
// for ( auto c: bych) std::cout<<(int)c<<" ";
// std::cout<<"\n\n";
//
// std::cout<<ripper.convert_int(bych)<<"\n\n";
// ripper.print_hash_local("data/hash_table_xml_local.dat");
// ripper.print_hash_values("data/hash_table_xml_value.dat");
// ripper.print_hash_double("data/hash_table_xml_double.dat");
ripper.list_channels();
std::ofstream fout("data/list_of_channels.dat");
ripper.list_channels(fout);
fout.close();
// ripper.show_structure();
std::cout<<"number of channels "<<ripper.num_channels()<<"\n\n";
std::cout<<"number of groups "<<ripper.num_groups()<<"\n\n";
@ -33,7 +29,9 @@ int main(int argc, char* argv[])
// for ( auto el: channA ) std::cout<<el<<"\n";
// std::cout<<"\n\n";
for ( int i = 0; i < ripper.num_channels(); i++ )
for ( int i = 0; i < 30; i++ )
// for ( int i = 0; i < ripper.num_channels(); i++ )
// for ( int i = 11880; i < ripper.num_channels(); i++ )
{
ripper.print_channel(i+1,("data/channel_"+std::to_string(i+1)+"_"
+ripper.channel_name(i+1)+".dat").c_str());

View File

@ -17,6 +17,7 @@ tdm_ripper.o : lib/tdm_ripper.cpp lib/tdm_ripper.hpp
clean :
rm -f $(EXE) *.o
rm -f *.dat
rm -f data/*.dat
pylib : setup.py pytdm_ripper.pyx tdm_ripper.pxd tdm_ripper.o
python3 setup.py build_ext --inplace