optimize searching xml tree with hash tables
This commit is contained in:
parent
b16539264f
commit
2841137f8e
@ -105,11 +105,45 @@ void tdm_ripper::parse_structure()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// extract basic information about available channels
|
// obtain list of xpointers and ids to assign channels
|
||||||
for (pugi::xml_node anode: subtreedata.children())
|
for (pugi::xml_node anode: subtreedata.children())
|
||||||
{
|
{
|
||||||
if ( std::string(anode.name()).compare("tdm_channel") == 0 )
|
if ( std::string(anode.name()).compare("tdm_channel") == 0 )
|
||||||
{
|
{
|
||||||
|
std::string id(anode.attribute("id").value());
|
||||||
|
std::string val = get_str_between(anode.child_value("local_columns"),"\"","\"");
|
||||||
|
xml_local_columns_.insert(std::pair<std::string,std::string>(id,val));
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( std::string(anode.name()).compare("localcolumn") == 0 )
|
||||||
|
{
|
||||||
|
std::string id(anode.attribute("id").value());
|
||||||
|
std::string val = get_str_between(anode.child_value("values"),"\"","\"");
|
||||||
|
xml_values_.insert(std::pair<std::string,std::string>(id,val));
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( std::string(anode.name()).compare("double_sequence") == 0 )
|
||||||
|
{
|
||||||
|
std::string id(anode.attribute("id").value());
|
||||||
|
std::string val = anode.child("values").attribute("external").value();
|
||||||
|
xml_double_sequence_.insert(std::pair<std::string,std::string>(id,val));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout<<"number of pairs in\n";
|
||||||
|
std::cout<<std::setw(25)<<std::left<<"xml_local_columns_:"<<xml_local_columns_.size()<<"\n";
|
||||||
|
std::cout<<std::setw(25)<<std::left<<"xml_values_:"<<xml_values_.size()<<"\n";
|
||||||
|
std::cout<<std::setw(25)<<std::left<<"xml_double_sequence_:"<<xml_double_sequence_.size()<<"\n";
|
||||||
|
std::cout<<std::right<<"\n\n";
|
||||||
|
|
||||||
|
// extract basic information about available channels
|
||||||
|
// int prog = 0;
|
||||||
|
for (pugi::xml_node anode: subtreedata.children())
|
||||||
|
{
|
||||||
|
if ( std::string(anode.name()).compare("tdm_channel") == 0 )
|
||||||
|
{
|
||||||
|
// prog++;
|
||||||
|
// std::cout<<"processing channel "<<prog<<"\n";
|
||||||
|
|
||||||
channel_id_.push_back(anode.attribute("id").value());
|
channel_id_.push_back(anode.attribute("id").value());
|
||||||
channel_name_.push_back(anode.child_value("name"));
|
channel_name_.push_back(anode.child_value("name"));
|
||||||
std::string groupid(anode.child_value("group"));
|
std::string groupid(anode.child_value("group"));
|
||||||
@ -128,29 +162,34 @@ void tdm_ripper::parse_structure()
|
|||||||
minmax_.push_back(minmaxchan);
|
minmax_.push_back(minmaxchan);
|
||||||
|
|
||||||
// get correct assignment of channels to byteoffset, length and datatype
|
// get correct assignment of channels to byteoffset, length and datatype
|
||||||
std::string locol = get_str_between(anode.child_value("local_columns"),"\"","\"");
|
// std::string locol = get_str_between(anode.child_value("local_columns"),"\"","\"");
|
||||||
std::string locolval;
|
// std::string locolval;
|
||||||
for (pugi::xml_node anode: subtreedata.children())
|
// locolval = local_columns_val_[locol];
|
||||||
{
|
// for (pugi::xml_node anode: subtreedata.children())
|
||||||
if ( std::string(anode.name()).compare("localcolumn") == 0
|
// {
|
||||||
&& std::string(anode.attribute("id").value()).compare(locol) == 0 )
|
// if ( std::string(anode.name()).compare("localcolumn") == 0
|
||||||
{
|
// && std::string(anode.attribute("id").value()).compare(locol) == 0 )
|
||||||
locolval = get_str_between(anode.child_value("values"),"\"","\"");
|
// {
|
||||||
}
|
// locolval = get_str_between(anode.child_value("values"),"\"","\"");
|
||||||
}
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
std::string locolvalext;
|
std::string locolvalext;
|
||||||
for (pugi::xml_node anode: subtreedata.children())
|
// locolvalext = double_sequence_id_[locolval];
|
||||||
{
|
// for (pugi::xml_node anode: subtreedata.children())
|
||||||
if ( std::string(anode.name()).compare("double_sequence") == 0
|
// {
|
||||||
&& std::string(anode.attribute("id").value()).compare(locolval) == 0 )
|
// if ( std::string(anode.name()).compare("double_sequence") == 0
|
||||||
{
|
// && std::string(anode.attribute("id").value()).compare(locolval) == 0 )
|
||||||
locolvalext = anode.child("values").attribute("external").value();
|
// {
|
||||||
}
|
// locolvalext = anode.child("values").attribute("external").value();
|
||||||
}
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
locolvalext = xml_double_sequence_[xml_values_[xml_local_columns_[anode.attribute("id").value()]]];
|
||||||
|
|
||||||
// save external id of channel and get corresponding channel index
|
// save external id of channel and get corresponding channel index
|
||||||
inc_id_.push_back(locolvalext);
|
inc_id_.push_back(locolvalext);
|
||||||
int extid = 0;
|
int extid = 1;
|
||||||
for ( int i = 0; i < (int)external_id_.size(); i++ )
|
for ( int i = 0; i < (int)external_id_.size(); i++ )
|
||||||
{
|
{
|
||||||
if ( external_id_[i].compare(locolvalext) == 0 ) extid = i+1;
|
if ( external_id_[i].compare(locolvalext) == 0 ) extid = i+1;
|
||||||
@ -159,6 +198,9 @@ void tdm_ripper::parse_structure()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// std::string keyinit("usi23258");
|
||||||
|
// std::cout<<"xml test "<<xml_double_sequence_[xml_values_[xml_local_columns_[keyinit]]]<<"\n\n";
|
||||||
|
|
||||||
// check consistency of number of channelgroups
|
// check consistency of number of channelgroups
|
||||||
int numgroups = count_occ_string(subtreedata.child("tdm_root").child_value("channelgroups"),"id");
|
int numgroups = count_occ_string(subtreedata.child("tdm_root").child_value("channelgroups"),"id");
|
||||||
if ( 0*numgroups == 0 ) assert( numgroups == num_groups_ );
|
if ( 0*numgroups == 0 ) assert( numgroups == num_groups_ );
|
||||||
@ -217,9 +259,9 @@ void tdm_ripper::list_channels(std::ostream& gout, int width, int maxshow)
|
|||||||
gout<<std::setw(width)<<channel_id_[i];
|
gout<<std::setw(width)<<channel_id_[i];
|
||||||
gout<<std::setw(width)<<inc_id_[i];
|
gout<<std::setw(width)<<inc_id_[i];
|
||||||
gout<<std::setw(2*width)<<channel_name_[i];
|
gout<<std::setw(2*width)<<channel_name_[i];
|
||||||
gout<<std::setw(width)<<byteoffset_[i];
|
gout<<std::setw(width)<<byteoffset_[channel_ext_[i]-1];
|
||||||
gout<<std::setw(width)<<length_[i];
|
gout<<std::setw(width)<<length_[channel_ext_[i]-1];
|
||||||
gout<<std::setw(width)<<type_[i];
|
gout<<std::setw(width)<<type_[channel_ext_[i]-1];
|
||||||
gout<<std::setw(width)<<units_[i];
|
gout<<std::setw(width)<<units_[i];
|
||||||
gout<<std::setw(width)<<minmax_[i].first;
|
gout<<std::setw(width)<<minmax_[i].first;
|
||||||
gout<<std::setw(width)<<minmax_[i].second;
|
gout<<std::setw(width)<<minmax_[i].second;
|
||||||
@ -227,8 +269,6 @@ void tdm_ripper::list_channels(std::ostream& gout, int width, int maxshow)
|
|||||||
gout<<std::setw(width)<<group_id_[channels_group_[i]-1];
|
gout<<std::setw(width)<<group_id_[channels_group_[i]-1];
|
||||||
gout<<std::setw(width)<<group_name_[channels_group_[i]-1];
|
gout<<std::setw(width)<<group_name_[channels_group_[i]-1];
|
||||||
gout<<std::setw(width)<<num_channels_group_[channels_group_[i]-1];
|
gout<<std::setw(width)<<num_channels_group_[channels_group_[i]-1];
|
||||||
gout<<std::setw(width)<<minmax_[i].first;
|
|
||||||
gout<<std::setw(width)<<minmax_[i].second;
|
|
||||||
gout<<"\n";
|
gout<<"\n";
|
||||||
}
|
}
|
||||||
gout<<"\n\n";
|
gout<<"\n\n";
|
||||||
|
@ -35,6 +35,9 @@ class tdm_ripper
|
|||||||
// minimum/maximum value in particular channel (is provided in .tdm file as float)
|
// minimum/maximum value in particular channel (is provided in .tdm file as float)
|
||||||
std::vector<std::pair<double,double>> minmax_;
|
std::vector<std::pair<double,double>> minmax_;
|
||||||
|
|
||||||
|
// use xpointers and ids to assign channels to byteoffsets
|
||||||
|
std::map<std::string,std::string> xml_local_columns_, xml_values_, xml_double_sequence_;
|
||||||
|
|
||||||
// byteoffset, length and datatype of channels
|
// byteoffset, length and datatype of channels
|
||||||
std::vector<int> byteoffset_;
|
std::vector<int> byteoffset_;
|
||||||
std::vector<int> length_;
|
std::vector<int> length_;
|
||||||
@ -57,7 +60,7 @@ public:
|
|||||||
|
|
||||||
void parse_structure();
|
void parse_structure();
|
||||||
|
|
||||||
void list_channels(std::ostream& gout = std::cout, int width = 15, int maxshow = 300);
|
void list_channels(std::ostream& gout = std::cout, int width = 15, int maxshow = 50);
|
||||||
|
|
||||||
void show_structure();
|
void show_structure();
|
||||||
|
|
||||||
@ -85,6 +88,57 @@ public:
|
|||||||
return entirestr.substr(apos+startlim.length(),bpos-(apos+startlim.length()));
|
return entirestr.substr(apos+startlim.length(),bpos-(apos+startlim.length()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void print_hash_local(const char* filename, int width = 20)
|
||||||
|
{
|
||||||
|
std::ofstream fout(filename);
|
||||||
|
|
||||||
|
std::map<std::string,std::string>::iterator it;
|
||||||
|
int count = 0;
|
||||||
|
for ( it = xml_local_columns_.begin(); it != xml_local_columns_.end(); it++ )
|
||||||
|
{
|
||||||
|
count++;
|
||||||
|
fout<<std::setw(width)<<count;
|
||||||
|
fout<<std::setw(width)<<it->first;
|
||||||
|
fout<<std::setw(width)<<it->second;
|
||||||
|
fout<<"\n";
|
||||||
|
}
|
||||||
|
fout.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
void print_hash_values(const char* filename, int width = 20)
|
||||||
|
{
|
||||||
|
std::ofstream fout(filename);
|
||||||
|
|
||||||
|
std::map<std::string,std::string>::iterator it;
|
||||||
|
int count = 0;
|
||||||
|
for ( it = xml_values_.begin(); it != xml_values_.end(); it++ )
|
||||||
|
{
|
||||||
|
count++;
|
||||||
|
fout<<std::setw(width)<<count;
|
||||||
|
fout<<std::setw(width)<<it->first;
|
||||||
|
fout<<std::setw(width)<<it->second;
|
||||||
|
fout<<"\n";
|
||||||
|
}
|
||||||
|
fout.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
void print_hash_double(const char* filename, int width = 20)
|
||||||
|
{
|
||||||
|
std::ofstream fout(filename);
|
||||||
|
|
||||||
|
std::map<std::string,std::string>::iterator it;
|
||||||
|
int count = 0;
|
||||||
|
for ( it = xml_double_sequence_.begin(); it != xml_double_sequence_.end(); it++ )
|
||||||
|
{
|
||||||
|
count++;
|
||||||
|
fout<<std::setw(width)<<count;
|
||||||
|
fout<<std::setw(width)<<it->first;
|
||||||
|
fout<<std::setw(width)<<it->second;
|
||||||
|
fout<<"\n";
|
||||||
|
}
|
||||||
|
fout.close();
|
||||||
|
}
|
||||||
|
|
||||||
// provide number of channels and group
|
// provide number of channels and group
|
||||||
const int& num_channels()
|
const int& num_channels()
|
||||||
{
|
{
|
||||||
|
16
main.cpp
16
main.cpp
@ -11,20 +11,16 @@ int main(int argc, char* argv[])
|
|||||||
tdm_ripper ripper(argv[1]);
|
tdm_ripper ripper(argv[1]);
|
||||||
|
|
||||||
// ripper.list_datatypes();
|
// ripper.list_datatypes();
|
||||||
|
// ripper.show_structure();
|
||||||
|
|
||||||
// int sn = -76476;
|
// ripper.print_hash_local("data/hash_table_xml_local.dat");
|
||||||
// std::vector<unsigned char> bych = ripper.convert_int(sn);
|
// ripper.print_hash_values("data/hash_table_xml_value.dat");
|
||||||
// std::cout<<"length of vector "<<bych.size()<<"\n\n";
|
// ripper.print_hash_double("data/hash_table_xml_double.dat");
|
||||||
// for ( auto c: bych) std::cout<<(int)c<<" ";
|
|
||||||
// std::cout<<"\n\n";
|
|
||||||
//
|
|
||||||
// std::cout<<ripper.convert_int(bych)<<"\n\n";
|
|
||||||
|
|
||||||
ripper.list_channels();
|
ripper.list_channels();
|
||||||
std::ofstream fout("data/list_of_channels.dat");
|
std::ofstream fout("data/list_of_channels.dat");
|
||||||
ripper.list_channels(fout);
|
ripper.list_channels(fout);
|
||||||
fout.close();
|
fout.close();
|
||||||
// ripper.show_structure();
|
|
||||||
|
|
||||||
std::cout<<"number of channels "<<ripper.num_channels()<<"\n\n";
|
std::cout<<"number of channels "<<ripper.num_channels()<<"\n\n";
|
||||||
std::cout<<"number of groups "<<ripper.num_groups()<<"\n\n";
|
std::cout<<"number of groups "<<ripper.num_groups()<<"\n\n";
|
||||||
@ -33,7 +29,9 @@ int main(int argc, char* argv[])
|
|||||||
// for ( auto el: channA ) std::cout<<el<<"\n";
|
// for ( auto el: channA ) std::cout<<el<<"\n";
|
||||||
// std::cout<<"\n\n";
|
// std::cout<<"\n\n";
|
||||||
|
|
||||||
for ( int i = 0; i < ripper.num_channels(); i++ )
|
for ( int i = 0; i < 30; i++ )
|
||||||
|
// for ( int i = 0; i < ripper.num_channels(); i++ )
|
||||||
|
// for ( int i = 11880; i < ripper.num_channels(); i++ )
|
||||||
{
|
{
|
||||||
ripper.print_channel(i+1,("data/channel_"+std::to_string(i+1)+"_"
|
ripper.print_channel(i+1,("data/channel_"+std::to_string(i+1)+"_"
|
||||||
+ripper.channel_name(i+1)+".dat").c_str());
|
+ripper.channel_name(i+1)+".dat").c_str());
|
||||||
|
1
makefile
1
makefile
@ -17,6 +17,7 @@ tdm_ripper.o : lib/tdm_ripper.cpp lib/tdm_ripper.hpp
|
|||||||
clean :
|
clean :
|
||||||
rm -f $(EXE) *.o
|
rm -f $(EXE) *.o
|
||||||
rm -f *.dat
|
rm -f *.dat
|
||||||
|
rm -f data/*.dat
|
||||||
|
|
||||||
pylib : setup.py pytdm_ripper.pyx tdm_ripper.pxd tdm_ripper.o
|
pylib : setup.py pytdm_ripper.pyx tdm_ripper.pxd tdm_ripper.o
|
||||||
python3 setup.py build_ext --inplace
|
python3 setup.py build_ext --inplace
|
||||||
|
Loading…
x
Reference in New Issue
Block a user