commit 20566df9ca292463ab280f1b5a5b895382e05c81 Author: Mario Fink Date: Thu Feb 6 16:24:20 2020 +0000 initial commit diff --git a/README.md b/README.md new file mode 100644 index 0000000..e5d6ffe --- /dev/null +++ b/README.md @@ -0,0 +1,78 @@ + +# raw_eater + +The _raw_eater_ package is used to parse files with extension `*.raw`, which +are usually binary files produced by the lab software _Famos_ to dump measurement +time series. + +## File Structure + +The binary `*.raw` file features a series of markers that indicate the starting +point of various blocks of information. Every marker is introduced by the character +"|" = `0x 7c` followed by two letters (uppercase, except for `Cb`), which characterize the type of +marker. The following markers are defined: + +1. CF (0x 43 46) +1. CK (0x 43 4b) +1. NO (0x 4e 4f) +1. CG (0x 43 47) +1. CD (0x 43 44) +1. NT (0x 4e 54) +1. CC (0x 43 43) +1. CP (0x 43 50) +1. CR (0x 43 52) +1. CN (0x 43 4e) +1. Cb (0x 43 62) +1. CS (0x 43 53) + +Each of these markers is followed by multiple comma-separated (0x 2c) parameters +and is terminated by a semicolon `;` = 0x 3b, except for the sequence following +the data marker CS, which may contain any number of 0x3b occurrences, while still being +terminated by a semicolon at the very end of the file (since CS is the last marker +section in the file). The markers have the following meaning: + +- *CF* (mostly 4 parameters) + this marker is mostly just `|CF,2,1,1;` and hence I've got no + idea what it actually means! +- *CK* (mostly 4 parameters) + same problem for this one: it always seems to be `|CK,1,3,1,1;` ... +- *NO* (mostly 6 parameters) + provides some info about the software package/device and its version that + produced the file, e.g. 
something like + `|NO,1,85,0,77,imc STUDIO 5.0 R3 (10.09.2015)@imc DEVICES 2.8R7 (26.8.2015)@imcDev__15190567,0,;` +- *CG* (mostly 5 parameters) + another one of these apparently useless markers, looks for instance like + `|CG,1,5,1,1,1;` +- *CD* (mostly 11 parameters) + since we're dealing with measured entities from the lab, this marker contains + info about the measurement frequency, i.e. sample rate. For instance + `|CD,2, 63, 5.0000000000000001E-03,1,1,s,0,0,0, 0.0000000000000000E+00,1;` + indicates one measurement every 0.005 seconds, i.e. a sample rate of 200 Hz +- *NT* (mostly 8 parameters) + meaning unknown; for instance `|NT,1,16,1,1,1980,0,0,0.0;` + maybe it's the datatype? +- *CC* (mostly 4 parameters) + `|CC,1,3,1,1;` +- *CP* (mostly 10 parameters) + `|CP,1,16,1,4,7,32,0,0,1,0;` +- *CR* (mostly 8 parameters) + provides the _physical unit_ of the measured entity, maybe shows the + minimum and maximum value during the measurement, e.g. + `|CR,1,60,0, 1.0000000000000000E+00, 0.0000000000000000E+00,1,4,mbar;` +- *CN* (mostly 9 parameters) + gives the _name_ of the measured entity + `|CN,1,27,0,0,0,15,pressure_Vacuum,0,;` +- *Cb* (mostly 14 parameters) (optional?) + this one probably gives the minimum/maximum measured values!! + `|Cb,1,117,1,0,1,1,0,341288,0,341288,1,0.0000000000000000E+00,1.1781711390000000E+09,;` +- *CS* (mostly 4 parameters) + this marker announces the actual measurement data in binary format, + provides the number of values and the actual data, + e.g. `|CS,1, 341299, 1, ...data... ;` + +## Open Issues and Questions + +- which parameter(s) indicate(s) little vs. big endian? 
+ + + diff --git a/check_markers.sh b/check_markers.sh new file mode 100755 index 0000000..f595321 --- /dev/null +++ b/check_markers.sh @@ -0,0 +1,12 @@ +##!/bin/bash/ + +dir=$1 + +#ls ${dir} | while read fn; do echo $fn; cat ${dir}$fn | grep -a "|[A-Z][A-Z]," -o | wc -l; done; + +#ls ${dir} | while read fn; do echo $fn; cat ${dir}$fn | grep -a "|[A-Z][A-Z]," -o; done; + +#ls ${dir} | while read fn; do echo $fn; cat ${dir}$fn | xxd | head -n10 | tail -n3; done; + +ls ${dir} | while read fn; do echo $fn; cat ${dir}$fn | grep -a "|[A-Z][a-zA-Z]," -o | wc -l; done; +ls ${dir} | while read fn; do echo $fn; cat ${dir}$fn | grep -a "|[A-Z][a-zA-Z]," -o; done; diff --git a/eatit b/eatit new file mode 100755 index 0000000..20dc16f Binary files /dev/null and b/eatit differ diff --git a/makefile b/makefile new file mode 100644 index 0000000..425e0f6 --- /dev/null +++ b/makefile @@ -0,0 +1,14 @@ + +RAW = ../raw/ + +SRC = src/ +EXE = eatit + +CCC = g++ +OPT = -O3 -Wall + +$(EXE) : $(SRC)main.cpp $(SRC)raweat.hpp + $(CCC) $(OPT) $< -o $@ + +clean : + rm -f $(EXE) diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..941958b --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,85 @@ +//---------------------------------------------------------------------------// + +#include "../src/raweat.hpp" + +//---------------------------------------------------------------------------// + +int main(int argc, char* argv[]) +{ + // path of filename provided ? 
+ assert( argc > 1 && "please provide a filename and path" ); + + std::cout<<"number of CLI-arguments: "< +#include +#include +#include +#include +#include +#include + +//---------------------------------------------------------------------------// + +class raw_eater +{ + +private: + + // filename and path + std::string rawfile_; + + // raw buffer + std::vector rawdata_; + + // file format markers + std::map> markers_ = { + {"intro marker",{0x7c,0x43,0x46}}, + {"fileo marker",{0x7c,0x43,0x4b}}, + {"vendo marker",{0x7c,0x4e,0x4f}}, + {"param marker",{0x7c,0x43,0x47}}, + {"sampl marker",{0x7c,0x43,0x44}}, + {"typei marker",{0x7c,0x4e,0x54}}, + {"dimen marker",{0x7c,0x43,0x43}}, + {"datyp marker",{0x7c,0x43,0x50}}, + {"punit marker",{0x7c,0x43,0x52}}, + {"ename marker",{0x7c,0x43,0x4e}}, + {"minma marker",{0x7c,0x43,0x62}}, + {"datas marker",{0x7c,0x43,0x53}} + }; + + // data sections corresponding to markers + std::map> datasec_; + +public: + + // constructor + raw_eater(std::string rawfile) : rawfile_(rawfile) + { + // open file and put data in buffer + std::ifstream fin(rawfile.c_str(),std::ifstream::binary); + assert( fin.good() && "failed to open file" ); + try { + std::ifstream fin(rawfile.c_str(),std::ifstream::binary); + } + catch (std::ifstream::failure e) { + std::cerr<<"opening file " + rawfile + " failed"; + } + std::vector rawdata((std::istreambuf_iterator(fin)), + (std::istreambuf_iterator())); + rawdata_ = rawdata; + + // show size of buffer + std::cout<<"size of buffer "<> mrk : markers_ ) + { + assert( mrk.second.size() > 0 && "please don't defined any empty marker" ); + + // find marker's byte sequence in buffer + for ( unsigned long int idx = 0; idx < rawdata_.size(); idx++ ) + { + bool gotit = true; + for ( unsigned long int mrkidx = 0; mrkidx < mrk.second.size() && gotit; mrkidx ++ ) + { + if ( ! 
(mrk.second[mrkidx] == rawdata_[idx+mrkidx]) ) gotit = false; + } + + // if we got the marker, collect following bytes until end of marker byte 0x 3b + if ( gotit ) + { + // array of data associated to marker + std::vector markseq; + + if ( mrk.first != "datas marker" ) + { + // collect bytes until we find semicolon ";", i.e. 0x3b + int seqidx = 0; + while ( rawdata_[idx+seqidx] != 0x3b ) + { + markseq.push_back(rawdata_[idx+seqidx]); + seqidx++; + } + } + else + { + // make sure the data marker is actually the last and extends until end of file + //assert( TODO && "data marker doesn't appear to be the very last"); + + // that's the data itself + for ( unsigned long int didx = idx; didx < rawdata_.size()-1; didx++ ) + { + markseq.push_back(rawdata_[didx]); + } + } + } + } + } + + for (std::pair> mrk : markers_ ) + { + std::cout<