Compare commits

..

10 Commits

Author SHA1 Message Date
Mario Fink
b110886935 add *.pyd to gitignore, imc_channel.hpp: fix int size_t conversions 2021-09-02 11:22:37 +02:00
Mario Fink
6bc6880d47 fix/disable type conversion warnings 2021-09-02 11:16:01 +02:00
Mario Fink
b51b63dedc enable proper git tag sorting in cython/setup.py for versioning 2021-09-02 11:01:12 +02:00
Mario Fink
601613b6c4 consider platform dependency for building cython extension locally 2021-09-02 10:57:21 +02:00
5e93ed0706 * version 1.2.10
* makefile: versioning and tags: properly deal with multiple digits version tags
* setup.py: support both sdist and bdist pip wheels
2021-09-01 10:58:42 +02:00
Mario Fink
1345f6e4c9 add separate NT trigger time and Cb addtime parameters to output 2021-07-12 13:19:45 +02:00
Mario Fink
1a381f01b7 delete deprecated python example, add new python example dealing with absolute time stamp according to trigger-time 2021-07-12 12:40:23 +02:00
Mario Fink
b42a170650 bump version 1.2.9 2021-07-08 15:39:50 +02:00
Mario Fink
1d30a5f237 include (absolute) trigger-time in channel output => issue #10 2021-07-08 14:06:20 +02:00
Mario Fink
00a869ff07 properly check markers of single file/all files in directory 2021-07-08 10:17:29 +02:00
10 changed files with 190 additions and 184 deletions

1
.gitignore vendored
View File

@@ -15,6 +15,7 @@ cython/*.cpp
*.log
*.so
*.pyd
*.o
*.csv
*.parquet

View File

@@ -1,6 +1,28 @@
from distutils.core import setup
from distutils.extension import Extension
from Cython.Build import cythonize
import os
import sys
print("building on platform: "+sys.platform)
os.system("git tag -l --sort=version:refname > gittags.log")
with open ("gittags.log","r") as gt:
taglst = gt.readlines()
os.remove("gittags.log")
if len(taglst) > 0 :
version = taglst[-1].replace('\n','').replace('v','')
else:
version = 'unkown'
print("building version: "+version)
if sys.platform == "linux" or sys.platform == "darwin" :
cmpargs = ['-std=c++17','-Wno-unused-variable']
lnkargs = ['-std=c++17','-Wno-unused-variable']
elif sys.platform == "win32" :
cmpargs = ['/EHsc','/std:c++17']
lnkargs = []
else :
raise RuntimeError("unknown platform")
extensions = Extension(
name="imc_termite",
@@ -9,13 +31,13 @@ extensions = Extension(
# library_dirs=["lib"],
include_dirs=["lib"],
language='c++',
extra_compile_args=['-std=c++17','-Wno-unused-variable'],
extra_link_args=['-std=c++17'],
extra_compile_args=cmpargs,
extra_link_args=lnkargs,
)
setup(
name="imc_termite",
version='1.2.8',
name='imc_termite',
version='1.2.10',
description='IMCtermite cython extension',
author='Record Evolution GmbH',
author_email='mario.fink@record-evolution.de',

View File

@@ -3,10 +3,12 @@
#ifndef IMCCHANNEL
#define IMCCHANNEL
#include <sstream>
#include "imc_datatype.hpp"
#include "imc_conversion.hpp"
#include <sstream>
#include <math.h>
#include <chrono>
#include <ctime>
//---------------------------------------------------------------------------//
@@ -106,7 +108,8 @@ namespace imc
std::string joinvec(std::vector<dt> myvec, unsigned long int limit = 10, int prec = 10, bool fixed = true)
{
// include entire list for limit = 0
limit = (limit == 0) ? myvec.size() : limit;
unsigned long int myvecsize = (unsigned long int)myvec.size();
limit = (limit == 0) ? myvecsize : limit;
std::stringstream ss;
ss<<"[";
@@ -120,14 +123,14 @@ namespace imc
}
else
{
unsigned long int heals = (unsigned long int)(limit/2.);
unsigned long int heals = limit/2;
for ( unsigned long int i = 0; i < heals; i++ )
{
customize_stream(ss,prec,fixed);
ss<<myvec[i]<<",";
}
ss<<"...";
for ( unsigned long int i = myvec.size()-heals; i < myvec.size(); i++ )
for ( unsigned long int i = myvecsize-heals; i < myvecsize; i++ )
{
customize_stream(ss,prec,fixed);
ss<<myvec[i]<<",";
@@ -152,6 +155,8 @@ namespace imc
std::string uuid_;
std::string name_, comment_;
std::string origin_, origin_comment_, text_;
std::chrono::system_clock::time_point trigger_time_, absolute_trigger_time_;
double trigger_time_frac_secs_;
std::string language_code_, codepage_;
std::string yname_, yunit_;
std::string xname_, xunit_;
@@ -162,6 +167,7 @@ namespace imc
int signbits_, num_bytes_;
// unsigned long int byte_offset_;
unsigned long int buffer_offset_, buffer_size_;
long int addtime_;
int datatp_;
imc::datatype dattyp_;
std::vector<imc::datatype> ydata_;
@@ -235,6 +241,7 @@ namespace imc
buffer_offset_ = std::stoul(blocks_->at(chnenv_.Cbuuid_).get_parameter(prms[6]));
buffer_size_ = std::stoul(blocks_->at(chnenv_.Cbuuid_).get_parameter(prms[7]));
xoffset_ = std::stod(blocks_->at(chnenv_.Cbuuid_).get_parameter(prms[11]));
addtime_ = (long int)std::stod(blocks_->at(chnenv_.Cbuuid_).get_parameter(prms[12]));
}
// extract associated CR data
@@ -277,8 +284,35 @@ namespace imc
language_code_ = blocks_->at(chnenv_.NLuuid_).get_parameter(prms[3]);
}
// obtain NT data
// - https://en.cppreference.com/w/cpp/chrono/c/tm
// - https://en.cppreference.com/w/cpp/io/manip/put_time
if ( blocks_->count(chnenv_.NTuuid_) == 1 )
{
prms = blocks_->at(chnenv_.NTuuid_).get_parameters();
//std::tm tm{};
std::tm tms = std::tm();
tms.tm_mday = std::stoi(blocks_->at(chnenv_.NTuuid_).get_parameter(prms[2]));
tms.tm_mon = std::stoi(blocks_->at(chnenv_.NTuuid_).get_parameter(prms[3])) - 1;
tms.tm_year = std::stoi(blocks_->at(chnenv_.NTuuid_).get_parameter(prms[4])) - 1900;
tms.tm_hour = std::stoi(blocks_->at(chnenv_.NTuuid_).get_parameter(prms[5]));
tms.tm_min = std::stoi(blocks_->at(chnenv_.NTuuid_).get_parameter(prms[6]));
long double secs = std::stold(blocks_->at(chnenv_.NTuuid_).get_parameter(prms[7]));
double secs_int;
trigger_time_frac_secs_ = modf((double)secs,&secs_int);
tms.tm_sec = (int)secs_int;
// generate std::chrono::system_clock::time_point type
std::time_t ts = std::mktime(&tms);
trigger_time_ = std::chrono::system_clock::from_time_t(ts);
}
// start converting binary buffer to imc::datatype
if ( !chnenv_.CSuuid_.empty() ) convert_buffer();
// calculate absolute trigger-time
absolute_trigger_time_ = trigger_time_ + std::chrono::seconds(addtime_);
// + std::chrono::nanoseconds((long int)(trigger_time_frac_secs_*1.e9));
}
// convert buffer to actual datatype
@@ -351,7 +385,7 @@ namespace imc
// fill xdata_
for ( unsigned long int i = 0; i < num_values; i++ )
{
xdata_.push_back(xoffset_+i*xstepwidth_);
xdata_.push_back(xoffset_+(double)i*xstepwidth_);
}
// employ data transformation
@@ -368,12 +402,18 @@ namespace imc
// get info string
std::string get_info(int width = 20)
{
// prepare printable trigger-time
std::time_t tt = std::chrono::system_clock::to_time_t(trigger_time_);
std::time_t att = std::chrono::system_clock::to_time_t(absolute_trigger_time_);
std::stringstream ss;
ss<<std::setw(width)<<std::left<<"uuid:"<<uuid_<<"\n"
<<std::setw(width)<<std::left<<"name:"<<name_<<"\n"
<<std::setw(width)<<std::left<<"comment:"<<comment_<<"\n"
<<std::setw(width)<<std::left<<"origin:"<<origin_<<"\n"
<<std::setw(width)<<std::left<<"description:"<<text_<<"\n"
<<std::setw(width)<<std::left<<"trigger-time-nt:"<<std::put_time(std::localtime(&tt),"%FT%T")<<"\n"
<<std::setw(width)<<std::left<<"trigger-time:"<<std::put_time(std::localtime(&att),"%FT%T")<<"\n"
<<std::setw(width)<<std::left<<"language-code:"<<language_code_<<"\n"
<<std::setw(width)<<std::left<<"codepage:"<<codepage_<<"\n"
<<std::setw(width)<<std::left<<"yname:"<<yname_<<"\n"
@@ -382,6 +422,7 @@ namespace imc
<<std::setw(width)<<std::left<<"significant bits:"<<signbits_<<"\n"
<<std::setw(width)<<std::left<<"buffer-offset:"<<buffer_offset_<<"\n"
<<std::setw(width)<<std::left<<"buffer-size:"<<buffer_size_<<"\n"
<<std::setw(width)<<std::left<<"add-time:"<<addtime_<<"\n"
<<std::setw(width)<<std::left<<"xname:"<<xname_<<"\n"
<<std::setw(width)<<std::left<<"xunit:"<<xunit_<<"\n"
<<std::setw(width)<<std::left<<"xstepwidth:"<<xstepwidth_<<"\n"
@@ -399,17 +440,24 @@ namespace imc
// provide JSON string of metadata
std::string get_json(bool include_data = false)
{
// prepare printable trigger-time
std::time_t tt = std::chrono::system_clock::to_time_t(trigger_time_);
std::time_t att = std::chrono::system_clock::to_time_t(absolute_trigger_time_);
std::stringstream ss;
ss<<"{"<<"\"uuid\":\""<<uuid_
<<"\",\"name\":\""<<name_
<<"\",\"comment\":\""<<comment_
<<"\",\"origin\":\""<<origin_
<<"\",\"description\":\""<<text_
<<"\",\"trigger-time-nt\":\""<<std::put_time(std::localtime(&tt),"%FT%T")
<<"\",\"trigger-time\":\""<<std::put_time(std::localtime(&att),"%FT%T")
<<"\",\"language-code\":\""<<language_code_
<<"\",\"codepage\":\""<<codepage_
<<"\",\"yname\":\""<<yname_
<<"\",\"yunit\":\""<<yunit_
<<"\",\"significantbits\":\""<<signbits_
<<"\",\"addtime\":\""<<addtime_
<<"\",\"xname\":\""<<xname_
<<"\",\"xunit\":\""<<xunit_
<<"\",\"xstepwidth\":\""<<xstepwidth_

View File

@@ -17,13 +17,13 @@ HPP = $(wildcard $(LIB)/*.hpp)
# choose compiler and its options
CC = g++ -std=c++17
#OPT = -O3 -Wall -mavx -mno-tbm -mf16c -mno-f16c
OPT = -O3 -Wall -Werror -Wunused-variable -Wsign-compare
OPT = -O3 -Wall -Wconversion -Wpedantic -Werror -Wunused-variable -Wsign-compare
# determine git version/commit and release tag
GTAG := $(shell git tag | tail -n1)
GTAG := $(shell git tag -l --sort=version:refname | tail -n1)
GHSH := $(shell git rev-parse HEAD | head -c8)
RTAG := v$(shell cat pip/setup.py | grep version | grep -oP "([0-9]\.){2}[0-9]")
CTAG := v$(shell cat cython/setup.py | grep version | grep -oP "([0-9]\.){2}[0-9]")
RTAG := v$(shell cat pip/setup.py | grep version | grep -oP "([0-9]\.){2}[0-9]{1,2}")
CTAG := v$(shell cat cython/setup.py | grep version | grep -oP "([0-9]\.){2}[0-9]{1,2}")
# define install location
INST := /usr/local/bin
@@ -99,7 +99,7 @@ cython-clean :
# pip
pip-release: check-vtag $(RTAG) cython-build
cd ./pip/ && make publish
cd ./pip/ && make publish-source
#-----------------------------------------------------------------------------#
# clean

View File

@@ -2,15 +2,23 @@
SHELL := /bin/bash
publish: sdist upload
publish-source: sdist upload
publish-binary: bdist upload
sdist: ../cython/py_imc_termite.pyx ../cython/imc_termite.pxd ../cython/py_imc_termite.cpp
prepdist: ../cython/py_imc_termite.pyx ../cython/imc_termite.pxd ../cython/py_imc_termite.cpp
cp -v $? ./
cp -v $(shell ls ../lib/imc_*.hpp) ./
tail -n 212 ../README.md > ./README.md
cp -v ../LICENSE ./
sdist: prepdist
python3 setup.py sdist
# TODO use manylinux wheel to avoid ERROR "unsupported platform tag 'linux_x86_64'"
# see: - https://github.com/pypa/manylinux
bdist: prepdist
python3 setup.py bdist_wheel
# authentication:
# - username: __token__
# - password: <token value including pypi-prefix>

View File

@@ -18,7 +18,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
setup(
name="IMCtermite",
version="1.2.8",
version="1.2.10",
author="Record Evolution GmbH",
author_email="mario.fink@record-evolution.de",
maintainer="Record Evolution GmbH",

View File

@@ -1,137 +0,0 @@
#-----------------------------------------------------------------------------#
import raw_eater
import raw_meat
import pyarrow as pa
import pyarrow.parquet as pq
from pathlib import Path
fileobj1 = Path("samples/datasetA/").rglob("*.raw")
rawlist1 = [str(fl) for fl in fileobj1]
fileobj2 = Path("samples/datasetB/").rglob("*.raw")
rawlist2 = [str(fl) for fl in fileobj2]
rawlist = rawlist1 #[rawlist1[0],rawlist1[4],rawlist2[0],rawlist2[6]]
for fil in rawlist2 :
rawlist.append(fil)
rawlist.append("./README.md")
print("")
print(rawlist)
print()
#-----------------------------------------------------------------------------#
# alternatively create "empty" instance of "raw_eater" and set file names
eatraw = raw_eater.raweater()
# eatraw.set_file("../smp/pressure_Vacuum.raw".encode())
# convert every single listed file
for rf in rawlist :
print("converting " + str(rf) + "...\n" + 90*("-") + "\n")
# setup instance of "raw_eater" and trigger conversion
# eatraw = raw_eater.raweater(rf.encode())
# eatraw = raw_meat.rawmerger(rf.encode())
# use global instance of "raw_eater" to set file and perform decoding
eatraw.set_file(rf.encode())
try :
eatraw.do_conversion()
except RuntimeError as e :
print("conversion failed: " + str(e))
# check validity of file format
if eatraw.validity() :
# show channel name and its unit
entity = eatraw.channel_name().decode(encoding='UTF-8',errors='ignore')
unit = eatraw.unit().decode(encoding='UTF-8',errors='ignore')
print("\nentity: " + str(entity))
print("unit: " + str(unit) + "\n")
# obtain extracted data
xt = eatraw.get_time()
yt = eatraw.get_channel()
# show excerpt of data
print("time (length: " + str(len(xt)) + ") \n"
+ str(xt[:10]) + "\n...\n" + str(xt[-10:]) + "\n")
yttrunc = [round(y,4) for y in yt]
print(str(entity) + " (length: " + str(len(yttrunc)) + ") \n"
+ str(yttrunc[:10]) + "\n...\n" + str(yttrunc[-10:]) + "\n")
outname = rf.split('/')[-1].replace('raw','csv')
print("write output to : " + outname)
eatraw.write_table(("output/"+outname).encode(),ord(' '))
else :
print("\nerror: invalid/corrupt .raw file")
print("\n")
#-----------------------------------------------------------------------------#
print("convert and merge channels " + "\n" + 90*("-") + "\n")
# setup new instance to merge channels
eatmea = raw_meat.rawmerger(''.encode()) #rawlist[0].encode())
# add every single channel/file in list
for rf in rawlist :
print("\nadding channel " + str(rf))
try :
succ = eatmea.add_channel(rf.encode())
print("\nrecent time series: length: " + str(len(eatmea.get_time_series())) + "\n")
except RuntimeError as e :
print("failed to add channel: " + str(e))
# show summary of successfully merged channels
print("\nmerged channels:\n")
# write merged table to .csv output
eatmea.write_table_all('output/allchannels.csv'.encode(),ord(','))
# get number of successfully merged channels and their names (+units)
numch = eatmea.get_num_channels()
chnames = [chnm.decode(encoding='UTF-8',errors='ignore') for chnm in eatmea.get_channel_names()]
print("number of channels: " + str(numch))
print("channel names: " + str(chnames))
# obtain final time series
timse = eatmea.get_time_series()
print("\nfinal time series:\nlength:" + str(len(timse)) + "\n")
# get time unit and prepend column name
chnames.insert(0,"Time ["+str(eatmea.time_unit().decode(encoding='UTF-8',errors='ignore'))+"]")
# prepare list of pyarrow arrays
pyarrs = []
pyarrs.append(pa.array(timse))
for i in range(0,numch) :
print("\n" + str(i) + " " + str(chnames[i]))
dat = eatmea.get_channel_by_index(i)
print("length: " + str(len(dat)))
pyarrs.append(pa.array(dat))
print("")
# print("\npyarrow arrays\n" + str(pyarrs))
# create pyarrow table from data
pyarwtab = pa.Table.from_arrays(pyarrs,chnames)
print("\n" + 60*"-" + "\n" + str(pyarwtab) + "\n")
# write pyarrow table to .parquet file with compression
pq.write_table(pyarwtab,'output/allchannels.parquet',compression='BROTLI') # compression='BROTLI', 'SNAPPY')
# try to read and decode the .parquet file
df = pq.read_table('output/allchannels.parquet')
print(df.to_pandas())
# df.to_pandas().to_csv('allchannels.csv',index=False,encoding='utf-8',sep=",")
#-----------------------------------------------------------------------------#

View File

@@ -1,24 +0,0 @@
import pyarrow as pa
import numpy as np
import pyarrow.parquet as pq
db = pa.array(np.linspace(10,50,6))
print(db)
da = pa.array(np.linspace(0,5,6))
print(db)
filenam = 'pyarrow_testtab.parquet'
patab = pa.Table.from_arrays([da,db],['entity A [unitA]','entity B [unitB]'])
print(patab)
# pq.write_table(patab,filenam,compression='BROTLI')
pq.write_table(patab,filenam,compression='SNAPPY')
df = pq.read_table(filenam)
print(df)
print(df.to_pandas())
#import readline
#readline.write_history_file('generate_pyarrow_table_and_write_parquet.py')

50
python/usage_ext.py Normal file
View File

@@ -0,0 +1,50 @@
import imc_termite
import json
import os
import datetime
# declare and initialize instance of "imctermite" by passing a raw-file
try :
imcraw = imc_termite.imctermite(b"samples/sampleB.raw")
except RuntimeError as e :
raise Exception("failed to load/parse raw-file: " + str(e))
# obtain list of channels as list of dictionaries (without data)
channels = imcraw.get_channels(False)
print(json.dumps(channels,indent=4, sort_keys=False))
# obtain all channels (including full data)
channelsdata = imcraw.get_channels(True)
# everything that follows is an example that specifically makes use only of
# the first (index = 0) channel ...
idx = 0
if len(channelsdata) > 0 :
# get first channel's data
chnydata = channelsdata[idx]['ydata']
chnxdata = channelsdata[idx]['xdata']
print("xdata: " + str(len(chnxdata)))
print("ydata: " + str(len(chnydata)))
# extract trigger-time
trigtim = datetime.datetime.fromisoformat(channels[idx]["trigger-time"])
print(trigtim)
# file output of data with absolute timestamp in 1st column
filname = os.path.join("./",channelsdata[idx]['name']+".csv")
print("writing output into " + filname)
with open(filname,'w') as fout :
# include column header
fout.write( str(channelsdata[idx]['xname']) + '[' + str(channelsdata[idx]['xunit']) + "]"
+ ","
+ str(channelsdata[idx]['yname']) + '[' + str(channelsdata[idx]['yunit']) + "]"
+ "\n" )
# add data (introduce time shift according to trigger-time)
for row in range(0,len(chnxdata)) :
fout.write( str( (trigtim + datetime.timedelta(seconds=chnxdata[row])).isoformat() )
+ ","
+ str( chnydata[row])
+ "\n" )

View File

@@ -1,12 +1,50 @@
##!/bin/bash/
dir=$1
fildir=$1
#ls ${dir} | while read fn; do echo $fn; cat ${dir}$fn | grep -a "|[A-Z][A-Z]," -o | wc -l; done;
if [ -z "${fildir}" ]; then
echo "CLI argument missing: provide file or directory" >&2
exit 1
fi
#ls ${dir} | while read fn; do echo $fn; cat ${dir}$fn | grep -a "|[A-Z][A-Z]," -o; done;
list-markers()
{
arg="$1"
if [ -z "${arg}" ]; then
echo "list-markers: missing file argument" >&2
exit 1
else
if [ -d "${arg}" ]; then
echo "list-markers: file argument is a directory" >&2
exit 1
fi
fi
#ls ${dir} | while read fn; do echo $fn; cat ${dir}$fn | xxd | head -n10 | tail -n3; done;
echo -e "\n${arg}\n"
mrks=$(cat ${arg} | grep -a "|[A-Z][a-zA-Z]," -o)
mrksnum=$(echo "${mrks}" | wc -l)
echo -e "(${mrksnum})\n${mrks}"
}
if [ -f "${fildir}" ]; then
echo "analyzing single file ${fildir} ..."
list-markers "${fildir}"
elif [ -d "${fildir}" ]; then
echo "analyzing all *.raw files in directory ${fildir} ..."
lsfls=$(ls ${fildir}/*.raw | sed 's/\/\//\//g')
echo -e "\n${lsfls}"
for fl in ${lsfls}; do
list-markers "${fl}"
done
else
echo "does not exist: ${fildir}" >&2
exit 1
fi
ls ${dir} | while read fn; do echo $fn; cat ${dir}$fn | grep -a "|[A-Z][a-zA-Z]," -o | wc -l; done;
ls ${dir} | while read fn; do echo $fn; cat ${dir}$fn | grep -a "|[A-Z][a-zA-Z]," -o; done;