-> correction write_table vs. write_data in main.cpp

-> container build of pyarrow (i.a. for armv7l build)
This commit is contained in:
Mario Fink 2020-08-05 11:32:02 +02:00
parent 26930e960c
commit 3618ed9ee8
13 changed files with 2587 additions and 5 deletions

3
.gitignore vendored
View File

@ -1,8 +1,6 @@
/build
*.csv
eatraw
eatdev
@ -15,4 +13,3 @@ nohup.out
raw_eater.cpp
*.o

View File

@ -0,0 +1,26 @@
# Image that compiles Apache Arrow (C++) and pyarrow from source.
FROM ubuntu:19.10

# Toolchain and Python build prerequisites.
# (build-essential is left out; g++ and make are listed explicitly.)
RUN apt-get update -y && apt-get install -y \
    apt-utils \
    git g++ \
    make cmake \
    pkg-config \
    python3 \
    python3-setuptools \
    cython3 \
    python3-numpy

# Shallow clone of the Arrow sources; ends up in ./arrow relative to the
# image's working directory.
RUN git clone https://github.com/apache/arrow.git --single-branch --depth=1

# Bring the build scripts (and the rest of the build context) into the image.
COPY . ./
RUN chmod u+x ./build_arrow_cpp.sh
RUN chmod u+x ./build_arrow_python.sh

# Build and install Arrow C++ first, then pyarrow on top of it.
RUN ./build_arrow_cpp.sh
RUN ./build_arrow_python.sh

#RUN chmod u+x ./build_arrow.sh
#CMD ["./build_arrow.sh"]

# Keep the container alive so the build results can be inspected or copied.
# Exec form needs one array element per argument: ["sleep 1d"] would look for
# an executable literally named "sleep 1d" and fail at container start.
CMD ["sleep", "1d"]

65
lib/pyarrow_arm/build_arrow.sh Executable file
View File

@ -0,0 +1,65 @@
#!/bin/bash
# All-in-one build: fetch the Apache Arrow sources, build and install the C++
# libraries, then build the pyarrow extension modules in place.
#
# The install prefix is taken from $ARROW_HOME.
# NOTE(review): nothing in this script sets ARROW_HOME; with an empty prefix
# and LIBDIR=lib the libraries presumably land in /lib, which would match
# ARROW_LIB_DIR below - confirm.
#
# A leading `sleep infinity` used to sit here and prevented every command
# below from ever running; it has been removed so the script actually builds.

startts=$(date)
echo "starting build process at ${startts}..."

echo -e "\nhome directory is..."
pwd

echo -e "\ncloning apache/arrow..."
# Skip the clone when a checkout is already present; git refuses to clone
# into an existing non-empty directory.
if [ ! -d arrow ]; then
    git clone https://github.com/apache/arrow.git --single-branch --depth=1
fi

# Diagnostic listings of the environment and the checkout.
echo -e "\nls -lh /\n"
ls -lh /

echo -e "\nls -lh arrow/\n"
ls -lh arrow/

echo -e "\nls -lh arrow/python/\n"
ls -lh arrow/python

# --- Arrow C++ -------------------------------------------------------------
# -p: do not fail when the build directory is left over from an earlier run.
mkdir -p arrow/cpp/build
# Abort rather than run cmake/make in the wrong directory.
pushd arrow/cpp/build || exit 1

cmake -DCMAKE_INSTALL_PREFIX="$ARROW_HOME" \
      -DCMAKE_INSTALL_LIBDIR=lib \
      -DARROW_WITH_BZ2=ON \
      -DARROW_WITH_ZLIB=ON \
      -DARROW_WITH_ZSTD=ON \
      -DARROW_WITH_LZ4=ON \
      -DARROW_WITH_SNAPPY=ON \
      -DARROW_WITH_BROTLI=ON \
      -DARROW_PARQUET=ON \
      -DARROW_PYTHON=ON \
      -DARROW_BUILD_TESTS=OFF \
      -DARROW_WITH_HDFS=OFF \
      ..

make -j4
make install

popd

# --- pyarrow ---------------------------------------------------------------
#cython --version
cython3 --version

pushd arrow/python || exit 1

export ARROW_LIB_DIR=/lib/
# Only Parquet support is enabled; every other optional component is off.
export PYARROW_WITH_PARQUET=1
export PYARROW_WITH_CUDA=0
# Was misspelled PYARROW_WITH_FlIGHT (lower-case "l"), a name nothing reads.
export PYARROW_WITH_FLIGHT=0
export PYARROW_WITH_DATASET=0
export PYARROW_WITH_ORC=0
export PYARROW_WITH_PLASMA=0
export PYARROW_WITH_S3FS=0
export PYARROW_WITH_HDFS=0
export PYARROW_WITH_GANDIVA=0

python3 setup.py build_ext --inplace

popd

# Report start and end time of the whole run.
echo " started build process at ${startts} ..."
finishts=$(date)
echo "finishing build process at ${finishts}..."

View File

@ -0,0 +1,23 @@
#!/bin/bash
# Configure, build and install the Arrow C++ libraries from ./arrow/cpp.
#
# The install prefix is taken from $ARROW_HOME.
# NOTE(review): nothing visible here sets ARROW_HOME; an empty value is passed
# through to cmake unchanged, exactly as before - confirm that is intended.

# Stop at the first failing command. Without this the script's exit status is
# that of the final `popd`, so a failed cmake/make would still let a Docker
# `RUN ./build_arrow_cpp.sh` step succeed.
set -e

# -p: do not fail when the build directory already exists.
mkdir -p arrow/cpp/build
pushd arrow/cpp/build

cmake -DCMAKE_INSTALL_PREFIX="$ARROW_HOME" \
      -DCMAKE_INSTALL_LIBDIR=lib \
      -DARROW_WITH_BZ2=ON \
      -DARROW_WITH_ZLIB=ON \
      -DARROW_WITH_ZSTD=ON \
      -DARROW_WITH_LZ4=ON \
      -DARROW_WITH_SNAPPY=ON \
      -DARROW_WITH_BROTLI=ON \
      -DARROW_PARQUET=ON \
      -DARROW_PYTHON=ON \
      -DARROW_BUILD_TESTS=OFF \
      -DARROW_WITH_HDFS=OFF \
      -DARROW_WITH_IPC=OFF \
      ..

make -j4
make install

popd

View File

@ -0,0 +1,15 @@
#!/bin/bash
# Build and install pyarrow from ./arrow/python with only Parquet support
# enabled; every other optional component is switched off.

# Stop at the first failing command so that a failed build is reported to the
# caller (e.g. a Docker `RUN` step) instead of being masked by the final popd.
set -e

pushd arrow/python

export PYARROW_WITH_PARQUET=1
export PYARROW_WITH_CUDA=0
# Was misspelled PYARROW_WITH_FlIGHT (lower-case "l"), a name nothing reads.
export PYARROW_WITH_FLIGHT=0
export PYARROW_WITH_DATASET=0
export PYARROW_WITH_ORC=0
export PYARROW_WITH_PLASMA=0
export PYARROW_WITH_S3FS=0
export PYARROW_WITH_HDFS=0
export PYARROW_WITH_GANDIVA=0

# python3 setup.py build_ext --inplace
python3 setup.py install

popd

23
lib/pyarrow_arm/makefile Normal file
View File

@ -0,0 +1,23 @@
# Helper targets for building and running the pyarrow build container.

# None of these targets produces a file of the same name. Declaring them phony
# keeps e.g. an existing ./build directory from turning `make build` into a
# no-op.
.PHONY : build run run-bash run-volume rm-container rm-image

# Build the image from the Dockerfile in this directory.
build :
	docker build . --tag pyarrowbuild

# Start a container with the image's default command.
run :
	docker run -it pyarrowbuild:latest

# Interactive shell with ./build mounted at /home.
# $(CURDIR) is make's working directory; a bare $(pwd) is expanded by make as
# an (empty) make variable and never reaches the shell.
run-bash :
	docker run -it --volume=$(CURDIR)/build:/home pyarrowbuild:latest /bin/bash

# Run with a host directory mounted over /arrow/python.
run-volume :
	docker run -it -v /home/pirate/pyarrow/build/:/arrow/python/ pyarrowbuild:latest
#sudo docker run -it --volume=$$(pwd)/build:/home ubuntu:latest /bin/bash

# Remove (up to) the 26 containers listed last by `docker ps --all`.
# The recipe is a single shell invocation (lines joined with `\`) because each
# recipe line otherwise runs in its own shell and loses the variable, and every
# shell `$` is written `$$` so that make passes it through.
rm-container :
	@cont="$$(docker ps --all --format '{{.Names}}' | tail -n 26 | tr '\n' ' ')"; \
	echo "$${cont}"; \
	if [ -n "$${cont}" ]; then docker rm $${cont}; fi

# Remove all local images; does nothing when there are none.
rm-image :
	@img="$$(docker image ls --quiet | tr '\n' ' ')"; \
	if [ -n "$${img}" ]; then docker image rm $${img}; fi

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,18 @@
"""Round-trip smoke test: CSV -> Arrow table -> Parquet file -> Arrow table."""
import pyarrow.csv as pv
import pyarrow.parquet as pq

# Input CSV and the Parquet file it is converted into.
csv_path = 'pressureVacuum.csv'
parquet_path = 'pressureVacuum.parquet'

# Parse the CSV with an explicit comma delimiter and show the resulting table.
csv_options = pv.ParseOptions(delimiter=',')
table = pv.read_csv(csv_path, parse_options=csv_options)
print(table)

# Persist the table as Parquet.
# {NONE, SNAPPY, GZIP, LZO, BROTLI, LZ4, ZSTD}
pq.write_table(table, parquet_path, compression='SNAPPY')

# Read the file back (columns=None: no column selection) and show it.
roundtrip = pq.read_table(parquet_path, columns=None)
print(roundtrip)

8
lib/pyarrow_arm/sync_pi.sh Executable file
View File

@ -0,0 +1,8 @@
#!/bin/bash
# Copy one local file to the pyarrow working directory on the Pi.
#
# Usage: sync_pi.sh <file>
# Exits with status 1 when no file argument is given.

if [ -z "$1" ]
then
    # Say why we are bailing out instead of exiting silently.
    echo "usage: $0 <file>" >&2
    exit 1
fi

# Quote the argument so paths with spaces or glob characters reach scp intact.
scp "$1" pirate@mf-pi-40:/home/pirate/pyarrow/

View File

@ -4,6 +4,10 @@ from Cython.Build import cythonize
extensions = Extension(
name="raw_eater",
version="0.1.0",
author="Mario Fink",
author_email="mario.fink@record-evolution.de",
url="https://github.com/RecordEvolution/raw_eater.git",
sources=["raw_eater.pyx"],
# libraries=[""],
library_dirs=["src"],

View File

@ -57,8 +57,8 @@ int main(int argc, char* argv[])
// for ( unsigned long int i = 0; i < 10; i++ ) std::cout<<mydata[i]<<"\n";
// write data in csv-file
// eatraw.write_data(std::string(argv[2]));
eatraw.write_table(std::string(argv[2]),' ');
eatraw.write_table(std::string(argv[2]));
// eatraw.write_table(std::string(argv[2]),' ');
return 0;
}