extract essential dependencies

This commit is contained in:
Mario Fink 2020-07-31 14:01:19 +02:00
parent 984895a4f2
commit aee2c180a7
5 changed files with 58 additions and 18 deletions

View File

@ -1,3 +1,4 @@
#-----------------------------------------------------------------------------#
PARQUETDIR := /home/mario/Desktop/Record_Evolution/parquet-cpp
ARROWDIR := /home/mario/Desktop/Record_Evolution/arrow/cpp/src
@ -38,8 +39,44 @@ test :
clean :
rm -r parquet/ arrow/
rm reader-writer.cc reader_writer.h
#-----------------------------------------------------------------------------#
#------------------------------------------------------------------------------------#
# choose shell
# (recipes in this makefile are run with bash instead of make's default shell)
SHELL:=/bin/bash
# base name, without extension, of the example source;
# the rules below derive $(SRC).cc and $(SRC).d from it
SRC = reader-writer
# specify path of cloned directory
# NOTE(review): absolute, machine-specific path; adjust it to the local arrow checkout
ARROWGIT := /home/mario/Desktop/Record_Evolution/arrow
# compile parquet/file_writer.cc with -c, i.e. without a link step
# $(CPP) and $(OPT) are not defined in this excerpt
filewriter : parquet/file_writer.cc
$(CPP) -c $(OPT) $<
# build executable (and generate dependency file)
# -MMD requests the dependency file as a side effect of the build and
# -I ./ adds the current directory to the include search path
# NOTE(review): no -o is passed, so the output name is the compiler default and
# not "readwrite"; confirm whether this target is meant to be phony
readwrite : reader-writer.cc
$(CPP) $(OPT) -MMD $< -I ./
# generate dependency file
# compiles $(SRC).cc with -c -MMD, searching the current directory and
# $(ARROWGIT)/cpp/src/ for headers
# NOTE(review): assumes a GCC-compatible $(CPP), which also writes an object
# file next to the .d file; confirm against the definition of CPP
$(SRC).d : $(SRC).cc
$(CPP) -c -MMD $< -I ./ -I $(ARROWGIT)/cpp/src/
# extract source dependencies
#
# Reads the compiler-generated dependency file $(SRC).d and copies every
# listed header, plus the .cc file of the same name where one exists, into
# the current directory while keeping the directory layout below
# $(ARROWGIT)/cpp/src/.
#
# Intermediate files:
#   deps.log     one dependency path per line (only entries containing a slash)
#   headers.log  the same paths joined into a single space-separated line
#   sources.log  the same paths with a trailing .h replaced by .cc, joined
#
# Notes on the recipe:
#   - a dollar sign meant for sed must be doubled, otherwise make expands it
#     before the shell runs (a single dollar followed by "!" expands to an
#     empty variable and turns the conditional branch into an endless one)
#   - the .h to .cc rewrite is done per line on deps.log, before joining, so
#     the end-of-line anchor applies to every path and not only the last one
#   - many headers have no matching .cc file; that copy step is best effort,
#     so its failure must not abort the remaining commands
extract-dep : $(SRC).d
	@# extract relevant dependencies
	cat $< | sed 's/ /\n/g' | awk 'NF' | grep -v '\\' | grep '\/' > deps.log
	cat deps.log | sed ':a;N;$$!ba;s/\n/ /g' > headers.log
	sed 's/\.h$$/.cc/' deps.log | sed ':a;N;$$!ba;s/\n/ /g' > sources.log
	@# copy required sources
	mkdir -p temp/
	cp --parents `cat headers.log` temp/
	cp --parents `cat sources.log` temp/ 2>/dev/null || true
	mv temp$(ARROWGIT)/cpp/src/* ./
	rm -r temp
# remove the intermediate log files written by extract-dep and the
# generated dependency file; -f keeps rm quiet when a file is already gone
clean-dep :
rm -f deps.log headers.log sources.log $(SRC).d
#-----------------------------------------------------------------------------#
# only use more recent and up to date repository arrow.git
# build arrow shared/static libraries
@ -56,6 +93,4 @@ example :
# set environment variable LD_LIBRARY_PATH=../../../../cpp/build/release/ before launching executable
#------------------------------------------------------------------------------------#

View File

@ -56,14 +56,14 @@ int main(int argc, char** argv) {
// Create a local file output stream instance.
using FileClass = ::arrow::io::FileOutputStream;
std::shared_ptr<FileClass> out_file;
PARQUET_THROW_NOT_OK(FileClass::Open(PARQUET_FILENAME, &out_file));
PARQUET_ASSIGN_OR_THROW(out_file, FileClass::Open(PARQUET_FILENAME));
// Setup the parquet schema
std::shared_ptr<GroupNode> schema = SetupSchema();
// Add writer properties
parquet::WriterProperties::Builder builder;
builder.compression(parquet::Compression::SNAPPY);
builder.compression(parquet::Compression::UNCOMPRESSED);
std::shared_ptr<parquet::WriterProperties> props = builder.build();
// Create a ParquetFileWriter instance
@ -205,6 +205,8 @@ int main(int argc, char** argv) {
int i;
std::shared_ptr<parquet::ColumnReader> column_reader;
ARROW_UNUSED(rows_read); // prevent warning in release build
// Get the Column Reader for the boolean column
column_reader = row_group_reader->Column(0);
parquet::BoolReader* bool_reader =
@ -292,6 +294,7 @@ int main(int argc, char** argv) {
assert(values_read == 1);
// Verify the value written
parquet::Int96 expected_value;
ARROW_UNUSED(expected_value); // prevent warning in release build
expected_value.value[0] = i;
expected_value.value[1] = i + 1;
expected_value.value[2] = i + 2;
@ -359,6 +362,7 @@ int main(int argc, char** argv) {
assert(rows_read == 1);
// Verify the value written
char expected_value[FIXED_LENGTH] = "parquet";
ARROW_UNUSED(expected_value); // prevent warning in release build
expected_value[7] = static_cast<char>('0' + i / 100);
expected_value[8] = static_cast<char>('0' + (i / 10) % 10);
expected_value[9] = static_cast<char>('0' + i % 10);

BIN
lib/parquet/reader-writer.o Normal file

Binary file not shown.

View File

@ -21,7 +21,7 @@
#include <parquet/api/reader.h>
#include <parquet/api/writer.h>
using parquet::LogicalType;
using parquet::ConvertedType;
using parquet::Repetition;
using parquet::Type;
using parquet::schema::GroupNode;
@ -34,34 +34,34 @@ static std::shared_ptr<GroupNode> SetupSchema() {
// Create a primitive node named 'boolean_field' with type:BOOLEAN,
// repetition:REQUIRED
fields.push_back(PrimitiveNode::Make("boolean_field", Repetition::REQUIRED,
Type::BOOLEAN, LogicalType::NONE));
Type::BOOLEAN, ConvertedType::NONE));
// Create a primitive node named 'int32_field' with type:INT32, repetition:REQUIRED,
// logical type:TIME_MILLIS
fields.push_back(PrimitiveNode::Make("int32_field", Repetition::REQUIRED, Type::INT32,
LogicalType::TIME_MILLIS));
ConvertedType::TIME_MILLIS));
// Create a primitive node named 'int64_field' with type:INT64, repetition:REPEATED
fields.push_back(PrimitiveNode::Make("int64_field", Repetition::REPEATED, Type::INT64,
LogicalType::NONE));
ConvertedType::NONE));
fields.push_back(PrimitiveNode::Make("int96_field", Repetition::REQUIRED, Type::INT96,
LogicalType::NONE));
ConvertedType::NONE));
fields.push_back(PrimitiveNode::Make("float_field", Repetition::REQUIRED, Type::FLOAT,
LogicalType::NONE));
ConvertedType::NONE));
fields.push_back(PrimitiveNode::Make("double_field", Repetition::REQUIRED, Type::DOUBLE,
LogicalType::NONE));
ConvertedType::NONE));
// Create a primitive node named 'ba_field' with type:BYTE_ARRAY, repetition:OPTIONAL
fields.push_back(PrimitiveNode::Make("ba_field", Repetition::OPTIONAL, Type::BYTE_ARRAY,
LogicalType::NONE));
ConvertedType::NONE));
// Create a primitive node named 'flba_field' with type:FIXED_LEN_BYTE_ARRAY,
// repetition:REQUIRED, field_length = FIXED_LENGTH
fields.push_back(PrimitiveNode::Make("flba_field", Repetition::REQUIRED,
Type::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE,
Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::NONE,
FIXED_LENGTH));
// Create a GroupNode named 'schema' using the primitive nodes defined above

View File

@ -0,0 +1 @@
#!/bin/bash