diff --git a/lib/parquet/makefile b/lib/parquet/makefile index 8be42ea..c09a40b 100644 --- a/lib/parquet/makefile +++ b/lib/parquet/makefile @@ -1,9 +1,10 @@ +#-----------------------------------------------------------------------------# PARQUETDIR := /home/mario/Desktop/Record_Evolution/parquet-cpp ARROWDIR := /home/mario/Desktop/Record_Evolution/arrow/cpp/src CPP := g++ -std=c++14 -OPT := +OPT := #-Wall -Woverflow -Wpedantic -Wextra -Waddress -Waligned-new -Walloc-zero prepare : collect_parquet modify_parquet collect_arrow modify_arrow @@ -12,7 +13,7 @@ collect_parquet : cp -r $(PARQUETDIR)/src/parquet ./ cp $(PARQUETDIR)/examples/low-level-api/reader_writer.h ./ cp $(PARQUETDIR)/examples/low-level-api/reader-writer.cc ./ - + modify_parquet : cp parquet/parquet_version.h.in parquet/parquet_version.h sed -i 's/ReadableFileInterface/ReadWriteFileInterface/g' parquet/util/memory.h @@ -31,15 +32,51 @@ collect_test : subst : sed -i 's/#include \"arrow\//\/\/#include \"arrow/g' parquet/properties.h - + test : $(CPP) $(OPT) -I$(PWD) reader-writer.cc clean : rm -r parquet/ arrow/ rm reader-writer.cc reader_writer.h +#-----------------------------------------------------------------------------# -#------------------------------------------------------------------------------------# +# choose shell +SHELL:=/bin/bash + +SRC = reader-writer + +# specify path of cloned directory +ARROWGIT := /home/mario/Desktop/Record_Evolution/arrow + +filewriter : parquet/file_writer.cc + $(CPP) -c $(OPT) $< + +# build executable (and generate dependency file) +readwrite : reader-writer.cc + $(CPP) $(OPT) -MMD $< -I ./ + +# generate dependency file +$(SRC).d : $(SRC).cc + $(CPP) -c -MMD $< -I ./ -I $(ARROWGIT)/cpp/src/ + +# extract source dependencies +extract-dep : $(SRC).d + @# extract relevant dependencies + cat $< | sed 's/ /\n/g' | awk 'NF' | grep -v '\\' | grep '\/' > deps.log + cat deps.log | sed ':a;N;$!ba;s/\n/ /g' > headers.log + cat headers.log | sed 's/.h$$/.cc/g' > sources.log + @# copy required sources + mkdir -p temp/ + cp --parents `cat headers.log` temp/ + cp --parents `cat sources.log` temp/ 2>/dev/null + mv temp$(ARROWGIT)/cpp/src/* ./ + rm -r temp + +clean-dep : + rm -f deps.log headers.log sources.log $(SRC).d + +#-----------------------------------------------------------------------------# # only use more recent and up to date repository arrow.git # build arrow shared/static libraries @@ -56,6 +93,4 @@ example : # set environment variable LD_LIBRARY_PATH=../../../../cpp/build/release/ before launching executable - - - +#------------------------------------------------------------------------------------# diff --git a/lib/parquet/reader-writer.cc b/lib/parquet/reader-writer.cc index 09cd137..68ab579 100644 --- a/lib/parquet/reader-writer.cc +++ b/lib/parquet/reader-writer.cc @@ -56,14 +56,14 @@ int main(int argc, char** argv) { // Create a local file output stream instance. using FileClass = ::arrow::io::FileOutputStream; std::shared_ptr out_file; - PARQUET_THROW_NOT_OK(FileClass::Open(PARQUET_FILENAME, &out_file)); + PARQUET_ASSIGN_OR_THROW(out_file, FileClass::Open(PARQUET_FILENAME)); // Setup the parquet schema std::shared_ptr schema = SetupSchema(); // Add writer properties parquet::WriterProperties::Builder builder; - builder.compression(parquet::Compression::SNAPPY); + builder.compression(parquet::Compression::UNCOMPRESSED); std::shared_ptr props = builder.build(); // Create a ParquetFileWriter instance @@ -205,6 +205,8 @@ int main(int argc, char** argv) { int i; std::shared_ptr column_reader; + ARROW_UNUSED(rows_read); // prevent warning in release build + // Get the Column Reader for the boolean column column_reader = row_group_reader->Column(0); parquet::BoolReader* bool_reader = @@ -292,6 +294,7 @@ int main(int argc, char** argv) { assert(values_read == 1); // Verify the value written parquet::Int96 expected_value; + ARROW_UNUSED(expected_value); // prevent warning in release build expected_value.value[0] = i; expected_value.value[1] = i + 1; expected_value.value[2] = i + 2; @@ -359,6 +362,7 @@ int main(int argc, char** argv) { assert(rows_read == 1); // Verify the value written char expected_value[FIXED_LENGTH] = "parquet"; + ARROW_UNUSED(expected_value); // prevent warning in release build expected_value[7] = static_cast('0' + i / 100); expected_value[8] = static_cast('0' + (i / 10) % 10); expected_value[9] = static_cast('0' + i % 10); diff --git a/lib/parquet/reader-writer.o b/lib/parquet/reader-writer.o new file mode 100644 index 0000000..e575325 Binary files /dev/null and b/lib/parquet/reader-writer.o differ diff --git a/lib/parquet/reader_writer.h b/lib/parquet/reader_writer.h index 3fda0cf..1ffc46e 100644 --- a/lib/parquet/reader_writer.h +++ b/lib/parquet/reader_writer.h @@ -21,7 +21,7 @@ #include #include -using parquet::LogicalType; +using parquet::ConvertedType; using parquet::Repetition; using parquet::Type; using parquet::schema::GroupNode; @@ -34,34 +34,34 @@ static std::shared_ptr SetupSchema() { // Create a primitive node named 'boolean_field' with type:BOOLEAN, // repetition:REQUIRED fields.push_back(PrimitiveNode::Make("boolean_field", Repetition::REQUIRED, - Type::BOOLEAN, LogicalType::NONE)); + Type::BOOLEAN, ConvertedType::NONE)); // Create a primitive node named 'int32_field' with type:INT32, repetition:REQUIRED, // logical type:TIME_MILLIS fields.push_back(PrimitiveNode::Make("int32_field", Repetition::REQUIRED, Type::INT32, - LogicalType::TIME_MILLIS)); + ConvertedType::TIME_MILLIS)); // Create a primitive node named 'int64_field' with type:INT64, repetition:REPEATED fields.push_back(PrimitiveNode::Make("int64_field", Repetition::REPEATED, Type::INT64, - LogicalType::NONE)); + ConvertedType::NONE)); fields.push_back(PrimitiveNode::Make("int96_field", Repetition::REQUIRED, Type::INT96, - LogicalType::NONE)); + ConvertedType::NONE)); fields.push_back(PrimitiveNode::Make("float_field", Repetition::REQUIRED, Type::FLOAT, - LogicalType::NONE)); + ConvertedType::NONE)); fields.push_back(PrimitiveNode::Make("double_field", Repetition::REQUIRED, Type::DOUBLE, - LogicalType::NONE)); + ConvertedType::NONE)); // Create a primitive node named 'ba_field' with type:BYTE_ARRAY, repetition:OPTIONAL fields.push_back(PrimitiveNode::Make("ba_field", Repetition::OPTIONAL, Type::BYTE_ARRAY, - LogicalType::NONE)); + ConvertedType::NONE)); // Create a primitive node named 'flba_field' with type:FIXED_LEN_BYTE_ARRAY, // repetition:REQUIRED, field_length = FIXED_LENGTH fields.push_back(PrimitiveNode::Make("flba_field", Repetition::REQUIRED, - Type::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE, + Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::NONE, FIXED_LENGTH)); // Create a GroupNode named 'schema' using the primitive nodes defined above diff --git a/lib/parquet/setup-sources.sh b/lib/parquet/setup-sources.sh new file mode 100644 index 0000000..a9bf588 --- /dev/null +++ b/lib/parquet/setup-sources.sh @@ -0,0 +1 @@ +#!/bin/bash