diff --git a/lib/parquet/generate_deps.py b/lib/parquet/generate_deps.py new file mode 100644 index 0000000..818b569 --- /dev/null +++ b/lib/parquet/generate_deps.py @@ -0,0 +1,26 @@ +#-----------------------------------------------------------------------------# + +import argparse + +#-----------------------------------------------------------------------------# + +parser = argparse.ArgumentParser(description='List all source dependencies') +parser.add_argument('pathToRepo',type=str,help='path of source repository') +parser.add_argument('mainSource',type=str,help='main source file') +parser.add_argument('depFile',type=str,help='file listing all dependencies') + +#-----------------------------------------------------------------------------# + +def find_dependencies(srcfile, repopath): + """ + List recursively all dependencies of 'srcfile' located below 'repopath'. + NOTE: still a stub - the body is empty, so calling it returns None. + """ + +if __name__ == "__main__": + + args = parser.parse_args()  # parse only when run as a script, never on import + print("\nCLI arguments:\n" + str(args)) + + +#-----------------------------------------------------------------------------# diff --git a/lib/parquet/setup-sources.sh b/lib/parquet/setup-sources.sh index f061230..8cc54bc 100755 --- a/lib/parquet/setup-sources.sh +++ b/lib/parquet/setup-sources.sh @@ -1,70 +1,171 @@ #!/bin/bash +#-----------------------------------------------------------------------------# + +# NOTE: before starting to extract the minimal required sources and dependencies +# run +# $ cd cpp/ +# $ cmake -D ARROW_PARQUET=ON +# in the arrow repository + # provide # - local path of clone of https://github.com/apache/arrow.git # - name/path of main .hpp file of cython extension repo="$1" main="$2" +depf="$3" # check CLI arguments -if [ -z "$repo" ] || [ -z "$main" ]; then - echo "please provide local path of arrow repository and name of main .hpp/.cpp" - echo -e "example:\n./setup-sources.sh /home/mario/Desktop/Record_Evolution/arrow/ reader-writer.cc" +if [ -z "$repo" ] || [ -z 
"$main" ] || [ -z "$depf" ]; then + echo "please provide..." + echo "1. local path of arrow repository" + echo "2. name of main .hpp/.cpp" + echo "3. desired name of dependency file" + echo -e "example:\n./setup-sources.sh /home/mario/Desktop/Record_Evolution/arrow/ reader-writer.cc deps.log" exit 1 fi -echo -e "extracting sources from/for \n1: ${repo}\n2: ${main}\n" +echo -e "extracting sources from/for \n1: ${repo}\n2: ${main}\n3: ${depf}\n" -# generate dependency file -echo -e "generate dependencies:\ng++ -c -MMD ${main} -I ./ -I ${repo}cpp/src/\n" -g++ -c -MMD ${main} -I ${repo}cpp/src/ +# make sure the dependency file is empty +rm -f "${depf}" +touch "${depf}" -# derive name of dependency file -dep=$(echo ${main} | sed 's/.cc/.d/g') +# define maximal recursion depth +maxdep=8 -if [ -f "$dep" ]; then +#-----------------------------------------------------------------------------# - # list dependencies - cat ${dep} | sed 's/ /\n/g' | awk 'NF' | grep -v '\\' | grep '\/' > deps.log +# list recursively the dependencies of source $2 inside repository $1, appending new ones to list file $3 ($4 = recursion depth; all variables are 'local' since the function calls itself) +listDependencies() +{ + local rep="$1" + local src="$2" + local dep="$3" + local rec="$4" - # extract list of headers - cat deps.log | sed ':a;N;$!ba;s/\n/ /g' > deps-headers.log - echo "list of required headers ($(cat deps.log | wc -l))" - cat deps-headers.log - echo "" + echo -e "\nstarting 'listDependencies()' for\n1. ${rep}\n2. ${src}\n3. ${dep}\n4. ${rec}" - # imply list of sources - cat deps.log | sed 's/.h$/.cc/g' | sed 's/.hpp$/.cpp/g' > sources_raw.log - cat sources_raw.log | while read f + # generate dependency file (and remove resulting object file) + echo -e "g++ -c -MMD ${src} -I ${rep}cpp/src/\n" + g++ -c -MMD "${src}" -I "${rep}cpp/src/" + + # derive name of dependency and object files (g++ swaps the last suffix of the basename) + local depf="$(basename "${src}" | sed 's/\.[^.]*$//').d" + local objf="$(basename "${src}" | sed 's/\.[^.]*$//').o" + rm -f "${objf}" + + # list dependencies (one scratch file per recursion depth, so nested calls cannot overwrite the list still being read) by + # 1. deleting the line continuation backslashes + # 2. putting every path on its own line + # 3. keeping only paths inside the repository (this also drops the 'x.o:' target) + # 4. 
dropping the source file itself + # 5. removing empty lines + tr -d '\\' < "${depf}" | tr -s ' ' '\n' | grep -F -- "${rep}" | grep -vxF -- "${src}" | awk 'NF' > "listdep.${rec}.log" + # rm ${depf} + + while IFS= read -r fs do - if [ -f "$f" ]; then - echo $f >> sources_check.log + echo "$fs" + # check if dependency is already in the list + if grep -Fxq "$fs" "$dep" + then + echo "dep exist" + else + echo "dep does not exist yet => adding it" + # add dependency to list + echo "$fs" >> "${dep}" + # check for corresponding source file + local fssourc=$(echo "${fs}" | sed -e 's/\.h$/.cc/' -e 's/\.hpp$/.cpp/') + echo "${fssourc}" + if [ -f "$fssourc" ] + then + echo "source file exists" + # list nested dependencies + if [ "$rec" -lt "$maxdep" ] + then + # increment recursion depth + local recinc=$((rec+1)) + # call recursion + listDependencies "${rep}" "${fssourc}" "${dep}" "${recinc}" + else + echo "maximal recursion depth exceeded" + fi + else + echo "source file does not exist" + fi fi - done - cat sources_check.log | sed ':a;N;$!ba;s/\n/ /g' > deps-sources.log - echo "list of required sources ($(cat sources_check.log | wc -l))" - cat deps-sources.log - echo "" + echo "" + done < "listdep.${rec}.log" - # remove all temporary files - rm ${dep} deps.log sources_raw.log sources_check.log + # cat listdep.log | while read fs + # do + # echo $fs + # # check if dependency is already in the list + # inlist=$(cat listdep.log | grep ${fs} | wc -l) + # echo ${inlist} + # # check for any corresponding source files + # # if [ -f ] + # done +} - # copy required headers and sources - echo -e "copy required headers and sources" - mkdir temp/ - cp --parents `cat deps-headers.log` temp/ - cp --parents `cat deps-sources.log` temp/ - mv temp${repo}cpp/src/* ./ - rm -r temp +#-----------------------------------------------------------------------------# - # remove dependencies - rm deps-headers.log deps-sources.log +# call function to list dependencies (recursively) +listDependencies "${repo}" "${main}" "${depf}" 0 - # show files - ls -lh - -else - - echo 
-e "\nERROR: failed to generate dependency file\n" - -fi +# # generate dependency file (and remove resulting object file) +# echo -e "generate dependencies:\ng++ -c -MMD ${main} -I ./ -I ${repo}cpp/src/\n" +# g++ -c -MMD ${main} -I ${repo}cpp/src/ +# rm $(echo ${main} | sed 's/.cc/.o/g') +# +# # derive name of dependency file +# dep=$(echo ${main} | sed 's/.cc/.d/g') +# +# if [ -f "$dep" ]; then +# +# # list dependencies +# cat ${dep} | sed 's/ /\n/g' | awk 'NF' | grep -v '\\' | grep '\/' > deps.log +# +# # extract list of headers +# cat deps.log | sed ':a;N;$!ba;s/\n/ /g' > deps-headers.log +# echo "list of required headers ($(cat deps.log | wc -l))" +# cat deps-headers.log +# echo "" +# +# # imply list of sources +# cat deps.log | sed 's/.h$/.cc/g' | sed 's/.hpp$/.cpp/g' > sources_raw.log +# cat sources_raw.log | while read f +# do +# if [ -f "$f" ]; then +# echo $f >> sources_check.log +# fi +# done +# cat sources_check.log | sed ':a;N;$!ba;s/\n/ /g' > deps-sources.log +# echo "list of required sources ($(cat sources_check.log | wc -l))" +# cat deps-sources.log +# echo "" +# +# # remove all temporary files +# rm ${dep} deps.log +# rm sources_raw.log sources_check.log +# +# # copy required headers and sources +# echo -e "copy required headers and sources" +# mkdir temp/ +# cp --parents `cat deps-headers.log` temp/ +# cp --parents `cat deps-sources.log` temp/ +# mv temp${repo}cpp/src/* ./ +# rm -r temp +# +# # remove dependencies +# #rm deps-headers.log deps-sources.log +# +# # show files +# ls -lh +# +# else +# +# echo -e "\nERROR: failed to generate dependency file\n" +# +# fi diff --git a/setup.py b/setup.py index f72b96f..005883a 100644 --- a/setup.py +++ b/setup.py @@ -11,6 +11,7 @@ extensions = Extension( language='c++', extra_compile_args=['-std=c++11','-Wno-unused-variable'], extra_link_args=['-std=c++11'], + #extra_objects=["lib/parquet/libarrow.so.200.0.0"], ) setup(