# import_ppt_script.rb

  # Converts a PowerPoint deck that was pre-exported with LibreOffice into a zip
  # of CSV files plus images (tutor_to_upload.zip) for import into D2P2.
  #
  # Notes / manual steps:
  #
  # 0. Load the ppt into LibreOffice, save it as .odp and export it as HTML.
  #    Both formats should be placed into a working directory with this
  #    structure:
  #      working_dir/
  #        exported_formats/exported_odp/    (must contain content.xml and,
  #                                           if the deck has images, Pictures/)
  #        exported_formats/exported_html/   (must contain the text{n}.html files)
  #        import_ppt_script.rb
  #
  # 1. Run "ruby import_ppt_script.rb" from inside working_dir.
  #
  # 2. Import the zip file into D2P2.

  require 'fileutils'

  ODP_CONTENT_PATH    = 'exported_formats/exported_odp/content.xml'
  ODP_PICTURES_DIR    = 'exported_formats/exported_odp/Pictures'
  HTML_SLIDES_GLOB    = 'exported_formats/exported_html/text*'
  UPLOADED_IMAGES_DIR = 'uploaded_images'
  ZIP_FILENAME        = 'tutor_to_upload.zip'

  # This will be replaced by the import routine; it is only used to avoid clashes.
  TUTOR_ID = 999
  # Second column (user_id) of the single Tutors.csv row.
  TUTOR_USER_ID = 20

  # Matches a picture reference inside content.xml, including the closing quote.
  PICTURE_REF_REGEX = %r{Pictures/.*?"}
  # Captures everything between the first "/h1>" and the last "</body".
  SLIDE_BODY_REGEX = %r{/h1>(.+)</body}

  # Every file we would have had we exported a tutor from D2P, mapped to its
  # header row.
  CSV_HEADERS = {
    'Answers.csv' => "id,body,correct,question_id,created_at,position,description,image\n",
    'Pages.csv' => "id,tutor_id,section_id,name,description,content,position,created_at,quiz_id\n",
    'PagewiseSkills.csv' => "id,page_id,skill_id,created_at,tutor_id\n",
    'Questions.csv' => "id,body,tutor_id,created_at,feedback,question_type\n",
    'QuestionSetQuestions.csv' => "id,question_set_id,question_id,position\n",
    'QuestionSets.csv' => "id,name,body,duration,tutor_id,created_at\n",
    'QuestionwiseSkills.csv' => "id,tutor_id,question_id,skill_id,created_at\n",
    'QuizQuestionSets.csv' => "id,quiz_id,question_set_id,position\n",
    'Quizzes.csv' => "id,name,splash_text,feedback_after_question_set,feedback_after_quiz,question_set_position,tutor_id,created_at\n",
    'Sections.csv' => "id,name,tutor_id,position,created_at\n",
    'Skills.csv' => "id,name,skill_type,created_at,tutor_id,notes\n",
    'Tutors.csv' => "id,user_id,name,tagline,about,media_directory,created_at,navBarColor,backgroundImage,adaptive,logoImage,private,progressive\n"
  }.freeze

  # Creates (or truncates) every CSV file so that it holds only its header row.
  def write_csv_headers
    CSV_HEADERS.each { |filename, header| File.write(filename, header) }
  end

  # Appends +text+ verbatim to +filename+; the handle is closed by the block form.
  def append_to(filename, text)
    File.open(filename, 'a') { |file| file << text }
  end

  # Lists the image file names referenced on each page of the presentation.
  #
  # @param content_xml [String] contents of the ODP's content.xml
  # @return [Array<Array<String>>] one array of names (with the "Pictures/"
  #   prefix removed) per 'draw:name="page' chunk, in document order
  def image_names_by_page(content_xml)
    # The chunk before the first page marker is preamble, hence drop(1).
    content_xml.split('draw:name="page').drop(1).map do |page_xml|
      page_xml.scan(PICTURE_REF_REGEX).map do |reference|
        # Remove the trailing quote that the regex grabbed.
        reference.chomp('"').gsub('Pictures/', '')
      end
    end
  end

  # Returns the exported textN.html slide files ordered by their slide number.
  def slide_html_files
    Dir[HTML_SLIDES_GLOB].sort_by { |path| File.basename(path)[/\d+/].to_i }
  end

  # Reads +filename+ and joins its lines after stripping surrounding whitespace.
  def read_collapsed(filename)
    File.foreach(filename).map(&:strip).join
  end

  # Pulls the main content out of one HTML-formatted slide: everything after the
  # end of the header (</h1>) up to the end of the body.
  #
  # @param slide_html [String] slide markup with line breaks already removed
  # @return [String] the content with commas replaced by spaces so that it does
  #   not add CSV columns; "" when the slide has no </h1>...</body> section
  def extract_page_content(slide_html)
    body = slide_html[SLIDE_BODY_REGEX, 1]
    return '' unless body

    # NOTE(review): the script has always emitted this text in String#inspect
    # form (e.g. " becomes \"), a side effect of how the match used to be
    # unwrapped. That output is kept byte-for-byte here because the importer's
    # expectations are not visible from this file; confirm before changing it.
    body.inspect[1..-2].gsub(',', ' ')
  end

  # Builds the markup that embeds an uploaded image into a page.
  def image_tag(image_name)
    "<p><img src='/d2p2/uploaded_images/target/#{image_name}'></p>"
  end

  # Builds the Tutors.csv row written for the title slide (no trailing newline).
  def tutor_row(page_content, timestamp)
    "#{TUTOR_ID},#{TUTOR_USER_ID},Content,,#{page_content},1,#{timestamp}"
  end

  # Builds the Pages.csv row for the slide at +index+; the index is used as both
  # the page id and its position, and every page goes into section 1.
  def page_row(index, page_content, timestamp)
    "#{index},#{TUTOR_ID},1,Content,,#{page_content},#{index},#{timestamp},\n"
  end

  # Copies the presentation's pictures into the uploaded-images directory.
  # Does nothing when the presentation has no Pictures directory.
  def copy_pictures
    return unless Dir.exist?(ODP_PICTURES_DIR)

    FileUtils.mkdir_p(UPLOADED_IMAGES_DIR)
    # The "/." suffix copies the directory's contents rather than nesting it.
    FileUtils.cp_r(File.join(ODP_PICTURES_DIR, '.'), UPLOADED_IMAGES_DIR)
  end

  # Writes all CSV files into the current directory: the headers, one dummy
  # section, the title slide into Tutors.csv and every later slide into Pages.csv.
  #
  # @param timestamp [#to_s] value written into each created_at column
  def build_csv_files(timestamp = Time.now.utc)
    write_csv_headers

    # Sections.csv defines the structure of the tutor; we create a single dummy
    # section.
    append_to('Sections.csv', "1,PlaceHolderSection,#{TUTOR_ID},1,#{timestamp}")

    images_by_page = image_names_by_page(File.read(ODP_CONTENT_PATH))

    slide_html_files.each_with_index do |filename, index|
      # A slide without a matching ODP page simply gets no images.
      image_markup = images_by_page.fetch(index, []).map { |name| image_tag(name) }.join
      page_content = extract_page_content(read_collapsed(filename)) + image_markup

      # Slide 0 is the title page and describes the tutor itself.
      if index.zero?
        append_to('Tutors.csv', tutor_row(page_content, timestamp))
      else
        append_to('Pages.csv', page_row(index, page_content, timestamp))
      end
    end
  end

  # Wraps all the csv files and the uploaded images into the zip to import.
  def zip_output
    entries = Dir.glob('*csv').sort + Dir.glob(File.join(UPLOADED_IMAGES_DIR, '*')).sort
    # Argument-list form: no shell is involved, so odd file names are safe.
    zipped = system('zip', ZIP_FILENAME, *entries)
    warn "zip did not complete successfully; #{ZIP_FILENAME} may be missing or incomplete" unless zipped
  end

  # Removes the intermediate CSV files and the copied images.
  def remove_intermediate_files
    FileUtils.rm_f(CSV_HEADERS.keys)
    FileUtils.rm_rf(UPLOADED_IMAGES_DIR)
  end

  # Runs the whole conversion; intermediate files are removed even on failure.
  def import_presentation
    copy_pictures
    build_csv_files
    zip_output
  ensure
    remove_intermediate_files
  end

  if __FILE__ == $PROGRAM_NAME
    if File.exist?(ODP_CONTENT_PATH)
      import_presentation
    else
      warn "#{ODP_CONTENT_PATH} not found: run this script from the working " \
           'directory that contains exported_formats/ (see the notes at the top).'
    end
  end