123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149 |
- # notes / manual steps:
- #
- # 0. load ppt into libre office, and save as odp and export as html; both
- # formats should be placed into a working directory with this structure:
- # working_dir/
- # exported_formats/exported_odp/
- # exported_formats/exported_html/
- # import_ppt_script.rb
- #
- # 1. Run "ruby import_ppt_script.rb"
- #
- # 2. Import zip file into D2P2
- # setup all the files that will be needed for the zip
- csv_filenames = [
- "Answers.csv",
- "Pages.csv",
- "PagewiseSkills.csv",
- "Questions.csv",
- "QuestionSetQuestions.csv",
- "QuestionSets.csv",
- "QuestionwiseSkills.csv",
- "QuizQuestionSets.csv",
- "Quizzes.csv",
- "Sections.csv",
- "Skills.csv",
- "Tutors.csv"
- ]
- # create all the files we would have, had we exported a tutor from D2P
- csv_filenames.collect{ |f| `touch #{f}`}
- # this will be replaced by the import routine, just using this to avoid clashes
- tutor_id = 999
- # build date string with ruby
- date_string = Time.now.utc
- # build all the file headers
- answers_csv_header = "id,body,correct,question_id,created_at,position,description,image\n"
- pages_csv_header = "id,tutor_id,section_id,name,description,content,position,created_at,quiz_id\n"
- pagewiseskills_csv_header = "id,page_id,skill_id,created_at,tutor_id\n"
- questions_csv_header = "id,body,tutor_id,created_at,feedback,question_type\n"
- questionsetquestions_csv_header = "id,question_set_id,question_id,position\n"
- questionsets_csv_header = "id,name,body,duration,tutor_id,created_at\n"
- questionwiseskills_csv_header = "id,tutor_id,question_id,skill_id,created_at\n"
- quizquestionsets_csv_header = "id,quiz_id,question_set_id,position\n"
- quizzes_csv_header = "id,name,splash_text,feedback_after_question_set,feedback_after_quiz,question_set_position,tutor_id,created_at\n"
- sections_csv_header = "id,name,tutor_id,position,created_at\n"
- skills_csv_header = "id,name,skill_type,created_at,tutor_id,notes\n"
- tutors_csv_header = "id,user_id,name,tagline,about,media_directory,created_at,navBarColor,backgroundImage,adaptive,logoImage,private,progressive\n"
- csv_headers = [
- answers_csv_header,
- pages_csv_header,
- pagewiseskills_csv_header,
- questions_csv_header,
- questionsetquestions_csv_header,
- questionsets_csv_header,
- questionwiseskills_csv_header,
- quizquestionsets_csv_header,
- quizzes_csv_header,
- sections_csv_header,
- skills_csv_header,
- tutors_csv_header
- ]
- #zip headers together with their name for easier enum writing
- files_with_headers = Hash[csv_filenames.zip(csv_headers)]
- # write all the file headers into the files
- files_with_headers.keys.each do |f|
- File.write( f, files_with_headers[f] )
- end
- # Sections.csv
- # defines the structure of the tutor; we create single dummy section
- sections_write_string = "1,PlaceHolderSection,#{tutor_id},1,#{date_string}"
- open('Sections.csv', 'a') { |f|
- f << sections_write_string
- }
- # 1. using exported ODF presentation (.odp)
- # copy all pictures to the uploaded images dir in working dir;
- `cp -r exported_formats/exported_odp/Pictures/ uploaded_images/`
- # get all the presentation text from the xml file
- full_presentation_contents = File.read('exported_formats/exported_odp/content.xml')
- # split the pres using 'draw:name...'
- split_presentation_contents_array = full_presentation_contents.split('draw:name="page')
- # use regex to find which page a picture occurs on
- page_num_regex = /Pictures\/.*?"/
- matches = []
- scanned_array = split_presentation_contents_array[1..-1].each do |a|
- match = a.scan(page_num_regex)
- # remove the trailing quote that the regex grabbed
- matches << match.map!{|m| m.chomp('"')}
- end
- # 2. using the exported html from libreoffice
- # libreoffice outputs each slide as an text{n}.html file; turn each of these into a csv file
- text_html_files = Dir[ "exported_formats/exported_html/text*"].sort_by{|s| s[/\d+/].to_i }
- # grab content from textN.html file
- text_html_files.each_with_index do |filename, index|
- f = File.open(filename, "r")
- f_text = ""
- f.each_line do |l|
- f_text += l.strip
- end
- f.close()
- # look for the end of the header in the ppt, and grab until end of body
- # this will pull the main content from the html-formatted slide
- page_content = f_text.scan(/\/h1\>(.+)\<\/body/).to_s
- page_content.gsub!(","," ")
- page_content = page_content[3..-4]
- matches[index].each do |m|
- m.gsub!("Pictures/","")
- img_write_string = "<p><img src='/d2p2/uploaded_images/target/#{m}'></p>"
- page_content << img_write_string
- end
- # for page 0 (the title page), write to Tutors.csv
- if index == 0
- open('Tutors.csv', 'a') { |f|
- page_write_string = "#{tutor_id},20,Content,,#{page_content},#{index + 1},#{date_string}"
- f << page_write_string
- }
- else
- open('Pages.csv', 'a') { |f|
- page_write_string = "#{index},#{tutor_id},1,Content,,#{page_content},#{index},#{date_string},\n"
- f << page_write_string
- }
- end
- end
- # wrap all the csv files into a zip
- p `zip tutor_to_upload.zip *csv uploaded_images/*`
- csv_filenames.each do |f|
- File.delete(f)
- end
- `rm -rf uploaded_images/`
|