#!/usr/bin/env ruby
# Blogger to Typo converter by Ric Roberts. I make no guarantees about this script - please be careful!
#
# (See http://ricroberts.com/articles/2008/05/27/blogger-to-typo-migration for more details)
#
# Based on blogger.rb from http://andthennothing.net/archives/2006/01/31/blogger-to-typo-migration
#
# The input can be created by using this template in blogger.
#
# NOTE(review): the XML tags of the template were missing from this copy of the
# script. The element names below are reconstructed from what the code reads
# (post, id, title, body, postedon, comments, comment, author); the wrapper tags
# and the two *Body$> placeholders are assumptions -- confirm against the
# article linked above.
#
#   <?xml version="1.0"?>
#   <posts>
#   <Blogger>
#   <post>
#     <id><$BlogItemNumber$></id>
#     <title><$BlogItemTitle$></title>
#     <body><![CDATA[<$BlogItemBody$>]]></body>
#     <postedon><$BlogItemDateTime$></postedon>
#     <comments>
#     <BlogItemComments>
#       <comment>
#         <body><![CDATA[<$BlogCommentBody$>]]></body>
#         <postedon><$BlogCommentDateTime$></postedon>
#         <author><$BlogCommentAuthor$></author>
#       </comment>
#     </BlogItemComments>
#     </comments>
#   </post>
#   </Blogger>
#   </posts>

require 'optparse'
require 'rexml/document'
require 'find'

# Command-line importer: reads Blogger XML export files and creates one
# Article per <post> and one Comment per <comment>. Article and Comment are
# expected to be defined by the file given with --envpath.
#
# Instantiating the class runs the whole migration (see #initialize).
class BloggerMigrate
  # Options that must be present on the command line.
  REQUIRED_OPTIONS = [:author, :author_id, :files, :env_path].freeze

  # Text that starts the Blogger label section inside a post body.
  # NOTE(review): in this copy of the script the literal is just two newlines;
  # it presumably was label markup that got stripped -- confirm before using
  # --labelexclude.
  LABELS_MARKER = "\n\n"

  attr_accessor :options

  # Parses ARGV, loads the environment file, optionally runs a parse-only
  # check over every XML file, then runs the import pass and prints a summary.
  def initialize
    @options = {}
    @imported_ids = []
    @comments_count = 0

    parse_options
    # Wait to load the environment until we know that all options are
    # provided, to speed things up.
    require @options[:env_path]

    convert_files(true) unless @options.include?(:skip_test)
    convert_files(false)

    verb = @options.include?(:test_only) ? 'Would import' : 'Imported'
    puts "#{verb} #{@imported_ids.size} entries, and #{@comments_count} comments."
  end

  # Walks @options[:files] recursively and parses every regular file whose
  # name ends in .xml (case-insensitive).
  #
  # @param test_only [Boolean] when true the files are only parsed (so a
  #   malformed file raises before anything is imported); when false each
  #   parsed document is handed to #convert_entries.
  def convert_files(test_only)
    Find.find(@options[:files]) do |path|
      next unless File.file?(path)
      next unless path =~ /\.xml$/i

      puts "#{test_only ? 'Checking ' : 'Importing '}#{path}"
      # Block form so the handle is closed as soon as the document is built.
      doc = File.open(path) { |io| REXML::Document.new(io) }
      convert_entries(doc) unless test_only
    end
  end

  # Creates an Article (plus its Comments) for every <post> element in +doc+.
  # Posts whose <id> was already seen during this run are skipped. Nothing is
  # saved when the :test_only option is set, but the counters still advance so
  # the final summary reports what would have been imported.
  #
  # @param doc [REXML::Document] a parsed Blogger export
  def convert_entries(doc)
    doc.elements.each('//post') do |post|
      id = post.elements['id'].text.to_i
      if @imported_ids.include?(id)
        puts "Skip already imported entry."
        next
      end
      @imported_ids << id

      art = build_article(post)
      puts "Importing #{art.title}..."
      art.save unless @options.include?(:test_only)

      import_comments(post, art)
    end
  end

  # Reads the command line into @options. Prints a message and exits when any
  # of REQUIRED_OPTIONS is missing; --help prints the usage text and exits.
  def parse_options
    OptionParser.new do |opt|
      opt.banner = 'Usage: blogger.rb [options]. author, authorid, files and envpath are mandatory.'
      opt.on('-a', '--author AUTHOR', 'Username of author in typo') do |author|
        @options[:author] = author
      end
      opt.on('-i', '--authorid AUTHORID', 'The id of the author in typo') do |author_id|
        @options[:author_id] = author_id
      end
      opt.on('-f', '--files FILES',
             'A filename or a directory (N.B. works recursively). Only .xml files will be imported.') do |files|
        @options[:files] = files
      end
      # The line break inside this description is part of the literal as found.
      opt.on('-s', '--skiptest',
             "Skips checking that the XML files are valid before importing. \nDangerous!") do
        @options[:skip_test] = true
      end
      opt.on('-t', '--testonly', "Doesn't import anything, just checks the XML files.") do
        @options[:test_only] = true
      end
      opt.on('-u', '--commenturl', "include the url of commenters") do
        @options[:comment_url] = true
      end
      opt.on('-e', '--envpath ENVPATH', "path to environment location") do |env_path|
        @options[:env_path] = env_path
      end
      opt.on('-l', '--labelexclude', "exclude blogger labels") do
        @options[:label_exclude] = true
      end
      opt.on_tail('-h', '--help', 'Show this message.') do
        puts opt
        exit
      end
      opt.parse!(ARGV)
    end

    return if REQUIRED_OPTIONS.all? { |key| @options.include?(key) }

    puts 'Invalid arguments. See blogger.rb --help for help.'
    exit
  end

  private

  # Builds (but does not save) the Article for one <post> element.
  def build_article(post)
    art = Article.new
    art.author = @options[:author]
    art.user_id = @options[:author_id]
    art.title = post.elements['title'].text

    raw_body = post.elements['body'].text
    art.body = @options[:label_exclude] ? strip_labels(raw_body) : raw_body

    art.created_at = DateTime.parse(post.elements['postedon'].text)
    art.allow_comments = 1
    art.allow_pings = 1
    art.published = 1
    art.state = 'published'
    art.published_at = art.created_at
    art
  end

  # Returns +raw_body+ cut off where the label section starts. When the marker
  # is absent (or the body is nil) the body is returned untouched; previously a
  # missing marker produced an empty body and a nil body raised.
  def strip_labels(raw_body)
    return raw_body unless raw_body

    cut = raw_body.index(LABELS_MARKER)
    cut ? raw_body[0, cut] : raw_body
  end

  # Creates a Comment for every <comment> under each <comments> child of
  # +post+ and bumps the running comment counter.
  def import_comments(post, art)
    post.elements.each('comments') do |comments|
      total = comments.elements.size
      puts " ... and #{total} comments..." if total > 0

      comments.elements.each('comment') do |comment|
        c = build_comment(comment, art)
        c.save! unless @options.include?(:test_only)
        @comments_count += 1
      end
    end
  end

  # Builds (but does not save) the Comment for one <comment> element.
  def build_comment(comment, art)
    c = Comment.new
    c.article_id = art.id
    c.body = comment.elements['body'].text
    c.created_at = DateTime.parse(comment.elements['postedon'].text)
    c.published = 1
    c.state = 'ham'

    author = comment.elements['author']
    link = author.elements['a']
    if link
      # Author element contains a link: take its text as the name and, only
      # when --commenturl was given, its href as the URL.
      c.url = link.attributes['href'] if @options.include?(:comment_url)
      c.author = link.text
    else
      # No link: the name is read from the anon-comment-author span.
      c.author = author.elements[".//span[@class='anon-comment-author']"].text
    end
    c
  end
end

BloggerMigrate.new