#!/usr/bin/env ruby
# Ag -- archiving all the 'golden' flamewars on -dev
# Alex Legler
#
# Command-line front end. This part of the script loads the dependencies,
# sets up the global $options structure with its defaults and parses the
# command line into it. Exactly one action option may be given; the chosen
# action is stored as a Symbol in $options.action.

# Silence Ruby warnings for the whole run.
$VERBOSE = nil

require 'bundler/setup'
require 'mail'
require 'maildir'
require 'elasticsearch'
require 'optparse'
# OpenStruct is used directly below; do not rely on another gem loading it.
require 'ostruct'
require 'parallel'
require 'ruby-progressbar'
require_relative 'lib/utils'
require_relative 'lib/threading'
require_relative 'lib/rendering'
require_relative 'lib/storage'
require_relative 'lib/hotfixes'

# Global settings, filled in by the option parser below.
$options = OpenStruct.new
$options.action = nil          # Symbol of the selected action, nil until one is chosen
$options.name = nil            # mailing list name (--list)
$options.index_only = false
$options.no_threading = false
$options.debug = false
$options.readonly = false
$options.jobs = false          # false unless --jobs was given
$options.progress = true
$options.need_argument = true  # whether a positional argument (ARGV[0]) is required
$options.argmode = nil         # how the positional argument is interpreted (:dir, :file, :msgid, :hash)

# Records +action+ as the action to run. Aborts if an action was already
# chosen, because only one action may be selected per invocation.
select_action = lambda do |action|
  abort 'Can only select one action' unless $options.action.nil?
  $options.action = action
end

op = OptionParser.new do |opts|
  opts.banner = "Usage: ag <<--index-full|--index-new|--delete-msg|--create-index|--delete-index|--reindex|--rethread|--info> <--list listname>> <[--file|--msgid|--hash] > [options]"

  opts.on('--index-full', 'Read the full past archive from Maildir/cur. Needs --list and a Maildir') do
    select_action.call(:do_full)
    $options.argmode = :dir
  end

  opts.on('--index-new', 'Read new messages from Maildir/new and move them to Maildir/cur. Needs --list and a Maildir') do
    select_action.call(:do_incremental)
    $options.argmode = :dir
  end

  opts.on('--delete-msg', 'Delete message. Needs --list and one of --file, --msgid, or --hash') do
    select_action.call(:do_delete_msg)
  end

  opts.on('--create-index', 'Create index but do not populate. Needs --list') do
    select_action.call(:do_create_index)
    $options.need_argument = false
  end

  opts.on('--rethread', 'Rethread messages. Needs --list') do
    select_action.call(:do_rethread)
    $options.need_argument = false
  end

  opts.on('--delete-index', 'Delete index. Needs --list') do
    select_action.call(:do_delete_index)
    $options.need_argument = false
  end

  opts.on('--info', 'Display message details. Needs --list and one of --file, --msgid, or --hash') do
    select_action.call(:do_info)
  end

  opts.on('--reindex', 'Reindex message. Needs --list and --file') do
    select_action.call(:do_reindex)
  end

  opts.on('--list NAME', 'Name of the mailing list to work with') do |name|
    # \A and \z anchor the whole string; ^ and $ would only anchor a single
    # line and let a name containing a newline slip through.
    if name =~ /\A[0-9a-zA-Z-]+\z/
      $options.name = name
    else
      abort 'List name can only consist of letters, numbers and hyphens.'
    end
  end

  opts.on('--file', 'The argument is a file') do
    $options.argmode = :file
  end

  opts.on('--msgid', 'The argument is a Message-Id') do
    $options.argmode = :msgid
  end

  opts.on('--hash', 'The argument is a X-Archives-Hash') do
    $options.argmode = :hash
  end

  opts.on('--index-only', 'Only delete the message from the index, not from disk') do
    $options.index_only = true
  end

  opts.on('--no-threading', 'Only index, don\'t update threading') do
    $options.no_threading = true
  end

  opts.on('--debug', 'Print debug messages') do
    $options.debug = true
  end

  opts.on('--readonly', 'Do not alter the maildir in any way') do
    $options.readonly = true
  end

  # DecimalInteger makes the parser reject non-numeric values instead of
  # silently turning them into 0.
  opts.on('--jobs JOBS', OptionParser::DecimalInteger,
          'Number of parallel jobs to run (defaults to 75% of core count)') do |jobs|
    $options.jobs = jobs
  end

  opts.on('--progress', 'Display the progress bar') do
    $options.progress = true
  end

  opts.on('--no-progress', 'Do not display the progress bar') do
    $options.progress = false
  end
end

# Parse destructively so that only positional arguments remain in ARGV.
# Report malformed command lines as a plain error instead of a backtrace.
begin
  op.parse!
rescue OptionParser::ParseError => e
  abort e.message
end
# Validate the parsed command line: an action and a list name are mandatory.
abort op.help unless $options.action
abort 'List name required' unless $options.name

# Most actions take one positional argument; its meaning depends on
# $options.argmode (Maildir path, file, hash or Message-Id).
if $options.need_argument
  $options.dir = ARGV[0]
  abort 'Need a Maildir/File/Hash/Message-Id to work with' unless $options.dir
end

if $options.argmode == :dir
  # Open maildir and set serializer
  $maildir = Maildir.new(File.join($options.dir), false)
  $maildir.serializer = Maildir::Serializer::Mail.new
end

# Connect to Elasticsearch
$es = Elasticsearch::Client.new(log: false)
$es.transport.reload_connections!

# Hand the --jobs value (false when the option was not given) to Ag::Utils.
Ag::Utils.proc_count = $options.jobs

###############################################################################

# Builds the option hash passed to Parallel.each by the import actions:
# the process count from Ag::Utils and, if enabled, a progress bar title.
def parallel_import_options
  opts = { :in_processes => Ag::Utils.proc_count }
  opts[:progress] = "Importing #{$options.name}" if $options.progress
  opts
end

# --index-full: drops the list's index (if any), recreates it, stores every
# message from Maildir/cur and then rethreads.
# Messages that fail to store are skipped; the failure is only reported
# when --debug is set.
def do_full
  abort "Wrong argument type: #{$options.argmode}" unless $options.argmode == :dir

  begin
    Ag::Storage.delete_index($options.name)
  rescue Elasticsearch::Transport::Transport::Errors::NotFound
    $stderr.puts "Index did not exist yet. Creating." if $options.debug
  end

  Ag::Storage.create_index($options.name)

  Parallel.each($maildir.list(:cur), parallel_import_options) do |maildir_message|
    mail = maildir_message.data

    begin
      Ag::Storage.store($options.name, mail, maildir_message.unique_name)
    rescue => e
      $stderr.puts "Cannot save message #{mail.message_id}: (#{e.class}) #{e.message}" if $options.debug
    end
  end

  do_rethread
end

# --index-new: stores every message from Maildir/new, calls #process on each
# stored message unless --readonly is set, and then rethreads.
# Messages that fail are skipped; the failure is only reported when --debug
# is set.
def do_incremental
  abort "Wrong argument type: #{$options.argmode}" unless $options.argmode == :dir

  Parallel.each($maildir.list(:new), parallel_import_options) do |maildir_message|
    mail = maildir_message.data

    begin
      Ag::Storage.store($options.name, mail, maildir_message.unique_name)
      maildir_message.process unless $options.readonly
    rescue => e
      $stderr.puts "Cannot save message #{mail.message_id} (file #{maildir_message.filename}): #{e.message}" if $options.debug
    end
  end

  do_rethread
end

# --rethread: recalculates threading for the list unless --no-threading
# was given.
def do_rethread
  Ag::Threading.calc($options.name) unless $options.no_threading
end

# --delete-msg: deletes the message identified by Ag::Utils.resolve_id.
# Errors are reported on stderr and do not abort the script.
def do_delete_msg
  id = Ag::Utils.resolve_id

  begin
    Ag::Storage.delete($options.name, id)
  rescue => e
    $stderr.puts "Cannot delete message: #{e}"
  end
end

# --delete-index: deletes the list's index. A missing index and any other
# failure are reported on stderr with distinct messages.
def do_delete_index
  Ag::Storage.delete_index($options.name)
rescue Elasticsearch::Transport::Transport::Errors::NotFound => e
  $stderr.puts "Index does not exist: #{e}"
rescue => e
  $stderr.puts "Cannot delete index: #{e}"
end

# --create-index: creates the list's index without populating it.
# Uses the same Ag::Storage.create_index call as do_full.
def do_create_index
  Ag::Storage.create_index($options.name)
end

# --reindex: not implemented yet; always aborts.
def do_reindex
  # http://babinho.net/2014/07/refresh-your-elasticsearch-index-with-zero-downtime/
  abort 'Come back later.'
end

# --info: fetches the message identified by Ag::Utils.resolve_id and
# pretty-prints its '_source' entry. The underlined heading goes to stderr,
# the message data to stdout. Errors are reported on stderr.
def do_info
  id = Ag::Utils.resolve_id

  begin
    message = Ag::Storage.get($options.name, id)
    raise 'No such message' unless message

    require 'pp'

    str = "Message #{id}"
    $stderr.puts str
    $stderr.puts '-' * str.length
    pp message['_source']
  rescue => e
    $stderr.puts "Cannot display message: #{e}"
  end
end

###############################################################################

# Dispatch: the methods above are private methods of the top-level object,
# so the selected action is only invoked if such a method exists.
if private_methods.include?($options.action)
  send $options.action
else
  abort "Internal Error: Unknown action: #{$options.action}"
end

# vim: ts=2 sts=2 et ft=ruby: