using System;
using System.Collections.Generic;
using System.IO;
using Latino;

namespace SvmPosTagger
{
    /// <summary>
    /// Console entry point of the SVM POS tagger training module: parses the command line,
    /// loads a tagged corpus, trains the models and writes them to the model file.
    /// </summary>
    class Program
    {
        // Set from the -v option; controls whether Verbose() prints anything.
        static bool m_verbose
            = false;

        /// <summary>
        /// Writes the usage text (program banner, argument description and options) to the console.
        /// </summary>
        static void OutputHelp()
        {
            Console.WriteLine("*** SVM POS Tagger 1.0 - Training Module ***");
            //Console.WriteLine("(SOURCEFORGE.NET URL)");
            Console.WriteLine();
            Console.WriteLine("Usage:");
            Console.WriteLine("SvmPosTaggerTrain [<options>] <corpus_file_name> <model_file_name>");
            Console.WriteLine();
            Console.WriteLine("<options>:          See below.");
            Console.WriteLine("<corpus_file_name>: Tagged corpus for training (input).");
            Console.WriteLine("<model_file_name>:  Trained model (output).");
            Console.WriteLine();
            Console.WriteLine("Options:");
            Console.WriteLine("-v             Verbose.");
            Console.WriteLine("               (default: not set; quiet mode)");
            Console.WriteLine("-c:<float>0>   SVM parameter; tradeoff between error and margin.");
            Console.WriteLine("               (default: 5000)");
            Console.WriteLine("-eps:<float>0> SVM parameter; termination criterion.");
            Console.WriteLine("               (default: 0.1)");
            Console.WriteLine("-mff:<int>0>   Minimum feature frequency.");
            Console.WriteLine("               (default: 2)");
        }

        /// <summary>
        /// Extracts the text between the first and second ':' of an option and parses it as a
        /// floating-point number using the invariant culture (dot as decimal separator).
        /// </summary>
        /// <param name="option">Option text containing at least one ':' (e.g. "-c:5000").</param>
        /// <returns>The parsed value, or 0 if the text is not a finite number.</returns>
        static double ParseDoubleOption(string option)
        {
            double value;
            string text = option.Split(':')[1];
            System.Globalization.NumberStyles styles
                = System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;
            if (!double.TryParse(text, styles, System.Globalization.CultureInfo.InvariantCulture, out value)
                || double.IsNaN(value) || double.IsInfinity(value))
            {
                // 0 is rejected by the range checks in ParseParams, so it doubles as the "invalid" marker.
                return 0;
            }
            return value;
        }

        /// <summary>
        /// Extracts the text between the first and second ':' of an option and parses it as an
        /// integer using the invariant culture.
        /// </summary>
        /// <param name="option">Option text containing at least one ':' (e.g. "-mff:2").</param>
        /// <returns>The parsed value, or 0 if the text is not a valid 32-bit integer.</returns>
        static int ParseIntOption(string option)
        {
            int value;
            string text = option.Split(':')[1];
            if (!int.TryParse(text, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out value))
            {
                // 0 is rejected by the range check in ParseParams, so it doubles as the "invalid" marker.
                return 0;
            }
            return value;
        }

        /// <summary>
        /// Parses and validates the command line. All arguments except the last two are treated as
        /// options; the last two are the corpus file name and the model file name.
        /// On any problem an error message and the usage text are printed.
        /// </summary>
        /// <param name="args">Command-line arguments.</param>
        /// <param name="verbose">Set to true when -v is present; otherwise left unchanged.</param>
        /// <param name="c">Receives the -c value; left unchanged when the option is absent.</param>
        /// <param name="eps">Receives the -eps value; left unchanged when the option is absent.</param>
        /// <param name="min_feat_freq">Receives the -mff value; left unchanged when the option is absent.</param>
        /// <param name="corpus_file_name">Receives the second-to-last argument once the option values are valid.</param>
        /// <param name="model_file_name">Receives the last argument once the option values are valid.</param>
        /// <returns>True if all options and both file names were accepted; false otherwise.</returns>
        static bool ParseParams(string[] args, ref bool verbose, ref double c, ref double eps, ref int min_feat_freq, ref string corpus_file_name, ref string model_file_name)
        {
            // the two trailing file names are mandatory; without them the indexing below would fail
            if (args == null || args.Length < 2)
            {
                OutputHelp();
                return false;
            }
            // parse
            for (int i = 0; i < args.Length - 2; i++)
            {
                // options are case-insensitive; invariant lowering keeps this independent of the user's locale
                string arg_lwr = args[i].ToLowerInvariant();
                if (arg_lwr == "-v")
                {
                    verbose = true;
                }
                else if (arg_lwr.StartsWith("-c:", StringComparison.Ordinal))
                {
                    c = ParseDoubleOption(arg_lwr);
                }
                else if (arg_lwr.StartsWith("-eps:", StringComparison.Ordinal))
                {
                    eps = ParseDoubleOption(arg_lwr);
                }
                else if (arg_lwr.StartsWith("-mff:", StringComparison.Ordinal))
                {
                    min_feat_freq = ParseIntOption(arg_lwr);
                }
                else
                {
                    Console.WriteLine("*** Invalid option {0}.\r\n", args[i]);
                    OutputHelp();
                    return false;
                }
            }
            // verify settings
            if (c <= 0)
            {
                Console.WriteLine("*** Invalid -c value. Must be a floating-point number greater than zero.\r\n");
                OutputHelp();
                return false;
            }
            if (eps <= 0)
            {
                Console.WriteLine("*** Invalid -eps value. Must be a floating-point number greater than zero.\r\n");
                OutputHelp();
                return false;
            }
            if (min_feat_freq <= 0)
            {
                Console.WriteLine("*** Invalid -mff value. Must be an integer number greater than zero.\r\n");
                OutputHelp();
                return false;
            }
            // check file names
            corpus_file_name = args[args.Length - 2];
            model_file_name = args[args.Length - 1];
            if (!Utils.VerifyFileName(corpus_file_name, /*must_exist=*/true))
            {
                Console.WriteLine("*** Invalid corpus file name or file not found.\r\n");
                OutputHelp();
                return false;
            }
            if (!Utils.VerifyFileName(model_file_name, /*must_exist=*/false))
            {
                Console.WriteLine("*** Invalid model file name.\r\n");
                OutputHelp();
                return false;
            }
            return true;
        }

        /// <summary>
        /// Writes a formatted message to the console, but only when verbose mode is on.
        /// No newline is appended; callers include line breaks in <paramref name="text"/>.
        /// </summary>
        /// <param name="text">Composite format string.</param>
        /// <param name="args">Format arguments.</param>
        static void Verbose(string text, params object[] args)
        {
            if (m_verbose)
            {
                Console.Write(text, args);
            }
        }

        /// <summary>
        /// Program entry point. Prints the usage text when fewer than two arguments are given;
        /// otherwise parses the arguments, builds feature vectors for every tagged word of the
        /// corpus, trains both models and saves dictionary, feature space and models to the model file.
        /// Any exception is reported on the console and the process exit code is set to 1.
        /// </summary>
        /// <param name="args">Command-line arguments.</param>
        static void Main(string[] args)
        {
            try
            {
                if (args.Length < 2)
                {
                    OutputHelp();
                    return;
                }
                // Stopwatch is monotonic, unlike DateTime.Now which jumps on clock/DST adjustments
                System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
                double c = 5000, eps = 0.1;
                int min_feat_freq = 2;
                string corpus_file_name = null, model_file_name = null;
                if (!ParseParams(args, ref m_verbose, ref c, ref eps, ref min_feat_freq, ref corpus_file_name, ref model_file_name))
                {
                    return; // ParseParams has already reported the problem
                }
                Corpus corpus = new Corpus();
                Verbose("Loading training corpus ...\r\n");
                WordDictionary dictionary = corpus.LoadFromFile(corpus_file_name, /*create_dictionary=*/true);
                Verbose("Initializing models ...\r\n");
                // NOTE(review): kw_model / uw_model presumably stand for known-word / unknown-word models - confirm
                ModelIndex kw_model = new ModelIndex(dictionary);
                PosModel uw_model = new PosModel();
                Dictionary<string, int> feature_space = new Dictionary<string, int>();
                Verbose("Generating feature vectors ...\r\n");
                for (int i = 0; i < corpus.TaggedWords.Count; i++)
                {
                    // "\r" rewinds to the start of the line so the progress counter overwrites itself
                    Verbose("{0} / {1}\r", i + 1, corpus.TaggedWords.Count);
                    SparseVector2<double> feature_vector = corpus.GenerateFeatureVector(i, feature_space, /*extend_feature_space=*/true, dictionary);
                    // every example goes to kw_model; only words the dictionary reports as hidden also go to uw_model
                    kw_model.AddExample(corpus.TaggedWords[i].Tag, feature_vector);
                    if (dictionary.IsHiddenWord(corpus.TaggedWords[i].WordL))
                    {
                        uw_model.AddExample(corpus.TaggedWords[i].Tag, feature_vector);
                    }
                }
                Verbose("\r\n");
                Verbose("Training models ...\r\n");
                kw_model.Train(m_verbose, c, eps, min_feat_freq);
                uw_model.Train(m_verbose, c, eps, min_feat_freq);
                Verbose("Saving models ...\r\n");
                string model_dir = new FileInfo(model_file_name).DirectoryName;
                BinarySerializer writer = new BinarySerializer(model_file_name, FileMode.Create);
                try
                {
                    writer.DataDir = model_dir;
                    // the write order below defines the layout of the model file
                    dictionary.Save(writer);
                    Utils.SaveDictionary(feature_space, writer);
                    kw_model.Save(writer);
                    uw_model.Save(writer);
                }
                finally
                {
                    // close the output even if one of the Save calls throws
                    writer.Close();
                }
                Console.WriteLine("Training time: {0} milliseconds.", stopwatch.Elapsed.TotalMilliseconds);
                Verbose("All done.\r\n");
            }
            catch (Exception exception)
            {
                // Exception.ToString() already contains the stack trace, so it is printed only once
                Console.WriteLine("*** Unexpected error occurred. Details: {0}", exception);
                // signal the failure to calling scripts
                Environment.ExitCode = 1;
            }
        }
    }
}
