Wednesday, 31 May 2017

Remove HTML all css Microsoft word (MS WORD) format

public static string StripHtml(string source)
    {
        source = Regex.Replace(source, "(<style.+?</style>)|(<script.+?</script>)", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
        source = Regex.Replace(source, "(<img.+?>)", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
        source = Regex.Replace(source, "(<o:.+?</o:.+?>)", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
        source = Regex.Replace(source, "<!--.+?-->", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
        source = Regex.Replace(source, "class=.+?>", ">", RegexOptions.IgnoreCase | RegexOptions.Singleline);

        return source = Regex.Replace(source.Replace(System.Environment.NewLine, "<br/>"), "<[^(a|img|b|i|u|ul|ol|li)][^>]*>", " ");

    }

No comments:

Post a Comment

Upload valid file in C#

    protected bool CheckFileExtandLength(HttpPostedFile HtmlDocFile)     {         try         {             Dictionary<string, byte[]...