Revision: 35390
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at November 6, 2010 11:47 by sambou
Initial Code
// Counts the words in a block of text.
//
// Markup tags (anything of the form <...>) are removed before counting, and
// every token that touches or lies between square brackets [ ] is ignored,
// so bracketed comments do not contribute to the total.
//
// Passage: the text to examine; may be null, empty, or contain markup.
// Returns: the number of white-space separated words outside brackets.
//          The result is never negative; null or blank input yields 0.
private int WordCount(string Passage)
{
    // Guard against null so callers do not get a NullReferenceException.
    if (Passage == null)
    {
        return 0;
    }

    // Remove tags first. The (.|\n) alternation lets a tag span line breaks,
    // because '.' on its own does not match a line feed.
    string stripped = Regex.Replace(Passage, @"<(.|\n)+?>", "");

    // A null separator array makes Split break on every white-space character
    // (spaces, tabs, carriage returns, line feeds, no-break spaces, ...).
    // RemoveEmptyEntries discards the empty tokens produced by runs of
    // white-space or by the gaps left behind when a tag is removed, so they
    // are not miscounted as words.
    string[] tokens = stripped.Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries);

    int wordCount = 0;
    bool insideBrackets = false;

    foreach (string token in tokens)
    {
        int lastOpen = token.LastIndexOf('[');
        int lastClose = token.LastIndexOf(']');

        if (lastOpen >= 0 || lastClose >= 0)
        {
            // A token carrying a bracket is part of a comment and is never
            // counted. The bracket that appears last in the token decides
            // whether the following tokens are still inside the comment.
            insideBrackets = lastOpen > lastClose;
        }
        else if (!insideBrackets)
        {
            // Plain word outside any bracketed comment.
            wordCount++;
        }
    }

    return wordCount;
}
Initial URL
Initial Description
Accurate word count that matches the one reported by Microsoft Word. As an additional feature, all words within square brackets [ ], which are used for comments, are ignored. Remove the bracket-handling block if this feature is not required.
Initial Title
Returns a word count in a text block, while ignoring words within square brackets
Initial Tags
Initial Language
C#