count.cpp, WordFreq.cpp, wordfreq.cpp, wordfreq.cpp, ola.cpp, poe

Printed by Owen L. Astrachan
Mar 21, 07 11:16
5
#include
#include
#include
#include
#include
using namespace::std;
10
count.cpp
Page 1/1
<map>
<vector>
<algorithm>
<iostream>
<fstream>
Mar 21, 07 11:17
5
// What can I say, I’m lazy :−)
// WordFreq.cpp
//
// Owen's Simple Code program
// See: http://www.cs.duke.edu/csed/code/
// Reads a text file and reports on the frequency of word occurrence,
// ignoring case. Output reports on words in order of frequency, with
// the most frequent words first.
// By Beth Katz, March 20, 2007 ([email protected])
// order by freq, break ties by word
// Ordering predicate for (word, count) pairs: a pair with a larger count
// sorts first; pairs with equal counts are ordered by ascending word.
bool MoreFrequent(const pair<string, int> &one, const pair<string, int> &two)
{
    if (one.second != two.second) {
        return one.second > two.second;
    }
    return one.first < two.first;
}
10
15
#include
#include
#include
#include
#include
<iostream>
<fstream>
<map>
<string>
<set>
// for standard input and output
// for input file
using namespace std;
15
20
int main(int argc, const char *argv[])
{
map<string, int> table;
ifstream in(argv[1]);
string word;
25
while (in >> word) {
transform(word.begin(), word.end(), word.begin(), (int(*)(int))tolowe
r); // convert tolower
table[word]++;
// increment freq (default val = 0)
}
// read in words counting them and recording frequency
void fillWordMap(ifstream & textFile, map<string, int> & uniqueWords);
20
25
// store words in sets of how frequently they appear
void buildFrequencyMap(const map<string, int> & uniqueWords,
map<int, set<string> > & freq);
// print the frequency list with most frequent words first
void printFrequency(const map<int, set<string> > & freq);
// convert s to lowercase
void lowerCase(string & s);
vector<pair<string, int> > pairs(table.begin(), table.end()); // copy to vec
tor
30
sort(pairs.begin(), pairs.end(), MoreFrequent);
for (int i = 0; i < pairs.size(); i++)
cout << pairs[i].second << ’\t’ << pairs[i].first << endl;
30
return 0;
int main(int argc, char *argv[]) {
ifstream textFile(argv[1]);
map<string, int> uniqueWords;
map<int, set<string> > freq;
fillWordMap(textFile, uniqueWords);
buildFrequencyMap(uniqueWords, freq);
printFrequency(freq);
35
}
textFile.close( );
return 0;
40
}
void fillWordMap(ifstream & textFile, map<string, int> & uniqueWords) {
string word;
45
while (textFile >> word) {
lowerCase(word);
uniqueWords[word]++;
}
50
}
// Inverts uniqueWords (word -> count) into freq (count -> set of words).
// Every word is inserted into the set stored under its own count; existing
// contents of freq are kept and added to.
void buildFrequencyMap(const map<string, int> & uniqueWords,
                       map<int, set<string> > & freq) {
    for (map<string, int>::const_iterator it = uniqueWords.begin();
         it != uniqueWords.end(); ++it) {
        freq[it->second].insert(it->first);
    }
}
60
// Writes one "count<TAB>word" line to cout for every word in freq.
// The outer map is walked in reverse, so the largest counts are printed
// first; within one count the words come out in the set's ascending order.
void printFrequency(const map<int, set<string> > & freq) {
    typedef map<int, set<string> >::const_reverse_iterator FreqIter;
    typedef set<string>::const_iterator WordIter;

    for (FreqIter fit = freq.rbegin(); fit != freq.rend(); ++fit) {
        // Bind by reference: copying the whole set for every count is wasted work.
        const set<string> & words = fit->second;
        for (WordIter sit = words.begin(); sit != words.end(); ++sit) {
            cout << fit->first << "\t" << *sit << endl;
        }
    }
}
Friday March 23, 2007
1/count.cpp, 3/WordFreq.cpp
1/7
Printed by Owen L. Astrachan
Mar 21, 07 11:17
75
WordFreq.cpp
for (it = s.begin( ); it != s.end( ); ++it) {
*it = tolower(*it);
}
80
Page 2/2
Mar 21, 07 11:26
void lowerCase(string & s) {
string::iterator it;
5
}
10
15
20
25
wordfreq.cpp
Page 1/1
/*
* Short simple solution in C++ with no error checking
* J. S. Gray − University of Hartford, W. Hartford, CT
*/
#include <cctype>
#include <climits>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
using namespace std;
/*
 * Lower-cases each line of the file named by argv[1], splits it on the
 * characters in delim, and writes every token on its own line to a
 * "sort | uniq -c | sort" shell pipeline. The pipeline does the counting
 * and ordering and prints the result on standard output.
 *
 * Returns 0 on success, and 1 if the file name is missing, the file cannot
 * be opened, or the pipeline cannot be started.
 */
int
main(int argc, char *argv[]) {
    char line[PIPE_BUF], *s1, *s2;
    static const char *delim = " <>|?\"#*()[]{}=%\\\t\n";
    FILE *pfout;

    if (argc < 2) {
        fprintf(stderr, "usage: %s filename\n", argc > 0 ? argv[0] : "wordfreq");
        return 1;
    }

    ifstream fin;
    fin.open(argv[1], ios_base::in);
    if (!fin) {
        fprintf(stderr, "cannot open %s\n", argv[1]);
        return 1;
    }

    // "-k1,1r" is the POSIX spelling of the obsolete "+0r -1" key (first
    // field, reversed), which current sort implementations may reject.
    pfout = popen("sort | uniq -c | sort -k1,1r", "w");
    if (pfout == NULL) {
        perror("popen");
        return 1;
    }

    for (;;) {
        fin.getline(line, PIPE_BUF, '\n');
        if (fin.fail()) {
            // failbit with nothing extracted, or at end of file: input is done.
            if (fin.eof() || fin.gcount() == 0)
                break;
            // Otherwise the line was longer than the buffer. Process the
            // chunk that was read and carry on instead of dropping the
            // rest of the file.
            fin.clear();
        }
        // tolower() needs a value representable as unsigned char.
        for (int i = 0; line[i]; ++i)
            line[i] = static_cast<char>(tolower(static_cast<unsigned char>(line[i])));
        s1 = line;
        while ((s2 = strtok(s1, delim)) != NULL) {
            fprintf(pfout, "%s\n", s2);
            s1 = NULL;    // later strtok calls continue in the same line
        }
    }
    fin.close();
    pclose(pfout);
    return 0;
}
30
Friday March 23, 2007
3/WordFreq.cpp, 4/wordfreq.cpp
2/7
Printed by Owen L. Astrachan
Mar 21, 07 11:44
5
10
15
wordfreq.cpp
Page 1/1
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include <algorithm>
#include <vector>
#include <cctype>
using namespace std;
Mar 21, 07 15:50
5
// Sort predicate for (word, count) pairs: descending by count, with equal
// counts ordered by ascending word.
bool paircomp(const pair<string,int>& a, const pair<string,int>& b){
    const bool sameCount = (a.second == b.second);
    return sameCount ? (a.first < b.first) : (a.second > b.second);
}
10
// char-to-char wrapper around tolower(), usable as the operation passed to
// std::transform over a string.
char mylower(char ch){
    // tolower() is only defined for values representable as unsigned char
    // (or EOF); a plain char may be negative, so convert first.
    return static_cast<char>(tolower(static_cast<unsigned char>(ch)));
}
15
20
25
30
// Counts the case-insensitive frequency of each whitespace-delimited word in
// the file named by argv[1] and prints "count<TAB>word" lines to cout,
// ordered by paircomp.
// Returns 0 after printing the report or the usage message, and 1 if the
// file cannot be opened.
int main(int argc, char * argv[]){
    if (argc <= 1){
        cout << "usage: " << argv[0] << " filename" << endl;
        return 0;
    }

    ifstream input(argv[1]);
    if (!input){
        cerr << "cannot open " << argv[1] << endl;
        return 1;
    }

    string word;
    map<string,int> table;
    while (input >> word){
        transform(word.begin(), word.end(), word.begin(), mylower);
        table[word]++;
    }

    // Copy the map into a vector so it can be re-sorted by frequency.
    vector<pair<string,int> > entries(table.begin(), table.end());
    sort(entries.begin(), entries.end(), paircomp);

    for (vector<pair<string,int> >::size_type k = 0; k < entries.size(); ++k){
        cout << entries[k].second << "\t" << entries[k].first << endl;
    }
    return 0;
}
40
Friday March 23, 2007
ola.cpp
Page 1/1
#include <ctype.h>
#include <iostream>
#include <fstream>
#include <map>
#include <vector>
using namespace std;
// A word together with its occurrence count.
// operator< orders objects by descending count, then ascending word, so a
// plain std::sort puts the most frequent words first.
class WordFreq {
public:
    WordFreq(const string &_word, int _count) : word(_word), count(_count) {
    }

    // True when *this should be listed before other.
    bool operator< (const WordFreq &other) const {
        if (count != other.count)
            return count > other.count;
        return word < other.word;
    }

    friend ostream &operator<< (ostream &out, const WordFreq &wf);

private:
    string word;
    int count;
};
ostream &operator<< (ostream &out, const WordFreq &wf) {
out << wf.count << ’\t’ << wf.word;
return out;
}
int main(int argc, char **argv) {
map<string, int> count;
vector<WordFreq> outlist;
string word;
ifstream fin(argv[1]);
while (fin >> word) {
for (unsigned int i=0; i<word.size(); i++)
word[i] = tolower(word[i]);
count[word]++;
}
for (map<string, int>::const_iterator ii=count.begin(); ii!=count.end();
ii++)
outlist.push_back(WordFreq(ii−>first, ii−>second));
sort(outlist.begin(), outlist.end());
for (vector<WordFreq>::const_iterator jj=outlist.begin(); jj!=outlist.en
d(); jj++)
cout << *jj << endl;
return 0;
}
5/wordfreq.cpp, 6/ola.cpp
3/7
Printed by Owen L. Astrachan
poe.cpp
Mar 22, 07 10:05
Page 1/2
Mar 22, 07 10:05
poe.cpp
Page 2/2
/*
5
return 0;
Name: poe.cpp
Author: Kenneth Bjerner
Date: 21−03−07 08:10
Description: Reads words separated by white space from a text file. The
output is a list of each word's frequency followed by the word. The words
are treated as case-insensitive. Words with the same frequency are written
in sorted order.
*/
75
80
}
//converts the characters in s to lower case
string to_lower_case(string s)
{
transform(s.begin(), s.end(), s.begin(), to_lower);
//the standard tolower is a macro and can not be used here
10
15
#include
#include
#include
#include
#include
#include
#include
#include
#include
return s;
<fstream>
<iostream>
<map>
<vector>
<algorithm>
<cctype>
<string>
<utility>
<iomanip>
}
85
90
20
using namespace std;
25
30
35
string to_lower_case(string);
char to_lower(char);
typedef pair<string, int> word_pair;
bool compare(word_pair wp1, word_pair wp2);
95
//converts c to lower case.
//char-to-char wrapper around tolower().
char to_lower(char c)
{
    // tolower() is only defined for values representable as unsigned char
    // (or EOF); a plain char may be negative, so convert first.
    return static_cast<char>(tolower(static_cast<unsigned char>(c)));
}
//sort predicate: true when wp1 has a strictly higher count than wp2.
//Only the counts are compared; the words themselves are ignored.
bool compare(word_pair wp1, word_pair wp2)
{
    const bool first_is_more_frequent = wp2.second < wp1.second;
    return first_is_more_frequent;
}
int main(int argc, char *argv[])
{
if (argc != 2)
{
cerr << "Wrong number of arguments\nuse: poe <path\\filename>" << endl;
exit(EXIT_FAILURE);
}
else
{
ifstream in(argv[1]);
if (in)
{
string word;
map <string, int> pair_map; //stores the pairs of word and count
40
while (in >> word)
{
word = to_lower_case(word);
pair_map[word]++;
}
in.close();
45
50
//a vector for the word pairs to sort them in descending frequence
//order.
vector<word_pair> pair_vector (pair_map.begin(), pair_map.end());
//sorts the vector with the words frequencesin descending order
//and the words with the same frequence are in lexilogical order
stable_sort(pair_vector.begin(), pair_vector.end(), compare);
55
//output the pairs of frequence and word.
vector<word_pair>::const_iterator cit = pair_vector.begin();
while (cit != pair_vector.end())
{
cout << setw(8) << left << cit −> second << cit −> first << endl
60
;
cit++;
}
65
}
else
{
cerr << "The file could not be opened" << endl;
exit(EXIT_FAILURE);
}
70
}
Friday March 23, 2007
7/poe.cpp
4/7
Printed by Owen L. Astrachan
Mar 22, 07 14:50
5
10
15
20
WordFreq_HHolland.cpp
Page 1/3
// ============================================================================
//
// Program:   Word Frequency Count
// Module:    WordFreq_HHolland.cpp
//
// Author:    Mr. Herbert Holland
//            Electrical Engineering Section
//            Department of Engineering
//            U.S. Coast Guard Academy
//            New London, CT
//
// Date:      20 Mar 07
//
// Purpose:   This program reads a text file whose path/name is provided as
//            a command line argument. Words are case insensitive and all
//            are converted to lowercase. The number of instances of each
//            word is counted. Output is ordered by frequency (greatest first)
//            and then in lexicographical sequence.
//
// Note:      This code was tested using the Borland C++Builder v4 compiler.
//            I have used conditional compilation to get rid of anything that
//            is likely to cause a problem with other compilers.
//
// ============================================================================
Mar 22, 07 14:50
75
80
WordFreq_HHolland.cpp
Page 2/3
// ---------------------------------------------------------------------------
//
// Data Type:    class WordList
//
// Description:  This class provides a container for WordData that adds
//               new words, increments word counts, and sorts the list so
//               that it can be output as specified.
//
// ---------------------------------------------------------------------------
class WordList {
85
public :
// Convert the word W to lower case and add it to the list with a
// count of 1, or if it already exists in the list increment its
// count.
void add ( const string & W ) ;
// Sort the list of words by decreasing count and, for words of
// equal frequency, by lexicographical order.
void sort ( void )
{ std::sort( List.begin() , List.end() ) ; } ;
// Inspector functions used to output the finished list.
unsigned size ( void ) const
{ return List.size() ; } ;
int count_at ( unsigned I )
{ return List[I].Count ; } ;
const string & word_at ( unsigned I )
{ return List[I].Word ; } ;
90
95
25
#ifdef __BCPLUSPLUS__
#pragma hdrstop
#include <condefs.h>
#endif
100
private :
vector<WordData>
} ; //
List ;
end class WordList
30
35
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <algorithm>
using namespace std ;
105
110
40
45
// ---------------------------------------------------------------------------
//
// Data Type:    struct WordData
//
// Description:  Contains information about a single word: the word itself
//               and its count (frequency). External code converts all words
//               to lower case. Makes use of the convenient fact that in C++
//               a struct can have member functions, and in particular
//               constructors. Definition of the == and < operators allows
//               me to create a vector of WordData and sort it.
//
// ---------------------------------------------------------------------------
115
120
50
// One word and the number of times it has been counted.
struct WordData {
    string Word ;     // the word itself
    int    Count ;    // its count (frequency)

    // default constructor: empty word, count 0
    WordData ( void ) : Word( "" ), Count( 0 ) {}

    // word constructor: count defaults to 1, i.e. a word seen once
    WordData ( const string & W , int C = 1 ) : Word( W ) , Count( C ) {}
} ;
130
60
// Two WordData are equal when both the word and the count match.
bool operator== ( const WordData & WD1 , const WordData & WD2 )
{
    const bool differs = ( WD1.Word != WD2.Word || WD1.Count != WD2.Count ) ;
    return !differs ;
}
// Sort order for WordData: higher counts first (most frequent words lead),
// and for equal counts the lexicographically smaller word first.
bool operator< ( const WordData & WD1 , const WordData & WD2 )
{
    return ( WD1.Count == WD2.Count ) ? ( WD1.Word < WD2.Word )
                                      : ( WD1.Count > WD2.Count ) ;
}
Friday March 23, 2007
//−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
#pragma argsused
int main(int argc, char* argv[])
{
string
IFName ;
// input file name
ifstream
InFile ;
// input file
string
Word ;
// word read from input
WordList
List ;
// list of words with frequencies
135
if ( argc != 2 ) {
cout << "\nWrong number of command line arguments \n\n" ;
return −1 ;
}
140
IFName = argv[1] ;
// get input file name from command line
InFile.open( IFName.c_str() ) ;
if ( InFile.fail() ) {
cout << "\nUnable to open file " << IFName << " \n\n" ;
return −2 ;
}
65
70
void WordList::add( const string & W )
{
unsigned k ;
// index
string
Wd ( W ) ;
// copy of W
// convert all characters in word to lower case
for ( k = 0 ; k < W.length() ; k++ )
#pragma warn −sig
Wd[k] = tolower( Wd[k] ) ;
#pragma warn .sig
// search for the word in the list −− if found, increment its count
for ( k = 0 ; k < List.size() ; k++ ) {
if ( List[k].Word == Wd ) {
List[k].Count++ ;
break ;
}
}
// if word not in list, add it (with a count of 1)
if ( k == List.size() ) {
List.push_back( WordData( Wd ) ) ;
}
}
145
8/WordFreq_HHolland.cpp
5/7
Printed by Owen L. Astrachan
Mar 22, 07 14:50
WordFreq_HHolland.cpp
Page 3/3
Mar 23, 07 0:26
// read input file word−by−word to end
InFile >> Word ;
while ( ! InFile.eof() ) {
List.add( Word ) ; // adds new words, increments count for existing word
150
s
5
InFile >> Word ;
}
155
InFile.close() ;
List.sort() ;
10
Page 1/2
//#pragma warning (disable : 4786)
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include <algorithm>
#include <vector>
using namespace std;
// display sorted list of counts and words
for ( unsigned I = 0 ; I < List.size() ; I++ )
cout << List.count_at(I) << ’\t’ << List.word_at(I) << endl ;
160
freq.cpp
// Program for Owen written by Briana Morrison
15
return 0 ;
}
20
// The program assumes that the filename is the only thing passed into it.
// If you are using the standard argc and argv, then the arguments to main
// should change, and the first line should be uncommented.
// Counts the frequency of each whitespace-delimited word in the file named
// by argv[1], folding 'A'..'Z' to lower case, and prints "count<TAB>word"
// lines to cout with the highest counts first.
// Returns 1 when no file name is given, 0 otherwise.
int main(int argc, char * argv[])
{
    if (argc < 2)
    {
        cerr << "usage: " << (argc > 0 ? argv[0] : "freq") << " filename" << endl;
        return 1;
    }

    string filename(argv[1]);
    // string filename;
    //cout << "Enter filename" << endl;
    //cin >> filename;

    ifstream infile(filename.c_str());
    //ifstream infile("poe.txt");

    string word;
    bool debug = false;   // for debugging purposes
    int count = 0;        // count of words for debugging

    // create a map of words to frequencies
    map<string, int, less<string> > words;

    // create a multimap of frequencies to words
    multimap<int, string, greater<int> > freq;

    // loop while there is input in the file
    while (infile >> word)
    {
        count++;

        // convert word to lowercase
        for (string::size_type i = 0; i < word.length(); i++)
            if ('A' <= word[i] && word[i] <= 'Z')
                word[i] = tolower(word[i]);

        if (debug) cout << word << endl;

        // operator[] creates a missing entry with value 0, so a single
        // lookup covers both the "new word" and the "seen before" case
        int & occurrences = words[word];
        const bool seenBefore = (occurrences != 0);
        ++occurrences;
        if (debug) cout << word
                        << (seenBefore ? " found and count incremented to "
                                       : " not found and count incremented to ")
                        << occurrences << endl;
    }

    if (debug) cout << "count is " << count << " and map has " << words.size() << endl;

    // now go through map and add everything to multimap...words still in
    // alphabetical order
    // NOTE(review): printing ties alphabetically relies on the multimap
    // keeping equal keys in insertion order -- confirm for the target library.
    for (map<string, int, less<string> >::iterator it = words.begin(); it != words.end(); ++it)
    {
        pair<int, string> p(it->second, it->first);
        freq.insert(p);
    }

    if (debug) cout << "map has " << words.size() << " and multimap has " << freq.size() << endl;

    //ofstream outfile("myout.txt");

    for (multimap<int, string, greater<int> >::iterator myit = freq.begin(); myit != freq.end(); ++myit)
    {
        cout << myit->first << "\t" << myit->second << endl;
    }

    //outfile.close();

    return 0;
}
Friday March 23, 2007
9/freq.cpp
7/7