/**
 * Copyright 2007 The Apache Software Foundation, All Rights Reserved,
 * Copyright 2008 Ethan Blanton.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License.  You may
 * obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 *
 * @see http://hadoop.apache.org/core/docs/r0.18.0/mapred_tutorial.html
 */

package edu.purdue.cs.eblanton;

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.mapred.*;
import org.apache.hadoop.io.*;

/**
 * Map stage of a word-count job.
 *
 * <p>Every invocation of {@code map} receives one line of text, breaks
 * it into tokens with a {@link StringTokenizer} using that class's
 * default delimiters, and emits one &lt;Text, IntWritable&gt; pair per
 * token.  The IntWritable half of each pair always holds the value 1.
 * The incoming key is never consulted.
 *
 * @author The Apache Software Foundation
 * @author Ethan Blanton
 *
 * @see MapReduceBase
 * @see Mapper
 */
public class WordMap extends MapReduceBase
    implements Mapper<WritableComparable, Text, Text, IntWritable> {
    /** The constant value 1 that accompanies every emitted token. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Reusable holder for the current token; overwritten before each emit. */
    private final Text token = new Text();

    /**
     * Splits one line of text into tokens and emits a
     * &lt;token, 1&gt; pair for each of them, in the order the
     * tokenizer yields them.  A line with no tokens emits nothing.
     *
     * @param key       the input key (ignored)
     * @param value     the line of text to be tokenized
     * @param output    the collector that receives each
     *                  &lt;token, 1&gt; pair
     * @param reporter  ignored
     * @throws IOException propagated from the output collector
     */
    public void map(WritableComparable key, Text value,
                    OutputCollector<Text, IntWritable> output,
                    Reporter reporter) throws IOException
    {
        // The same Text instance is refilled for every token rather
        // than allocating a new one per emitted pair.
        for (StringTokenizer words = new StringTokenizer(value.toString());
             words.hasMoreTokens(); ) {
            token.set(words.nextToken());
            output.collect(token, ONE);
        }
    }
}