// Java 8 stream example
//
// Source: Internet | Published by: 中国畜牧软件网 | Edited by: 程序博客网 | Time: 2024/04/28 19:22
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;


import static java.util.stream.Collectors.*;


public class MoviesMain {


public static void main(String[] args) {

System.out.print("Loading and preparing data...");


// Load all the data from the files into lists
List<User> users = User.allUsers();
List<Rating> ratings = Rating.allRatings();
List<Movie> movies = Movie.allMovies();

/*
*  Map indexing all movies by their id - primarily for reporting.
*  Key: movie id   Value: Movie with that id
*/
Map <Integer, Movie> moviesById = movies.stream()
.collect(toMap( Movie::getId, Function.identity()));

/*
* Map indexing all movies by their genre.
* Key: genre(string)  Value: List of movies in this genre
* Made slightly more tricky by the fact that movie may be in multiple genres,
* so built using special function
*/

Map <String, List<Movie>> moviesByGenre = new HashMap<>();
movies.stream()
.forEach( m -> addToMapByGenre(moviesByGenre, m));


/*
* Partition user list into two, one containing men and one containing women
*/
Map<Boolean, List<User>> genderSplit = users.stream()
.collect(partitioningBy( u -> u.getGender().equals("M")));

// User ids for all males
Set<Integer> men = genderSplit.get(true).stream()
.map(User::getUserId)
.collect(Collectors.toSet());

// User ids for all females
Set<Integer> women = genderSplit.get(false).stream()
.map(User::getUserId)
.collect(Collectors.toSet());

/*
*  Map of users organised into age groups
*  Key: age group (integer)  Value: List of users in this age group
*/
Map<Integer, List<User>> usersByAge = users.stream()
.collect(groupingBy(User::getAge));

/*
*  Map of average ratings per genre (based on all ratings)
*  Key: genre (string)  Value:  Average rating for all movies in this genre
*/
Map<String, Double> genreAverageRatings = averageRatingsByGenre(ratings.stream(), moviesById);


System.out.println("Done");
System.out.println("\n===========\n");


/*
*  Data Structures built, now do the analytics
*/

// Most popular film for each genre
usersByAge.keySet().stream().forEach(ag -> {// For each age group...
Set<Integer> agUsers = usersByAge.get(ag).stream()//   get all users in the group
.map(User::getUserId)//   extract their ids
.collect(toSet());//   and make them into a set.

Stream<Rating> agRatings = ratings.stream()// Look through all ratings
.filter(rt -> agUsers.contains(rt.getUserId()));// Pick out those made by users in group

// Calculate genre average of ratings for the age group (based on users selected above)
// Key: genre (string)  Value: average of all ratings for the genre 
Map<String, Double> agGenreAverageRatings = averageRatingsByGenre(agRatings, moviesById);   

// Just print this out for now
System.out.println("Most popular Genre for " + ag + ": " + keyOfMaxValue(agGenreAverageRatings));
} );

System.out.println("\n===========\n");


// Least popular for men and women
System.out.println("Most popular for Men:   " + mostPopularForUsers(men, ratings, moviesById).getTitle() );
System.out.println("Most popular for Women: " + mostPopularForUsers(women, ratings, moviesById).getTitle() );
System.out.println("\n===========\n");



System.out.println("Least popular Genre: " + keyOfMinValue(genreAverageRatings));
}


/*
* Utility functions
*/

/*
* Build a map that stores average rating for each genre of movie, based on a supplied stream of ratings.
* Key: genre  Value: average rating for that genre
*/
public static Map<String, Double> averageRatingsByGenre( Stream<Rating> ratings, Map<Integer, Movie> mMap ) {

// Get the average rating for each movie, store in map indexed by movie id
Map<Integer, Double> ratingsByMovie = ratings
.collect(groupingBy(Rating::getMovieId, averagingDouble(Rating::getRating)));

// From above map, generate map combining all average ratings into a list for movies in that genre
// Needs utility function since a movie can belong to several genres
Map <String, List<Double>> genreRatings = new HashMap<>();
ratingsByMovie.keySet().stream()
.forEach( m -> { 
addToRatingsMapByGenre(ratingsByMovie, genreRatings, mMap.get(m));
});

// Aggregate average rating lists from above map into single average
Map <String, Double> aveRatingsByGenre = new HashMap<>();
genreRatings.keySet().stream().forEach( g -> {
aveRatingsByGenre.put(g, genreRatings.get(g).stream().mapToDouble(r -> r).average().orElse(0.0));
});

// Return this map
return aveRatingsByGenre;
}

/*
* Return movie with lowest average rating, based on supplied stream of ratings.
*/
public static Movie leastPopular ( Stream<Rating> ratings, Map<Integer, Movie> mMap ) {
Map<Integer, Double> averageRatings = ratings
.collect(groupingBy(Rating::getMovieId, averagingDouble(Rating::getRating)));// Build map indexed by movieId


//System.out.println("Lowest Average Rating: " + averageRatings.get(keyOfMinValue(averageRatings)));
return mMap.get(keyOfMinValue(averageRatings));
}

/*
* Return movie with highest average rating, based on supplied stream of ratings.
*/
public static Movie mostPopular ( Stream<Rating> ratings, Map<Integer, Movie> mMap ) {
Map<Integer, Double> averageRatings = ratings
.collect(groupingBy(Rating::getMovieId, averagingDouble(Rating::getRating)));// Build map indexed by movieId


//System.out.println("Highest Average Rating: " + averageRatings.get(keyOfMinValue(averageRatings)));
return mMap.get(keyOfMaxValue(averageRatings));
}

/*
* Return movie that has the lowest average rating for ratings from supplied group of users.
*/
public static Movie leastPopularForUsers (Collection <Integer> userIds, List<Rating> r, Map <Integer, Movie> mMap ) {
return leastPopular( r.stream().filter(rt -> userIds.contains(rt.getUserId())), mMap);
}

/*
* Return movie that has the highest average rating from supplied group of users.
*/
public static Movie mostPopularForUsers (Collection <Integer> userIds, List<Rating> r, Map <Integer, Movie> mMap ) {
return mostPopular( r.stream().filter(rt -> userIds.contains(rt.getUserId())), mMap);
}


public static Movie leastPopularByGenre ( String genre, List<Rating> r, Map<Integer, Movie> mMap ) {
return leastPopular( r.stream().filter(rt -> mMap.get(rt.getMovieId()).getGenres().contains(genre)), mMap);
}


/*
*  Add movie to map keyed by genre. Movie may have multiple genres so a little more complex than may appear
*/
public static void addToMapByGenre( Map<String, List<Movie>> mMap, Movie mov ) {
for ( String g: mov.getGenres() ) {
List<Movie> movies = ( mMap.containsKey(g) ) ? mMap.get(g) : new ArrayList<>();
movies.add(mov);
mMap.put(g, movies);
}
}

/*
*  Add movie to ratings map keyed by genre. Movie may have multiple genres so a little more complex than may appear
*/
public static void addToRatingsMapByGenre( Map<Integer, Double> rmMap, Map<String, List<Double>> rMap, Movie mov ) {
double movAvg = rmMap.get(mov.getId());
for ( String g: mov.getGenres() ) {
List<Double> averages = ( rMap.containsKey(g) ) ? rMap.get(g) : new ArrayList<>();
averages.add( movAvg);
rMap.put(g, averages);
}
}

/*
*  Return index of (first) entry in Map<K, V> that indexes the value v
*/
private static <K,V> K findKey ( Map<K, V> m, V v ) {
K returnKey = null; // Use as default value -- ugh should use Optional<K>
   for (Entry<K, V> entry : m.entrySet()) {                // Iterate through hashmap
    if (entry.getValue() == v) {
    returnKey = entry.getKey();
    break;
       }
   }
return returnKey;
}

/*
*  Get index of the largest value in the map (return first one if multiple)
*/
public static <K> K keyOfMaxValue( Map<K, Double> theMap ) {
return findKey( theMap, Collections.max(theMap.values()) );
}


/*
*  Get index of the smallest value in the map (return first one if multiple)
*/
public static <K> K keyOfMinValue( Map<K, Double> theMap ) {
return findKey( theMap, Collections.min(theMap.values()) );
}
}
// 0 0
// (web page footer residue: "原创粉丝点击" = original posts / followers / clicks)