> import Data.Function
> import Data.List
> import Data.Ord
> import System.IO
> import Text.Printf
from __future__ import with_statement
from __future__ import division
import pickle
class Review(object):
    """One rating of a movie.

    Attributes:
        movieId: identifier of the rated movie, stored as given.
        grade:   the rating value.
        date:    the date of the rating, stored as given.
    """

    def __init__(self, movieId, grade, date):
        self.movieId, self.grade, self.date = movieId, grade, date
> -- | One rating of a movie: the movie's id, the grade given and the
> -- rating date kept as an unparsed string.
> data Review = Review
>   { movieID :: Int
>   , grade   :: Int
>   , date    :: String
>   } deriving (Eq, Ord, Show)
class Customer(object):
def __init__(self, idnumber, review):
self.idnumber = idnumber
self.reviews = [review]
> data Customer = Customer { idnumber :: Int, reviews :: [Review] } deriving (Eq, Ord, Show)
def addReview(self, review):
self.reviews += review
def averageGrade(self):
tot = 0
for review in self.reviews:
tot += review.grade
return tot / len(self.reviews)
> -- | Mean grade over all of a customer's reviews.
> -- For a customer with no reviews this is 0/0 in 'Double', i.e. NaN.
> averageGrade :: Customer -> Double
> averageGrade customer = fromIntegral total / fromIntegral count
>   where
>     rs    = reviews customer
>     total = sum (map grade rs)
>     count = length rs
def fname(filmnbr):
    """Return the path of the review file for movie number `filmnbr`.

    The number is left-padded with zeros to at least five digits and
    placed after the fixed prefix '/tmp/netflix/mv_00', e.g.
    fname(1) == '/tmp/netflix/mv_0000001.txt'.  Numbers longer than five
    digits are not truncated.
    """
    # The original computed the padding as `5len(str(filmnbr))`, which is a
    # syntax error (the minus sign was lost); zfill performs the intended
    # "pad to width 5" directly.
    return '/tmp/netflix/mv_00' + str(filmnbr).zfill(5) + '.txt'
> -- | Path of the review file for a movie number; the number is
> -- zero-padded to seven digits.
> fname :: Int -> FilePath
> fname filmnbr = printf "/tmp/netflix/mv_%07d.txt" filmnbr
def customerInList(customer, customers):
    """Fold `customer` into the list `customers` and return the result.

    If an element with the same `idnumber` already exists, the new
    customer's reviews are added to that element (in place) and the same
    list object is returned.  Otherwise a new list with `customer`
    appended is returned and the input list is left untouched.

    `customers` may be None, which is treated as an empty list.
    """
    if customers is None:
        # The original guarded the loop with `!= None` but then evaluated
        # `None + [customer]`, which raises TypeError.
        return [customer]
    for el in customers:
        if customer.idnumber == el.idnumber:
            el.addReview(customer.reviews)
            return customers
    return customers + [customer]
def parseFile(filename, customers):
    """Read one movie's review file and fold its reviews into `customers`.

    `filename` is the movie number handed to fname() to build the path.
    The first line of the file supplies the movie id (the text before
    the first ':'); every following line is split on ',' into customer
    id, grade and date.  Returns the updated customer list.
    """
    with open(fname(filename)) as moviereviews:
        lines = iter(moviereviews)
        header = lines.next()
        movieId = header.split(":")[0]
        for line in lines:
            fields = line.split(",")
            review = Review(movieId, int(fields[1]), fields[2].strip("\n"))
            newcomer = Customer(fields[0], review)
            customers = customerInList(newcomer, customers)
    return customers
> -- | Split a list on every occurrence of a separator element.  The
> -- separators are dropped; empty fields are kept, so the result always
> -- has at least one element.
> split :: Eq a => a -> [a] -> [[a]]
> split ch s
>   | null rest = [field]
>   | otherwise = field : split ch (drop 1 rest)
>   where
>     (field, rest) = break (== ch) s
> -- | Parse the contents of one movie file into one 'Customer' per
> -- distinct customer id, in ascending order of 'idnumber'.
> --
> -- The first line supplies the movie id (the digits before the ':');
> -- every other line must have exactly three comma-separated fields:
> -- customer id, grade and date.  Empty input or a malformed line is an
> -- error.
> parse :: String -> [Customer]
> parse str =
>   [ Customer cid (map snd crs)
>   | crs@((cid, _) : _) <- groupBy ((==) `on` fst) (sortBy (comparing fst) keyed)
>   ]
>   where
>     (header : body) = lines str
>     movID = read (takeWhile (/= ':') header)
>     -- The id is converted to Int *before* sorting.  Sorting the raw
>     -- strings orders "10" before "9", which breaks the ascending
>     -- idnumber order that 'merge' relies on and leads to duplicated
>     -- customers when files are combined.
>     keyed :: [(Int, Review)]
>     keyed =
>       [ (read cid, Review { movieID = movID, grade = read g, date = d })
>       | line <- body
>       , let [cid, g, d] = split ',' line
>       ]
> -- | Read the review file for the given movie number (path built by
> -- 'fname') and parse its contents with 'parse'.
> parseFile :: Int -> IO [Customer]
> parseFile movieNumber = fmap parse (readFile (fname movieNumber))
def parseAll(start, stop):
    """Parse movie files numbered `start` through `stop` (inclusive).

    Each file is folded into one running customer list via parseFile();
    that list is returned.  An empty range yields an empty list.
    """
    customers = []
    movieNumber = start
    while movieNumber <= stop:
        customers = parseFile(movieNumber, customers)
        movieNumber += 1
    return customers
> -- | Merge two customer lists, concatenating the reviews of customers
> -- that share an 'idnumber' (left list's reviews first).
> -- Assumes both inputs are in ascending 'idnumber' order.
> merge :: [Customer] -> [Customer] -> [Customer]
> merge as [] = as
> merge [] bs = bs
> merge as@(a : as') bs@(b : bs') =
>   case compare (idnumber a) (idnumber b) of
>     EQ -> a { reviews = reviews a ++ reviews b } : merge as' bs'
>     LT -> a : merge as' bs
>     GT -> b : merge as bs'
> -- | Parse the movie files numbered @start@ through @stop@ (inclusive)
> -- and merge the per-file customer lists into one.
> --
> -- An empty range (@start > stop@) yields an empty list.
> parseAll :: Int -> Int -> IO [Customer]
> parseAll start stop = do
>   files <- mapM parseFile [start .. stop]
>   -- 'foldr1' is partial and crashed on an empty range.  Since
>   -- @merge x [] = x@, folding from [] gives the same result for every
>   -- non-empty list of files.
>   return (foldr merge [] files)
# Script entry point: parse movie files 1 through 5. The returned list is
# not stored or printed.
parseAll(1, 5)
# rwbarton@functor:/tmp$ grep -v '^>' netflix.lhs | time python
# 1.97user 0.01system 0:01.98elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
# 0inputs+0outputs (0major+1262minor)pagefaults 0swaps
> -- | Parse movie files 1 through 5 and print the merged customer list.
> main :: IO ()
> main = parseAll 1 5 >>= print
>
>
>
>
>
> import Data.Function
> import Data.Ord
> import System.IO
> import Text.Printf
from __future__ import with_statement
from __future__ import division
import pickle
class Review(object):
    """One rating of a movie.

    Attributes:
        movieId: identifier of the rated movie, stored as given.
        grade:   the rating value.
        date:    the date of the rating, stored as given.
    """

    def __init__(self, movieId, grade, date):
        self.movieId, self.grade, self.date = movieId, grade, date
> -- | One rating of a movie: the movie's id, the grade given and the
> -- rating date kept as an unparsed string.
> data Review = Review
>   { movieID :: Int
>   , grade   :: Int
>   , date    :: String
>   } deriving (Eq, Show)
class Customer(object):
def __init__(self, idnumber, review):
self.idnumber = idnumber
self.reviews = [review]
> data Customer = Customer { idnumber :: Int, reviews :: [Review] } deriving (Eq, Show)
def addReview(self, review):
self.reviews += review
def fname(filmnbr):
    """Return the path of the review file for movie number `filmnbr`.

    The number is left-padded with zeros to at least five digits and
    placed after the fixed prefix '/tmp/netflix/mv_00', e.g.
    fname(1) == '/tmp/netflix/mv_0000001.txt'.  Numbers longer than five
    digits are not truncated.
    """
    # The original computed the padding as `5len(str(filmnbr))`, which is a
    # syntax error (the minus sign was lost); zfill performs the intended
    # "pad to width 5" directly.
    return '/tmp/netflix/mv_00' + str(filmnbr).zfill(5) + '.txt'
> -- | Path of the review file for a movie number; the number is
> -- zero-padded to seven digits.
> fname :: Int -> FilePath
> fname filmnbr = printf "/tmp/netflix/mv_%07d.txt" filmnbr
def customerInList(customer, customers):
    """Fold `customer` into the list `customers` and return the result.

    If an element with the same `idnumber` already exists, the new
    customer's reviews are added to that element (in place) and the same
    list object is returned.  Otherwise a new list with `customer`
    appended is returned and the input list is left untouched.

    `customers` may be None, which is treated as an empty list.
    """
    if customers is None:
        # The original guarded the loop with `!= None` but then evaluated
        # `None + [customer]`, which raises TypeError.
        return [customer]
    for el in customers:
        if customer.idnumber == el.idnumber:
            el.addReview(customer.reviews)
            return customers
    return customers + [customer]
> -- | Insert a customer into a list.  If the list already holds a
> -- customer with the same 'idnumber', that entry is replaced by one
> -- whose reviews are the new customer's followed by the old one's;
> -- otherwise the new customer is appended at the end.
> updateCustomer :: Customer -> [Customer] -> [Customer]
> updateCustomer x [] = [x]
> updateCustomer x (w : ws)
>   | idnumber x /= idnumber w = w : updateCustomer x ws
>   | otherwise                = x { reviews = reviews x ++ reviews w } : ws
def parseFile(filename, customers):
    """Read one movie's review file and fold its reviews into `customers`.

    `filename` is the movie number handed to fname() to build the path.
    The first line of the file supplies the movie id (the text before
    the first ':'); every following line is split on ',' into customer
    id, grade and date.  Returns the updated customer list.
    """
    with open(fname(filename)) as moviereviews:
        lines = iter(moviereviews)
        header = lines.next()
        movieId = header.split(":")[0]
        for line in lines:
            fields = line.split(",")
            review = Review(movieId, int(fields[1]), fields[2].strip("\n"))
            newcomer = Customer(fields[0], review)
            customers = customerInList(newcomer, customers)
    return customers
> -- | Split a list on every occurrence of a separator element.  The
> -- separators are dropped; empty fields are kept, so the result always
> -- has at least one element.
> split :: Eq a => a -> [a] -> [[a]]
> split ch s
>   | null rest = [field]
>   | otherwise = field : split ch (drop 1 rest)
>   where
>     (field, rest) = break (== ch) s
> -- | Parse the contents of one movie file into a list of customers.
> --
> -- The first line supplies the movie id (the digits before the ':').
> -- Every other line must have exactly three comma-separated fields
> -- (customer id, grade, date); it becomes a single-review 'Customer'
> -- that is folded into the result with 'updateCustomer'.
> parse :: String -> [Customer]
> parse str = foldr updateCustomer [] singletons
>   where
>     (header : body) = lines str
>     movID = read (takeWhile (/= ':') header)
>     singletons = map toCustomer body
>     toCustomer line =
>       Customer (read cid) [Review { movieID = movID, grade = read g, date = d }]
>       where
>         [cid, g, d] = split ',' line
> -- | Read the review file for the given movie number (path built by
> -- 'fname') and parse its contents with 'parse'.
> parseFile :: Int -> IO [Customer]
> parseFile movieNumber = fmap parse (readFile (fname movieNumber))
def parseAll(start, stop):
    """Parse movie files numbered `start` through `stop` (inclusive).

    Each file is folded into one running customer list via parseFile();
    that list is returned.  An empty range yields an empty list.
    """
    customers = []
    movieNumber = start
    while movieNumber <= stop:
        customers = parseFile(movieNumber, customers)
        movieNumber += 1
    return customers
> -- | Combine two customer lists by inserting every customer of the
> -- first list into the second with 'updateCustomer', starting from the
> -- last element of the first list.  No ordering of the inputs is
> -- required.
> mergeQuadratic :: [Customer] -> [Customer] -> [Customer]
> mergeQuadratic xs y = foldr updateCustomer y xs
> -- | Parse the movie files numbered @start@ through @stop@ (inclusive)
> -- and combine the per-file customer lists with 'mergeQuadratic'.
> --
> -- An empty range (@start > stop@) yields an empty list.
> parseAll :: Int -> Int -> IO [Customer]
> parseAll start stop = do
>   files <- mapM parseFile [start .. stop]
>   -- 'foldr1' is partial and crashed on an empty range, so that case is
>   -- handled explicitly.  Folding from [] instead would change the
>   -- order of the result, because 'mergeQuadratic' appends unseen
>   -- customers at the end.
>   return $ case files of
>     [] -> []
>     _  -> foldr1 mergeQuadratic files
# Script entry point: parse movie files 1 through 5. The returned list is
# not stored or printed.
parseAll(1, 5)
# rwbarton@functor:/tmp$ grep -v '^>' netflix.lhs | time python
# 1.97user 0.01system 0:01.98elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
# 0inputs+0outputs (0major+1262minor)pagefaults 0swaps
> -- | Parse movie files 1 through 5 and print the combined customer list.
> main :: IO ()
> main = parseAll 1 5 >>= print
>
>
>
>