Skip to content

Commit

Permalink
Add imdb2torrent files from deflix-stremio v0.11.0
Browse files Browse the repository at this point in the history
https://github.com/doingodswork/deflix-stremio
Revision 254d14c6f8ccc21338001da83299a17a9f6c4b29
  • Loading branch information
doingodswork committed Jan 17, 2021
1 parent aca8442 commit 86fc251
Show file tree
Hide file tree
Showing 10 changed files with 1,577 additions and 0 deletions.
303 changes: 303 additions & 0 deletions 1337x.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,303 @@
package imdb2torrent

import (
"context"
"fmt"
"net/http"
"net/url"
"strconv"
"strings"
"time"

"github.com/PuerkitoBio/goquery"
"go.uber.org/zap"
)

// LeetxClientOptions holds the settings used to construct a 1337x client.
type LeetxClientOptions struct {
// BaseURL is the URL prefix that all request paths are appended to.
BaseURL string
// Timeout is used as the timeout of the client's HTTP client.
Timeout time.Duration
// CacheAge is the age after which a cached result is treated as expired.
CacheAge time.Duration
}

// NewLeetxClientOpts returns LeetxClientOptions populated with the given
// base URL, timeout and cache age.
func NewLeetxClientOpts(baseURL string, timeout, cacheAge time.Duration) LeetxClientOptions {
	var opts LeetxClientOptions
	opts.BaseURL = baseURL
	opts.Timeout = timeout
	opts.CacheAge = cacheAge
	return opts
}

// DefaultLeetxClientOpts is a ready-made set of options:
// the 1337x.to base URL, a 5 second timeout and a cache age of 24 hours.
var DefaultLeetxClientOpts = LeetxClientOptions{
BaseURL: "https://1337x.to",
Timeout: 5 * time.Second,
CacheAge: 24 * time.Hour,
}

// Compile-time check that *leetxClient satisfies the MagnetSearcher interface.
var _ MagnetSearcher = (*leetxClient)(nil)

// leetxClient scrapes 1337x for torrents. Create it with NewLeetxClient.
type leetxClient struct {
// baseURL is prefixed to every request path.
baseURL string
// httpClient performs all page requests; its timeout comes from the client options.
httpClient *http.Client
// cache stores the results per ID.
cache Cache
// metaGetter is used to look up the title (and year) for an IMDb ID.
metaGetter MetaGetter
// cacheAge is the age after which a cached item is treated as expired.
cacheAge time.Duration
logger *zap.Logger
// logFoundTorrents enables a debug log entry for every torrent found.
logFoundTorrents bool
}

// NewLeetxClient creates a 1337x client from the given options and dependencies.
// The HTTP client it uses internally gets its timeout from opts.Timeout.
func NewLeetxClient(opts LeetxClientOptions, cache Cache, metaGetter MetaGetter, logger *zap.Logger, logFoundTorrents bool) *leetxClient {
	client := new(leetxClient)
	client.baseURL = opts.BaseURL
	client.httpClient = &http.Client{Timeout: opts.Timeout}
	client.cache = cache
	client.metaGetter = metaGetter
	client.cacheAge = opts.CacheAge
	client.logger = logger
	client.logFoundTorrents = logFoundTorrents
	return client
}

// FindMovie scrapes 1337x to find torrents for the given IMDb ID.
// The movie title (and year, if known) is looked up via the client's MetaGetter
// and then used as search term in the 1337x "Movies" category.
// If no error occurred, but there are just no torrents for the movie yet, an empty result and *no* error are returned.
func (c *leetxClient) FindMovie(ctx context.Context, imdbID string) ([]Result, error) {
	meta, err := c.metaGetter.GetMovieSimple(ctx, imdbID)
	if err != nil {
		return nil, fmt.Errorf("Couldn't get movie name via Cinemeta for IMDb ID %v: %v", imdbID, err)
	}

	// The search term is the title, followed by the year when it's known.
	query := meta.Title
	if year := meta.Year; year != 0 {
		query = query + " " + strconv.Itoa(year)
	}

	searchPath := "category-search/" + url.PathEscape(query) + "/Movies/1/"
	return c.find(ctx, imdbID, searchPath, meta.Title, false)
}

// FindTVShow scrapes 1337x to find torrents for the given IMDb ID + season + episode.
// The TV show title is looked up via the client's MetaGetter, and the search term
// built by createTVShowSearch is used to search the 1337x "TV" category.
// If no error occurred, but there are just no torrents for the TV show yet, an empty result and *no* error are returned.
func (c *leetxClient) FindTVShow(ctx context.Context, imdbID string, season, episode int) ([]Result, error) {
	// The combined ID has the form "<imdbID>:<season>:<episode>".
	id := fmt.Sprintf("%s:%d:%d", imdbID, season, episode)

	meta, err := c.metaGetter.GetTVShowSimple(ctx, imdbID, season, episode)
	if err != nil {
		return nil, fmt.Errorf("Couldn't get TV show title via Cinemeta for ID %v: %v", id, err)
	}

	query, err := createTVShowSearch(ctx, c.metaGetter, imdbID, season, episode)
	if err != nil {
		return nil, err
	}

	searchPath := "category-search/" + url.PathEscape(query) + "/TV/1/"
	return c.find(ctx, id, searchPath, meta.Title, true)
}

// find searches 1337x via the given URL path and returns the torrents found for the given ID.
//
// The cache is checked first (key: id + "-1337x"); a hit that isn't older than the
// configured cache age is returned right away. Cache errors are only logged.
//
// Otherwise the search result page is fetched. For movies the client then tries to get
// from the first search result to the general movie page, falling back to the search
// result page if that doesn't work. For TV shows the search result page is always used.
// The torrent page of every 720p/1080p/2160p entry is then fetched in parallel to extract the magnet URL.
//
// An error is returned if the search page can't be fetched or contains no search result link.
// If the page lists no 720p/1080p/2160p torrents, nil and *no* error are returned (and nothing is cached).
// Otherwise the collected results are cached (even if empty) and returned.
func (c *leetxClient) find(ctx context.Context, id, urlPath, title string, isTVShow bool) ([]Result, error) {
	zapFieldID := zap.String("id", id)
	zapFieldTorrentSite := zap.String("torrentSite", "1337x")

	// Check cache first
	cacheKey := id + "-1337x"
	torrentList, created, found, err := c.cache.Get(cacheKey)
	switch {
	case err != nil:
		c.logger.Error("Couldn't get torrent results from cache", zap.Error(err), zapFieldID, zapFieldTorrentSite)
	case !found:
		c.logger.Debug("Torrent results not found in cache", zapFieldID, zapFieldTorrentSite)
	case time.Since(created) > c.cacheAge:
		expiredSince := time.Since(created.Add(c.cacheAge))
		c.logger.Debug("Hit cache for torrents, but item is expired", zap.Duration("expiredSince", expiredSince), zapFieldID, zapFieldTorrentSite)
	default:
		c.logger.Debug("Hit cache for torrents, returning results", zap.Int("torrentCount", len(torrentList)), zapFieldID, zapFieldTorrentSite)
		return torrentList, nil
	}

	// Search on 1337x

	origDoc, err := c.getDoc(ctx, c.baseURL+"/"+urlPath)
	if err != nil {
		return nil, err
	}
	// Pick the first element, it's the most likely one to belong to the correct movie / TV show
	torrentPath, ok := origDoc.Find(".table-list tbody td a").Next().Attr("href")
	if !ok {
		return nil, fmt.Errorf("Couldn't find search result")
	}

	// Try to go via the first search result to the general movie page. This guarantees that all torrents found on that page are definitive matches for the movie.
	// But this only works for movies, not for TV shows.
	// For movies, if we don't find the general movie page, we can always go back to the original search result page as well.
	// TODO: For TV shows we could try to go via the episode page.
	docToSearch := origDoc
	if !isTVShow {
		firstTorrentDoc, err := c.getDoc(ctx, c.baseURL+torrentPath)
		if err != nil {
			c.logger.Warn("Couldn't get HTML doc for first torrent result", zap.Error(err), zapFieldID, zapFieldTorrentSite)
		} else if movieInfoURL, ok := firstTorrentDoc.Find(".content-row h3 a").Attr("href"); ok && movieInfoURL != "" {
			// Only if the general movie page URL was found, we try to go through the torrents listed on that page
			movieDoc, err := c.getDoc(ctx, c.baseURL+movieInfoURL)
			if err != nil {
				// Only log, but continue - we can always use the results from the original search result page
				c.logger.Warn("Couldn't get HTML doc for general movie page", zap.Error(err), zapFieldID, zapFieldTorrentSite)
			} else {
				docToSearch = movieDoc
			}
		}
	}

	// Go through elements
	var torrentPageURLs []string
	docToSearch.Find(".table-list tbody tr").Each(func(_ int, s *goquery.Selection) {
		link := s.Find("a").Next()
		linkText := link.Text()
		if !strings.Contains(linkText, "720p") && !strings.Contains(linkText, "1080p") && !strings.Contains(linkText, "2160p") {
			return
		}
		torrentLink, ok := link.Attr("href")
		if !ok || torrentLink == "" {
			c.logger.Warn("Couldn't find link to the torrent page, did the HTML change?", zapFieldID, zapFieldTorrentSite)
			return
		}
		torrentPageURLs = append(torrentPageURLs, c.baseURL+torrentLink)
	})
	// TODO: We should differentiate between "parsing went wrong" and "just no search results".
	if len(torrentPageURLs) == 0 {
		return nil, nil
	}

	// Visit each torrent page *in parallel* and get the magnet URL

	// Buffered so that no goroutine ever blocks on sending its result.
	resultChan := make(chan Result, len(torrentPageURLs))
	// Number of goroutines actually started. URLs that are skipped below never send a result,
	// so waiting for len(torrentPageURLs) results would block forever.
	started := 0
	for _, torrentPageURL := range torrentPageURLs {
		// Use configured base URL, which could be a proxy that we want to go through
		pageURL, err := replaceURL(torrentPageURL, c.baseURL)
		if err != nil {
			c.logger.Warn("Couldn't replace URL which was retrieved from an HTML link", zap.Error(err), zapFieldID, zapFieldTorrentSite)
			continue
		}

		started++
		go func(goTorrentPageURL string) {
			resultChan <- c.scrapeTorrentPage(ctx, goTorrentPageURL, title, zapFieldID, zapFieldTorrentSite)
		}(pageURL)
	}

	var results []Result
	// We don't use a timeout channel because the HTTP clients have a timeout so the goroutines are guaranteed to finish
	for i := 0; i < started; i++ {
		// A zero Result signals that the torrent page didn't yield a usable magnet URL
		if result := <-resultChan; result.MagnetURL != "" {
			results = append(results, result)
		}
	}

	// Fill cache, even if there are no results, because that's just the current state of the torrent site.
	if err := c.cache.Set(cacheKey, results); err != nil {
		c.logger.Error("Couldn't cache torrents", zap.Error(err), zap.String("cache", "torrent"), zapFieldID, zapFieldTorrentSite)
	}

	return results, nil
}

// scrapeTorrentPage fetches a single torrent page and builds a Result from the magnet URL found on it.
// The zero Result (with an empty MagnetURL) is returned if the page can't be fetched,
// contains no magnet URL, the magnet URL mentions none of 720p/1080p/2160p,
// or no 40 character info hash can be extracted from it.
func (c *leetxClient) scrapeTorrentPage(ctx context.Context, torrentPageURL, title string, zapFieldID, zapFieldTorrentSite zap.Field) Result {
	doc, err := c.getDoc(ctx, torrentPageURL)
	if err != nil {
		return Result{}
	}

	magnet, ok := doc.Find(".box-info ul li").First().Find("a").Attr("href")
	if !ok || magnet == "" {
		return Result{}
	}

	var quality string
	switch {
	case strings.Contains(magnet, "720p"):
		quality = "720p"
	case strings.Contains(magnet, "1080p"):
		quality = "1080p"
	case strings.Contains(magnet, "2160p"):
		quality = "2160p"
	default:
		// This should never be the case, because it was previously checked during scraping
		return Result{}
	}

	if strings.Contains(magnet, "10bit") {
		quality += " 10bit"
	}

	// https://en.wikipedia.org/wiki/Pirated_movie_release_types
	if strings.Contains(magnet, "HDCam") {
		quality += " (⚠️cam)"
	}

	// We should mark 1337x movies somehow, because we cannot be 100% sure it's the correct movie.
	// The quality might later be used as title, as suggested by Stremio.
	// (Albeit only in a specific case for a specific reason)
	quality += "\n(⚠️guessed match)"

	// look for "btih:dd8255ecdc7ca55fb0bbf81323d87062db1f6d1c&" via regex and then cut out the hash
	match := magnet2InfoHashRegex.Find([]byte(magnet))
	infoHash := strings.TrimPrefix(string(match), "btih:")
	infoHash = strings.TrimSuffix(infoHash, "&")
	infoHash = strings.ToUpper(infoHash)

	if infoHash == "" {
		c.logger.Warn("Couldn't extract info_hash. Did the HTML change?", zap.String("magnet", magnet), zapFieldID, zapFieldTorrentSite)
		return Result{}
	}
	if len(infoHash) != 40 {
		c.logger.Warn("InfoHash isn't 40 characters long", zap.String("magnet", magnet), zapFieldID, zapFieldTorrentSite)
		return Result{}
	}

	if c.logFoundTorrents {
		c.logger.Debug("Found torrent", zap.String("title", title), zap.String("quality", quality), zap.String("infoHash", infoHash), zap.String("magnet", magnet), zapFieldID, zapFieldTorrentSite)
	}

	return Result{
		Title:     title,
		Quality:   quality,
		InfoHash:  infoHash,
		MagnetURL: magnet,
	}
}

// IsSlow always returns false for the 1337x client.
// NOTE(review): presumably part of the MagnetSearcher interface so callers can treat slow clients differently - confirm against the interface definition.
func (c *leetxClient) IsSlow() bool {
return false
}

// getDoc sends a GET request to the given URL and parses the response body as HTML document.
// The request is bound to ctx, so cancelling ctx (or exceeding its deadline) aborts the request.
// An error is returned if the request can't be created or sent, if the response status
// is not 200 OK, or if the body can't be loaded by goquery.
func (c *leetxClient) getDoc(ctx context.Context, url string) (*goquery.Document, error) {
	// http.NewRequestWithContext rejects a nil context, so fall back to a background context for such callers.
	if ctx == nil {
		ctx = context.Background()
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, fmt.Errorf("Couldn't create GET request for %v: %w", url, err)
	}

	res, err := c.httpClient.Do(req)
	if err != nil {
		// Wrapped with %w so that callers can detect context cancellation via errors.Is
		return nil, fmt.Errorf("Couldn't GET %v: %w", url, err)
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("Bad GET response: %v", res.StatusCode)
	}

	// Load the HTML document
	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return nil, fmt.Errorf("Couldn't load the HTML in goquery: %v", err)
	}

	return doc, nil
}
60 changes: 60 additions & 0 deletions cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package imdb2torrent

import (
"sync"
"time"
)

// CacheItem combines Result objects and a creation time in a single struct.
// This can be useful for implementing the Cache interface, but is not necessarily required.
// See the InMemoryCache example implementation of the Cache interface for its usage.
type CacheItem struct {
// Results are the cached torrent results.
Results []Result
// Created is the time at which the results were put into the cache.
Created time.Time
}

// Cache is the interface that the imdb2torrent clients use for caching results.
// A package user must pass an implementation of this interface.
// Usually you create a simple wrapper around an existing cache package.
// An example implementation is the InMemoryCache in this package.
type Cache interface {
// Set stores the results under the given key.
Set(key string, results []Result) error
// Get returns the results stored under the given key, the time they were cached,
// and a boolean that signals whether the key was found in the cache.
Get(key string) ([]Result, time.Time, bool, error)
}

// Compile-time check that *InMemoryCache satisfies the Cache interface.
var _ Cache = (*InMemoryCache)(nil)

// InMemoryCache is an example implementation of the Cache interface.
// It doesn't persist its data, so it's not suited for production use of the imdb2torrent package.
type InMemoryCache struct {
// cache maps a cache key to the cached results and their creation time.
cache map[string]CacheItem
// lock guards all access to the cache map.
lock *sync.RWMutex
}

// NewInMemoryCache returns an empty InMemoryCache that is ready for use.
func NewInMemoryCache() *InMemoryCache {
	items := make(map[string]CacheItem)
	mutex := new(sync.RWMutex)
	return &InMemoryCache{cache: items, lock: mutex}
}

// Set puts the given Result objects into the cache under the given key,
// together with the current time as creation time.
// An existing item with the same key is overwritten. The returned error is always nil.
func (c *InMemoryCache) Set(key string, results []Result) error {
	c.lock.Lock()
	defer c.lock.Unlock()

	var item CacheItem
	item.Results = results
	item.Created = time.Now()
	c.cache[key] = item

	return nil
}

// Get looks up the given key and returns the cached Result objects along with the time they were cached.
// The boolean return value tells whether the key was found in the cache;
// if it wasn't, the results are nil and the time is the zero time.
// The returned error is always nil.
func (c *InMemoryCache) Get(key string) ([]Result, time.Time, bool, error) {
	c.lock.RLock()
	defer c.lock.RUnlock()

	item, ok := c.cache[key]
	if !ok {
		return nil, time.Time{}, false, nil
	}
	return item.Results, item.Created, true, nil
}
Loading

0 comments on commit 86fc251

Please sign in to comment.