Improved downloader module
This commit is contained in:
parent
84d4038c6d
commit
68c2800aec
131
downloader.ts
131
downloader.ts
|
@ -4,91 +4,108 @@ import { UrlPool } from "./urlpool";
|
|||
export class Downloader {
|
||||
|
||||
pool: UrlPool;
|
||||
chunks: Buffer[] = [];
|
||||
downloaded_chunks = 0;
|
||||
total_chunks = 0;
|
||||
chunks: {[U: string]: (Buffer | null)} = {};
|
||||
|
||||
chunk_size = 512*1024;
|
||||
total_size = 0;
|
||||
chunk_size = Math.round(0.1*1024*1024);
|
||||
|
||||
ready: boolean = false;
|
||||
loading: boolean = false;
|
||||
destroyed: boolean = false;
|
||||
|
||||
constructor(pool: UrlPool) {
|
||||
from: number;
|
||||
|
||||
constructor(pool: UrlPool, from: number) {
|
||||
this.pool = pool;
|
||||
this.from = from;
|
||||
}
|
||||
|
||||
async init() {
|
||||
let url = await this.pool.get();
|
||||
if(!url) throw "No available URL in pool!";
|
||||
let r = await axios.get(url[1], {
|
||||
responseType: 'arraybuffer',
|
||||
headers: {
|
||||
Range: `bytes=0-0`
|
||||
/**
 * Removes and returns the chunk stored under the first key that
 * `Object.keys(this.chunks)` reports, provided that chunk has already been
 * filled with a Buffer.
 *
 * On success the entry is deleted and `from` is advanced by `chunk_size`.
 *
 * @returns the Buffer, or `undefined` when there are no entries or the
 *          first entry is not a Buffer yet (e.g. still `null`).
 */
first_chunk(): Buffer | undefined {
    const [head] = Object.keys(this.chunks);
    if(!head) return;

    const candidate = this.chunks[head];
    // Anything that is not a Buffer (a null placeholder) is left in place.
    if(!(candidate instanceof Buffer)) return;

    delete this.chunks[head];
    this.from += this.chunk_size;
    return candidate;
}
|
||||
|
||||
/**
 * Drains every chunk that `first_chunk()` is currently willing to hand out
 * and joins them into a single Buffer.
 *
 * @returns the concatenated data, or `null` when no chunk was available.
 */
collect() {
    const ready: Buffer[] = [];
    // Keep pulling until first_chunk() yields nothing.
    for(let next = this.first_chunk(); next; next = this.first_chunk()) {
        ready.push(next);
    }
    return ready.length ? Buffer.concat(ready) : null;
}
|
||||
|
||||
cache() {
|
||||
let chunks: string[] = [];
|
||||
let existing_chunks = Object.keys(this.chunks);
|
||||
for(let i=0; i<15; i++) {
|
||||
chunks.push((this.from+(this.chunk_size*i)).toString());
|
||||
}
|
||||
chunks.forEach(from => {
|
||||
if(existing_chunks.indexOf(from) == -1) {
|
||||
this.download_part(parseInt(from), parseInt(from)+(this.chunk_size-1));
|
||||
this.chunks[from] = null;
|
||||
}
|
||||
});
|
||||
this.total_size = parseInt(r.headers["content-range"].split("/")[1]);
|
||||
this.pool.return(url[0]);
|
||||
}
|
||||
|
||||
async download_range(from: number, to: number): Promise<Buffer> {
|
||||
return new Promise(async (complete) => {
|
||||
|
||||
if(to > this.total_size) to = this.total_size;
|
||||
|
||||
let chunk_count = Math.max(Math.ceil((to-from)/this.chunk_size), 1);
|
||||
let chunks: Buffer[] = [];
|
||||
let completed = 0;
|
||||
|
||||
for(let i=0; i<chunk_count; i++) {
|
||||
|
||||
this.download_chunk(from, to, i).then((result) => {
|
||||
chunks[i] = result;
|
||||
completed++;
|
||||
if(completed == chunk_count) {
|
||||
complete(Buffer.concat(chunks));
|
||||
}
|
||||
});
|
||||
|
||||
}
|
||||
/**
 * Produces the next piece of data for the consumer.
 *
 * Calls `cache()` and then polls `collect()` every 100 ms (plus once
 * immediately) until it returns data or the downloader has been destroyed.
 *
 * @returns `false` when a previous call is still pending;
 *          `null` when `from` is already past the last byte
 *          (`pool.total_size - 1`), or when the downloader was destroyed
 *          before any data could be collected;
 *          otherwise a Buffer with the collected data.
 */
async more(): Promise<Buffer | null | false> {
    if(this.loading) return false;
    // Check for end-of-file BEFORE taking the loading flag; otherwise the flag
    // would stay set forever and every later call would report `false`.
    if(this.from > this.pool.total_size-1) return null;
    this.loading = true;

    this.cache();

    return new Promise<Buffer | null>((resolve) => {
        const poll = () => {
            const result = this.collect();
            // Nothing to deliver yet and still alive: keep waiting.
            if(!result && !this.destroyed) return;

            this.loading = false;
            resolve(result);
            clearInterval(interval);
            if(result) console.log(this.pool.id, "| sending:", Math.round(result.length/1024), "kB");
        };
        // `interval` is assigned before poll() first runs, so clearInterval is safe.
        const interval = setInterval(poll, 100);
        poll();
    });
}
|
||||
|
||||
async download_chunk(from: number, to: number, part: number): Promise<Buffer> {
|
||||
async download_part(from: number, to: number) {
|
||||
|
||||
if(to > this.pool.total_size-1) to = this.pool.total_size-1;
|
||||
if(from > this.pool.total_size-1) return;
|
||||
|
||||
let url = await this.pool.get();
|
||||
if(!url) throw "No available URL!";
|
||||
|
||||
let lfrom = from + (part * this.chunk_size) + (part > 0 ? 1 : 0);
|
||||
let lto = Math.min(from + (part * this.chunk_size) + this.chunk_size, to);
|
||||
if(lfrom == lto) lfrom--;
|
||||
if(this.destroyed == true) {
|
||||
this.pool.return(url[0]);
|
||||
return;
|
||||
}
|
||||
|
||||
let r = await axios.get(url[1], {
|
||||
responseType: 'arraybuffer',
|
||||
headers: {
|
||||
Range: `bytes=${lfrom}-${lto}`
|
||||
Range: `bytes=${from}-${to}`
|
||||
}
|
||||
});
|
||||
});
|
||||
this.chunks[from.toString()] = r.data;
|
||||
|
||||
this.pool.return(url[0]);
|
||||
return r.data;
|
||||
|
||||
}
|
||||
|
||||
async download_part(i: number) {
|
||||
|
||||
let chunk_size = 10_000_000;
|
||||
let from = i*chunk_size;
|
||||
let to = (from+chunk_size-1);
|
||||
if(to > this.total_size) to = this.total_size;
|
||||
|
||||
if(this.chunks[i] || from > this.total_size) return false;
|
||||
this.total_chunks++;
|
||||
|
||||
this.chunks[i] = await this.download_range(from, to);
|
||||
this.downloaded_chunks++;
|
||||
return true;
|
||||
|
||||
/**
 * Marks this downloader as destroyed and removes it from the owning pool's
 * `downloaders` list.
 *
 * Safe to call more than once: if the instance is no longer in the list,
 * nothing is removed.
 */
destroy() {
    this.destroyed = true;
    const index = this.pool.downloaders.indexOf(this);
    // indexOf returns -1 when absent; splice(-1, 1) would then drop the LAST
    // entry, i.e. some other downloader.
    if(index !== -1) this.pool.downloaders.splice(index, 1);
}
|
||||
|
||||
}
|
8
links.ts
8
links.ts
|
@ -39,6 +39,7 @@ export class Links {
|
|||
|
||||
let download_link = "https://uloz.to/download-dialog/free/download?fileSlug="+id;
|
||||
let redirect = await this.inst.get(download_link, {maxRedirects: 0, validateStatus: null});
|
||||
if(redirect.status !== 302) throw new Error(`Status code: ${redirect.status}`);
|
||||
this.tor.torNewSession();
|
||||
if(redirect.headers.location && redirect.headers.location.startsWith("https://download.uloz.to"))
|
||||
return redirect.headers.location;
|
||||
|
@ -48,9 +49,9 @@ export class Links {
|
|||
async captcha_link(id: string): Promise<string | undefined> {
|
||||
|
||||
let download_link = "https://uloz.to/download-dialog/free/download?fileSlug="+id;
|
||||
let captcha_page = await this.inst.get(download_link, {maxRedirects: 0});
|
||||
let captcha_page = await this.inst.get(download_link, {maxRedirects: 0, validateStatus: null});
|
||||
|
||||
if(captcha_page.status !== 200) throw new Error("Status code is not 200");
|
||||
if(captcha_page.status !== 200) throw new Error(`Status code: ${captcha_page.status}`);
|
||||
let cookies = this.parse_cookie(captcha_page.headers["set-cookie"]);
|
||||
|
||||
let url = this.regex_parse(/<img class="xapca-image" src="([^"]*)" alt="">/gm, captcha_page.data, 1);
|
||||
|
@ -78,7 +79,8 @@ export class Links {
|
|||
"X-Requested-With": "XMLHttpRequest",
|
||||
"User-Agent": "Go-http-client/1.1",
|
||||
"Cookie": cookies
|
||||
}
|
||||
},
|
||||
validateStatus: null
|
||||
});
|
||||
this.tor.torNewSession();
|
||||
return result.data.slowDownloadLink;
|
||||
|
|
42
urlpool.ts
42
urlpool.ts
|
@ -11,23 +11,42 @@ function sleep(ms: number) {
|
|||
export class UrlPool {
|
||||
|
||||
id: string;
|
||||
is_direct = false;
|
||||
|
||||
urls: string[] = [];
|
||||
used: boolean[] = [];
|
||||
|
||||
generating = false;
|
||||
downloader?: Downloader;
|
||||
valid = false;
|
||||
downloaders: Downloader[] = [];
|
||||
|
||||
total_size: number = 0;
|
||||
is_direct = false;
|
||||
ready = false;
|
||||
|
||||
constructor(id: string) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
/**
 * Prepares the pool for use.
 *
 * Fetches the file page, decides whether the direct download flow applies
 * (`is_direct`), generates a link, and then requests the first byte of the
 * file to read the total size out of the `Content-Range` response header.
 *
 * @returns `false` when the file page answers 404, `true` once the pool is
 *          ready (`ready` set and `total_size` known).
 * @throws when no URL is available from the pool, when the probe request
 *         fails, or when the `Content-Range` header is missing/unparsable.
 */
async init() {

    let page = await axios.get("https://uloz.to/file/"+this.id, {validateStatus: null});
    if(page.status == 404) return false;
    // TODO: Add quick download support
    // let quick_dl_url = links.regex_parse(new RegExp('href="(/quickDownload/[^"]*)"'), page.data, 1);
    this.is_direct = 'js-free-download-button-direct' == links.regex_parse(new RegExp('data-href="/download-dialog/free/[^"]+" +class=".+(js-free-download-button-direct).+"'), page.data, 1);

    await this.generate();
    let url = await this.get();
    if(!url) throw "No available URL in pool!";

    try {
        // Ask for a single byte; the total size follows the "/" in Content-Range.
        let r = await axios.get(url[1], {
            responseType: 'arraybuffer',
            headers: {
                Range: `bytes=0-0`
            }
        });
        const content_range = r.headers["content-range"];
        const total = typeof content_range === "string" ? parseInt(content_range.split("/")[1]) : NaN;
        if(Number.isNaN(total)) throw new Error(`Unusable Content-Range header: ${content_range}`);
        this.total_size = total;
    } finally {
        // Hand the URL back even when the probe fails.
        this.return(url[0]);
    }

    this.ready = true;
    return true;
}
|
||||
|
||||
|
@ -74,8 +93,8 @@ export class UrlPool {
|
|||
|
||||
async start_generation() {
|
||||
this.generating = true;
|
||||
while(this.urls.length < 10) {
|
||||
this.generate();
|
||||
while(this.urls.length < 15 && this.generating) {
|
||||
console.log(this.id, "| new link:", (await this.generate()) ? true : false);
|
||||
await sleep(2000);
|
||||
}
|
||||
this.generating = false;
|
||||
|
@ -85,17 +104,18 @@ export class UrlPool {
|
|||
try {
|
||||
let link = await (this.is_direct ? links.direct_link(this.id) : links.captcha_link(this.id));
|
||||
if(link) this.add(link);
|
||||
console.log(link);
|
||||
return link;
|
||||
} catch {
|
||||
} catch(e) {
|
||||
console.log(e);
|
||||
this.generating = false;
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
get_downloader() {
|
||||
if(this.downloader) return this.downloader;
|
||||
this.downloader = new Downloader(this)
|
||||
return this.downloader;
|
||||
get_downloader(from: number) {
|
||||
let d = new Downloader(this, from);
|
||||
this.downloaders.push(d);
|
||||
return d;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
48
webserver.ts
48
webserver.ts
|
@ -1,5 +1,4 @@
|
|||
import express from "express";
|
||||
import { Downloader } from "./downloader";
|
||||
import { UrlPoolStorage } from "./urlpool";
|
||||
import { Readable } from 'stream';
|
||||
|
||||
|
@ -8,9 +7,6 @@ let storage = new UrlPoolStorage;
|
|||
|
||||
export class Webserver {
|
||||
|
||||
chunk_size = 5*1024*1024;
|
||||
partial_size = this.chunk_size*10;
|
||||
|
||||
constructor() {
|
||||
|
||||
const app = express();
|
||||
|
@ -24,11 +20,17 @@ export class Webserver {
|
|||
return;
|
||||
}
|
||||
|
||||
res.writeHead(200, {
|
||||
"Content-Type": "application/json"
|
||||
});
|
||||
res.write(JSON.stringify({
|
||||
streams: {
|
||||
total: p.urls.length,
|
||||
available: p.available().length,
|
||||
generating: p.generating
|
||||
generating: p.generating,
|
||||
urls: p.urls,
|
||||
total_size: p.total_size,
|
||||
downloaders: p.downloaders.length
|
||||
}
|
||||
}));
|
||||
res.end();
|
||||
|
@ -43,40 +45,33 @@ export class Webserver {
|
|||
res.end();
|
||||
return;
|
||||
}
|
||||
let d = p.get_downloader();
|
||||
|
||||
let range: Range = {from: 0, to: null};
|
||||
if(req.headers.range) range = this.parse_range(req.headers.range);
|
||||
if(!range.from) range.from = 0;
|
||||
|
||||
if(!d.ready) await d.init();
|
||||
let d = p.get_downloader(range.from);
|
||||
|
||||
let [from, to] = this.from_to([range.from, range.from+this.chunk_size], d.total_size);
|
||||
|
||||
let contentLength = d.total_size-from;
|
||||
let contentLength = p.total_size-1-range.from;
|
||||
let headers = {
|
||||
"Content-Range": `bytes ${from}-${d.total_size}/${d.total_size+1}`,
|
||||
"Range": `bytes=${from}-${d.total_size}/${d.total_size+1}`,
|
||||
"Content-Range": `bytes ${range.from}-${p.total_size-1}/${p.total_size-1}`,
|
||||
"Range": `bytes=${range.from}-${p.total_size-1}/${p.total_size-1}`,
|
||||
"Accept-Ranges": "bytes",
|
||||
"Content-Length": contentLength,
|
||||
"Content-Type": "application/octet-stream",
|
||||
};
|
||||
|
||||
res.writeHead(206, headers);
|
||||
res.on("close", () => {
|
||||
d.destroy();
|
||||
});
|
||||
|
||||
const readable = new Readable()
|
||||
const readable = new Readable();
|
||||
readable._read = async () => {
|
||||
[from, to] = this.from_to([to+1, to+1+this.chunk_size], d.total_size);
|
||||
if(from == d.total_size) {
|
||||
readable.push(null);
|
||||
//res.end();
|
||||
}
|
||||
else {
|
||||
let stream = await this.download_chunk(d, from, to);
|
||||
readable.push(stream);
|
||||
}
|
||||
let stream = await d.more();
|
||||
readable.push(stream);
|
||||
}
|
||||
let stream = await this.download_chunk(d, from, to);
|
||||
let stream = await d.more();
|
||||
readable.push(stream);
|
||||
readable.pipe(res);
|
||||
|
||||
|
@ -88,13 +83,6 @@ export class Webserver {
|
|||
|
||||
}
|
||||
|
||||
async download_chunk(d: Downloader, from: number, to: number) {
|
||||
console.log("downloading...", from, to);
|
||||
let stream = await d.download_range(from, to);
|
||||
console.log("downloaded ", from, to);
|
||||
return stream;
|
||||
}
|
||||
|
||||
parse_range(input: string): Range {
|
||||
|
||||
let [from, to]: (number|null)[] = [null, null];
|
||||
|
|
Reference in a new issue