Improved downloader module

This commit is contained in:
Filip Znachor 2022-04-23 14:49:12 +02:00
parent 84d4038c6d
commit 68c2800aec
4 changed files with 128 additions and 101 deletions

View file

@ -4,91 +4,108 @@ import { UrlPool } from "./urlpool";
export class Downloader {
pool: UrlPool;
chunks: Buffer[] = [];
downloaded_chunks = 0;
total_chunks = 0;
chunks: {[U: string]: (Buffer | null)} = {};
chunk_size = 512*1024;
total_size = 0;
chunk_size = Math.round(0.1*1024*1024);
ready: boolean = false;
loading: boolean = false;
destroyed: boolean = false;
constructor(pool: UrlPool) {
from: number;
/**
 * Creates a downloader bound to a URL pool.
 *
 * @param pool Pool this downloader takes download URLs and the total file size from.
 * @param from Byte offset at which this downloader starts handing out data.
 */
constructor(pool: UrlPool, from: number) {
this.pool = pool;
this.from = from;
}
async init() {
let url = await this.pool.get();
if(!url) throw "No available URL in pool!";
let r = await axios.get(url[1], {
responseType: 'arraybuffer',
headers: {
Range: `bytes=0-0`
/**
 * Removes and returns the chunk that starts at the current read offset.
 *
 * Chunks are stored under the decimal string of their starting byte offset,
 * so the chunk is looked up directly by `this.from` instead of relying on the
 * enumeration order of the object's keys. This guarantees that the returned
 * data really belongs to the current offset.
 *
 * @returns The buffered chunk, or `undefined` when it has not been requested
 *          yet or is still downloading (`null` placeholder).
 */
first_chunk(): Buffer | undefined {
    const key = this.from.toString();
    const chunk = this.chunks[key];
    // Missing key: never requested. `null`: download still in flight.
    if (!(chunk instanceof Buffer)) return;
    delete this.chunks[key];
    // Offsets advance by the nominal chunk size, matching the keys used when
    // the chunks were requested.
    this.from += this.chunk_size;
    return chunk;
}
/**
 * Drains the cache by calling `first_chunk()` until it yields nothing.
 *
 * @returns All drained chunks concatenated into a single Buffer, or `null`
 *          when no chunk was available.
 */
collect() {
    const ready: Buffer[] = [];
    for (let chunk = this.first_chunk(); chunk; chunk = this.first_chunk()) {
        ready.push(chunk);
    }
    return ready.length ? Buffer.concat(ready) : null;
}
/**
 * Schedules downloads for the next 15 chunks starting at the current offset.
 *
 * Every chunk that has no entry yet gets a `null` placeholder (meaning
 * "in flight") and a background download. Chunks that already have an entry,
 * finished or pending, are left alone.
 */
cache() {
    for (let i = 0; i < 15; i++) {
        const start = this.from + this.chunk_size * i;
        const key = start.toString();
        if (key in this.chunks) continue;
        // Mark as pending before the download starts so the slot is never
        // requested twice.
        this.chunks[key] = null;
        // Fire-and-forget: the result is written into `this.chunks` by
        // download_part. A rejection must still be handled here, otherwise it
        // surfaces as an unhandled promise rejection.
        this.download_part(start, start + this.chunk_size - 1).catch((e) => {
            // The placeholder is kept on purpose; this offset stays pending.
            console.log(this.pool.id, "| chunk", key, "failed:", e);
        });
    }
}
this.total_size = parseInt(r.headers["content-range"].split("/")[1]);
this.pool.return(url[0]);
}
async download_range(from: number, to: number): Promise<Buffer> {
return new Promise(async (complete) => {
if(to > this.total_size) to = this.total_size;
let chunk_count = Math.max(Math.ceil((to-from)/this.chunk_size), 1);
let chunks: Buffer[] = [];
let completed = 0;
for(let i=0; i<chunk_count; i++) {
this.download_chunk(from, to, i).then((result) => {
chunks[i] = result;
completed++;
if(completed == chunk_count) {
complete(Buffer.concat(chunks));
}
});
}
/**
 * Produces the next piece of data for the consumer.
 *
 * Triggers prefetching and then polls the chunk cache every 100 ms until data
 * at the current offset is available or the downloader has been destroyed.
 *
 * @returns `false` when another call is still in progress, `null` when the
 *          offset is past the end of the file or the downloader was destroyed
 *          with nothing buffered, otherwise the collected data.
 */
async more(): Promise<Buffer | null | false> {
    if (this.loading) return false;
    // Check for end of file before taking the `loading` flag, so the flag is
    // never left set by this early return.
    if (this.from > this.pool.total_size - 1) return null;
    this.loading = true;
    this.cache();
    return new Promise<Buffer | null>((resolve) => {
        const poll = () => {
            const result = this.collect();
            if (!result && !this.destroyed) return;
            clearInterval(timer);
            this.loading = false;
            resolve(result);
            if (result) console.log(this.pool.id, "| sending:", Math.round(result.length/1024), "kB");
        };
        const timer = setInterval(poll, 100);
        // Try once immediately so already-buffered data is not delayed.
        poll();
    });
}
async download_chunk(from: number, to: number, part: number): Promise<Buffer> {
async download_part(from: number, to: number) {
if(to > this.pool.total_size-1) to = this.pool.total_size-1;
if(from > this.pool.total_size-1) return;
let url = await this.pool.get();
if(!url) throw "No available URL!";
let lfrom = from + (part * this.chunk_size) + (part > 0 ? 1 : 0);
let lto = Math.min(from + (part * this.chunk_size) + this.chunk_size, to);
if(lfrom == lto) lfrom--;
if(this.destroyed == true) {
this.pool.return(url[0]);
return;
}
let r = await axios.get(url[1], {
responseType: 'arraybuffer',
headers: {
Range: `bytes=${lfrom}-${lto}`
Range: `bytes=${from}-${to}`
}
});
});
this.chunks[from.toString()] = r.data;
this.pool.return(url[0]);
return r.data;
}
async download_part(i: number) {
let chunk_size = 10_000_000;
let from = i*chunk_size;
let to = (from+chunk_size-1);
if(to > this.total_size) to = this.total_size;
if(this.chunks[i] || from > this.total_size) return false;
this.total_chunks++;
this.chunks[i] = await this.download_range(from, to);
this.downloaded_chunks++;
return true;
/**
 * Marks this downloader as destroyed and unregisters it from its pool.
 *
 * Safe to call more than once: if the downloader is no longer in the pool's
 * list, nothing is removed.
 */
destroy() {
    this.destroyed = true;
    const index = this.pool.downloaders.indexOf(this);
    // indexOf yields -1 when not found; splice(-1, 1) would then remove the
    // last (unrelated) downloader.
    if (index !== -1) this.pool.downloaders.splice(index, 1);
}
}

View file

@ -39,6 +39,7 @@ export class Links {
let download_link = "https://uloz.to/download-dialog/free/download?fileSlug="+id;
let redirect = await this.inst.get(download_link, {maxRedirects: 0, validateStatus: null});
if(redirect.status !== 302) throw new Error(`Status code: ${redirect.status}`);
this.tor.torNewSession();
if(redirect.headers.location && redirect.headers.location.startsWith("https://download.uloz.to"))
return redirect.headers.location;
@ -48,9 +49,9 @@ export class Links {
async captcha_link(id: string): Promise<string | undefined> {
let download_link = "https://uloz.to/download-dialog/free/download?fileSlug="+id;
let captcha_page = await this.inst.get(download_link, {maxRedirects: 0});
let captcha_page = await this.inst.get(download_link, {maxRedirects: 0, validateStatus: null});
if(captcha_page.status !== 200) throw new Error("Status code is not 200");
if(captcha_page.status !== 200) throw new Error(`Status code: ${captcha_page.status}`);
let cookies = this.parse_cookie(captcha_page.headers["set-cookie"]);
let url = this.regex_parse(/<img class="xapca-image" src="([^"]*)" alt="">/gm, captcha_page.data, 1);
@ -78,7 +79,8 @@ export class Links {
"X-Requested-With": "XMLHttpRequest",
"User-Agent": "Go-http-client/1.1",
"Cookie": cookies
}
},
validateStatus: null
});
this.tor.torNewSession();
return result.data.slowDownloadLink;

View file

@ -11,23 +11,42 @@ function sleep(ms: number) {
export class UrlPool {
id: string;
is_direct = false;
urls: string[] = [];
used: boolean[] = [];
generating = false;
downloader?: Downloader;
valid = false;
downloaders: Downloader[] = [];
total_size: number = 0;
is_direct = false;
ready = false;
constructor(id: string) {
this.id = id;
}
/**
 * Prepares the pool: checks that the file page exists, detects whether the
 * direct free download is offered, generates a first link and probes the file
 * size with a one-byte range request.
 *
 * @returns `false` when the file page answers 404, otherwise `true`.
 * @throws When no URL is available in the pool, when the probe request fails,
 *         or when the total size cannot be read from `Content-Range`.
 */
async init() {
    let page = await axios.get("https://uloz.to/file/"+this.id, {validateStatus: null});
    if(page.status == 404) return false;
    // TODO: Add quick download support
    // let quick_dl_url = links.regex_parse(new RegExp('href="(/quickDownload/[^"]*)"'), page.data, 1);
    this.is_direct = 'js-free-download-button-direct' == links.regex_parse(new RegExp('data-href="/download-dialog/free/[^"]+" +class=".+(js-free-download-button-direct).+"'), page.data, 1);
    await this.generate();
    let url = await this.get();
    if(!url) throw "No available URL in pool!";
    try {
        let r = await axios.get(url[1], {
            responseType: 'arraybuffer',
            headers: {
                Range: `bytes=0-0`
            }
        });
        // Content-Range has the form "bytes 0-0/<total>"; the part after the
        // slash is the total size. A missing header or an unknown total ("*")
        // yields NaN and is rejected instead of being stored.
        const content_range = String(r.headers["content-range"]);
        const total = parseInt(String(content_range.split("/")[1]), 10);
        if(!Number.isFinite(total)) throw new Error(`Could not determine file size from Content-Range: ${content_range}`);
        this.total_size = total;
    } finally {
        // Hand the URL back even when the probe fails.
        this.return(url[0]);
    }
    this.ready = true;
    return true;
}
@ -74,8 +93,8 @@ export class UrlPool {
async start_generation() {
this.generating = true;
while(this.urls.length < 10) {
this.generate();
while(this.urls.length < 15 && this.generating) {
console.log(this.id, "| new link:", (await this.generate()) ? true : false);
await sleep(2000);
}
this.generating = false;
@ -85,17 +104,18 @@ export class UrlPool {
try {
let link = await (this.is_direct ? links.direct_link(this.id) : links.captcha_link(this.id));
if(link) this.add(link);
console.log(link);
return link;
} catch {
} catch(e) {
console.log(e);
this.generating = false;
return undefined;
}
}
get_downloader() {
if(this.downloader) return this.downloader;
this.downloader = new Downloader(this)
return this.downloader;
/**
 * Creates a new downloader for this pool and registers it in `downloaders`.
 *
 * @param from Byte offset the new downloader starts at.
 * @returns The newly created downloader.
 */
get_downloader(from: number) {
    const downloader = new Downloader(this, from);
    this.downloaders.push(downloader);
    return downloader;
}
}

View file

@ -1,5 +1,4 @@
import express from "express";
import { Downloader } from "./downloader";
import { UrlPoolStorage } from "./urlpool";
import { Readable } from 'stream';
@ -8,9 +7,6 @@ let storage = new UrlPoolStorage;
export class Webserver {
chunk_size = 5*1024*1024;
partial_size = this.chunk_size*10;
constructor() {
const app = express();
@ -24,11 +20,17 @@ export class Webserver {
return;
}
res.writeHead(200, {
"Content-Type": "application/json"
});
res.write(JSON.stringify({
streams: {
total: p.urls.length,
available: p.available().length,
generating: p.generating
generating: p.generating,
urls: p.urls,
total_size: p.total_size,
downloaders: p.downloaders.length
}
}));
res.end();
@ -43,40 +45,33 @@ export class Webserver {
res.end();
return;
}
let d = p.get_downloader();
let range: Range = {from: 0, to: null};
if(req.headers.range) range = this.parse_range(req.headers.range);
if(!range.from) range.from = 0;
if(!d.ready) await d.init();
let d = p.get_downloader(range.from);
let [from, to] = this.from_to([range.from, range.from+this.chunk_size], d.total_size);
let contentLength = d.total_size-from;
let contentLength = p.total_size-1-range.from;
let headers = {
"Content-Range": `bytes ${from}-${d.total_size}/${d.total_size+1}`,
"Range": `bytes=${from}-${d.total_size}/${d.total_size+1}`,
"Content-Range": `bytes ${range.from}-${p.total_size-1}/${p.total_size-1}`,
"Range": `bytes=${range.from}-${p.total_size-1}/${p.total_size-1}`,
"Accept-Ranges": "bytes",
"Content-Length": contentLength,
"Content-Type": "application/octet-stream",
};
res.writeHead(206, headers);
res.on("close", () => {
d.destroy();
});
const readable = new Readable()
const readable = new Readable();
readable._read = async () => {
[from, to] = this.from_to([to+1, to+1+this.chunk_size], d.total_size);
if(from == d.total_size) {
readable.push(null);
//res.end();
}
else {
let stream = await this.download_chunk(d, from, to);
readable.push(stream);
}
let stream = await d.more();
readable.push(stream);
}
let stream = await this.download_chunk(d, from, to);
let stream = await d.more();
readable.push(stream);
readable.pipe(res);
@ -88,13 +83,6 @@ export class Webserver {
}
async download_chunk(d: Downloader, from: number, to: number) {
console.log("downloading...", from, to);
let stream = await d.download_range(from, to);
console.log("downloaded ", from, to);
return stream;
}
parse_range(input: string): Range {
let [from, to]: (number|null)[] = [null, null];