Initial commit: delta-chat-bot
This commit is contained in:
commit
8f47610133
10 changed files with 3603 additions and 0 deletions
17
php-proxy/.htaccess
Normal file
17
php-proxy/.htaccess
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
# Never expose directory listings.
Options -Indexes
RewriteEngine On

# Allow only bot IP: any other client address receives 403 for every URL.
# This block is intentionally NOT wrapped in <IfModule mod_rewrite.c>: without
# mod_rewrite the request should fail rather than be served unrestricted.
RewriteCond %{REMOTE_ADDR} !^90\.188\.48\.201$
RewriteRule ^ - [F,L]

# Nice URLs: /rss/gremtelegram -> rss.php?channel=gremtelegram
# (QSA keeps an optional ?limit=N from the original request).
RewriteRule ^rss/([a-zA-Z0-9_]+)$ rss.php?channel=$1 [L,QSA]

# Block direct access to cache directory
RewriteRule ^cache/ - [F,L]

# Cache images for 7 days
# Guarded by IfModule: on a host without mod_headers an unguarded "Header"
# directive makes every request fail with 500 (invalid command).
<IfModule mod_headers.c>
    <FilesMatch "\.(jpg|jpeg|png|gif|webp)$">
        Header set Cache-Control "public, max-age=604800, immutable"
    </FilesMatch>
</IfModule>
|
||||
58
php-proxy/SETUP.md
Normal file
58
php-proxy/SETUP.md
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
# PHP-Proxy для Telegram RSS
|
||||
|
||||
Архитектура:
|
||||
```
|
||||
tg.i-c-a.su (10-17s) → proxy.budaev.org (PHP + кэш) → бот (<0.1s)
|
||||
```
|
||||
|
||||
## Установка
|
||||
|
||||
### 1. Загрузить файлы на хостинг
|
||||
|
||||
Через FTP залить всё содержимое `php-proxy/` в корень домена `proxy.budaev.org`:
|
||||
```
|
||||
/home/USER/www/proxy.budaev.org/
|
||||
.htaccess
|
||||
rss.php
|
||||
media.php
|
||||
channels.json
|
||||
```
|
||||
|
||||
### 2. Прогреть кэш
|
||||
|
||||
Открыть в браузере с IP-адреса бота (`.htaccess` пропускает только его, остальные получат 403); каждый запрос занимает ~10-17с:
|
||||
```
|
||||
https://proxy.budaev.org/rss/gremtelegram
|
||||
https://proxy.budaev.org/rss/raiznews
|
||||
https://proxy.budaev.org/rss/droidergram
|
||||
https://proxy.budaev.org/rss/mkvburyatii
|
||||
https://proxy.budaev.org/rss/markettwits
|
||||
```
|
||||
|
||||
### 3. Обновить deltabot.py
|
||||
|
||||
Изменения уже сделаны локально. Задеплоить на сервер:
|
||||
```
|
||||
scp deltabot.py SERVER:~/delta-bot/ && ssh SERVER sudo systemctl restart deltabot
|
||||
```
|
||||
|
||||
## Как это работает
|
||||
|
||||
- **Без cron** — кэш наполняется лениво, по первому запросу. Крон не нужен.
|
||||
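- **`rss.php`** — отдаёт RSS из кэша (TTL от 600с: к базовым 600с добавляется сдвиг до 299с, свой для каждого канала). При кэш-миссе проксирует с tg.i-c-a.su и кэширует. Кэшируется только стандартный `limit=10`; запросы с другим `limit` всегда идут напрямую.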
|
||||
- **`media.php`** — отдаёт картинки из кэша (TTL 24ч). При кэш-миссе скачивает и кэширует.
|
||||
- **Бот** — стучится на `rss.php` как на обычный RSS-прокси. Ничего не знает про кэш.
|
||||
|
||||
## Добавление нового канала
|
||||
|
||||
После `/channels add username`:
|
||||
1. Первый poll — медленный (10-17с, `rss.php` проксирует с tg.i-c-a.su)
|
||||
2. Со второго — мгновенно из кэша
|
||||
3. Картинка первого поста — тоже медленная, дальше мгновенно
|
||||
|
||||
Можно сразу прогреть вручную: открыть `https://proxy.budaev.org/rss/username`.
|
||||
|
||||
## Важно
|
||||
|
||||
- Не забудь добавить username в `channels.json` (для мониторинга, не влияет на работу)
|
||||
- Картинки живут в кэше 24ч, потом перезапрашиваются
|
||||
8
php-proxy/channels.json
Normal file
8
php-proxy/channels.json
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
[
|
||||
"markettwits",
|
||||
"raiznews",
|
||||
"droidergram",
|
||||
"gremtelegram",
|
||||
"postnauka",
|
||||
"kartiny2"
|
||||
]
|
||||
120
php-proxy/media.php
Normal file
120
php-proxy/media.php
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
<?php
|
||||
/**
|
||||
* Telegram Media Proxy with cache
|
||||
*
|
||||
* GET /media.php?url=https://tg.i-c-a.su/media/CHANNEL/POSTID/HASH.ext
|
||||
*
|
||||
* Returns cached image or proxies from tg.i-c-a.su.
|
||||
* Old cache files are cleaned up probabilistically (~2% of requests).
|
||||
*/
|
||||
|
||||
define('IMG_DIR', __DIR__ . '/cache/img');
define('TG_BASE', 'https://tg.i-c-a.su');
define('UA', 'Mozilla/5.0 (compatible; ProxyBot/1.0)');
define('MEDIA_TTL', 86400);

// Validate the requested URL by its parsed components. A plain string-prefix
// comparison against TG_BASE would also accept hosts such as
// "tg.i-c-a.su.evil.example" or "tg.i-c-a.su@evil.example", which would turn
// this script into an open proxy.
$url = $_GET['url'] ?? '';
$urlParts = is_string($url) ? parse_url($url) : false; // ?url[]=... is rejected too
$upstreamHost = parse_url(TG_BASE, PHP_URL_HOST);

$urlAllowed = is_array($urlParts)
    && strtolower($urlParts['scheme'] ?? '') === 'https'
    && strtolower($urlParts['host'] ?? '') === $upstreamHost
    && !isset($urlParts['user'])
    && !isset($urlParts['pass'])
    && (!isset($urlParts['port']) || $urlParts['port'] === 443);

if (!$urlAllowed) {
    header('HTTP/1.1 400 Bad Request');
    echo 'Invalid URL';
    exit;
}

if (!is_dir(IMG_DIR)) mkdir(IMG_DIR, 0755, true);

// Content type map; also the whitelist of extensions allowed on cache files.
$extToType = [
    'jpg' => 'image/jpeg',
    'jpeg' => 'image/jpeg',
    'png' => 'image/png',
    'gif' => 'image/gif',
    'webp' => 'image/webp',
];

// Determine cache path from URL.
// The file name is a hash of the complete URL, so two different URLs that
// happen to share a basename never overwrite each other's cache entry.
// The extension comes from the whitelist only (fallback: jpg), so a request
// can never make this script write e.g. a ".php" file below the web root.
$ext = strtolower(pathinfo($urlParts['path'] ?? '', PATHINFO_EXTENSION));
if (!isset($extToType[$ext])) {
    $ext = 'jpg';
}
$contentType = $extToType[$ext];
$cacheFile = IMG_DIR . '/' . sha1($url) . '.' . $ext;

// Periodic cleanup (~2% of requests)
maybeCleanOldCache();

// Serve from cache if fresh
if (file_exists($cacheFile) && filemtime($cacheFile) > time() - MEDIA_TTL) {
    header('Content-Type: ' . $contentType);
    header('Content-Length: ' . filesize($cacheFile));
    header('Cache-Control: public, max-age=' . MEDIA_TTL);
    header('X-Cache: HIT');
    readfile($cacheFile);
    exit;
}

// Fetch from tg.i-c-a.su
// NOTE(review): follow_location is enabled, so an upstream redirect can still
// send this fetch to a different host; confirm that this is acceptable.
$ctx = stream_context_create(['http' => ['timeout' => 60, 'user_agent' => UA, 'follow_location' => true]]);
// @: a failed download is reported through the 502 below, not through a PHP warning.
$data = @file_get_contents($url, false, $ctx);

if (!$data) {
    header('HTTP/1.1 502 Bad Gateway');
    echo 'Failed to fetch media';
    exit;
}

// Verify it's actually an image before caching
$imageSignatures = [
    "\xff\xd8\xff",     // JPEG
    "\x89\x50\x4e\x47", // PNG
    "GIF87a",           // GIF
    "GIF89a",           // GIF
];
$isImage = false;
foreach ($imageSignatures as $sig) {
    if (strncmp($data, $sig, strlen($sig)) === 0) {
        $isImage = true;
        break;
    }
}
// WebP: "RIFF" alone is a generic container header (WAV and AVI use it too);
// a WebP file additionally carries "WEBP" at byte offset 8.
if (!$isImage && strncmp($data, 'RIFF', 4) === 0 && substr($data, 8, 4) === 'WEBP') {
    $isImage = true;
}

if (!$isImage) {
    header('HTTP/1.1 415 Unsupported Media Type');
    header('Content-Type: text/plain');
    echo 'Not an image';
    exit;
}

// Save to cache
file_put_contents($cacheFile, $data);

// Return
header('Content-Type: ' . $contentType);
header('Content-Length: ' . strlen($data));
header('Cache-Control: public, max-age=' . MEDIA_TTL);
header('X-Cache: MISS');
echo $data;
|
||||
|
||||
// Clean files older than TTL, runs ~2% of requests
|
||||
/**
 * Opportunistic sweep of the image cache directory (IMG_DIR).
 *
 * Does nothing unless rand(1, 50) yields 1. When it runs, every regular file
 * directly inside IMG_DIR is deleted if its extension is not one of the known
 * image extensions, or if its modification time is older than MEDIA_TTL
 * seconds. Subdirectories are not entered.
 */
function maybeCleanOldCache(): void {
    if (rand(1, 50) !== 1 || !is_dir(IMG_DIR)) {
        return;
    }

    $expiresBefore = time() - MEDIA_TTL;
    // Extensions that are allowed to stay in the cache, used as a lookup table.
    $keepExts = ['jpg' => true, 'jpeg' => true, 'png' => true, 'gif' => true, 'webp' => true];

    $handle = opendir(IMG_DIR);
    if (!$handle) {
        return;
    }

    while (false !== ($entry = readdir($handle))) {
        if ($entry === '.' || $entry === '..') {
            continue;
        }

        $fullPath = IMG_DIR . '/' . $entry;
        if (!is_file($fullPath)) {
            continue;
        }

        $suffix = strtolower(pathinfo($fullPath, PATHINFO_EXTENSION));

        // A file survives only if it is a known image type AND not yet expired.
        $isFreshImage = isset($keepExts[$suffix]) && !(filemtime($fullPath) < $expiresBefore);
        if (!$isFreshImage) {
            unlink($fullPath);
        }
    }

    closedir($handle);
}
|
||||
99
php-proxy/rss.php
Normal file
99
php-proxy/rss.php
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
<?php
|
||||
/**
|
||||
* Telegram RSS Proxy with cache
|
||||
*
|
||||
* GET /rss.php?channel=CHANNEL&limit=N
|
||||
* or via .htaccess rewrite: /rss/CHANNEL?limit=N
|
||||
*
|
||||
* Returns RSS XML from local cache (if fresh) or proxies from tg.i-c-a.su.
|
||||
* Enclosure URLs are rewritten to go through media.php for local caching.
|
||||
* Cache files are pre-rewritten so cache hits avoid DOMDocument entirely.
|
||||
*/
|
||||
|
||||
define('CACHE_DIR', __DIR__ . '/cache');
define('TG_BASE', 'https://tg.i-c-a.su');
define('PROXY_BASE', 'https://proxy.budaev.org');
define('UA', 'Mozilla/5.0 (compatible; ProxyBot/1.0)');
define('CACHE_TTL', 600);

// Channel name: only [a-zA-Z0-9_] survives, which also makes it safe to use as
// a cache file name. A non-string value (e.g. ?channel[]=x) counts as missing.
$rawChannel = $_GET['channel'] ?? '';
$channel = is_string($rawChannel) ? preg_replace('/[^a-zA-Z0-9_]/', '', $rawChannel) : '';
if (!$channel) {
    header('HTTP/1.1 400 Bad Request');
    echo 'Missing channel parameter';
    exit;
}

// Requested item count, clamped to 1..100 (default 10).
$limit = min(max((int)($_GET['limit'] ?? 10), 1), 100);

// Staggered TTL: vary per channel to avoid mass expiration
$stagger = crc32($channel) % 300;
$ttl = CACHE_TTL + $stagger;

// Non-standard limits: fetch directly, no caching
// (the upstream XML is passed through without enclosure rewriting).
if ($limit !== 10) {
    $xml = fetchFeed($channel, $limit, true);
    if ($xml === null) {
        header('HTTP/1.1 502 Bad Gateway');
        echo 'Failed to fetch feed';
        exit;
    }
    header('Content-Type: application/rss+xml; charset=utf-8');
    echo $xml;
    exit;
}

$cacheFile = CACHE_DIR . '/' . $channel . '.xml';

// Cache hit — pre-rewritten, no DOMDocument needed
if (file_exists($cacheFile) && filemtime($cacheFile) > time() - $ttl) {
    header('Content-Type: application/rss+xml; charset=utf-8');
    readfile($cacheFile);
    exit;
}

// Cache miss or stale — fetch and rewrite in one DOMDocument pass
$xml = fetchFeed($channel, $limit, false);
if ($xml === null) {
    // Serve stale cache if available
    if (file_exists($cacheFile)) {
        header('Content-Type: application/rss+xml; charset=utf-8');
        readfile($cacheFile);
        exit;
    }
    // No cache at all — return empty feed so bot doesn't fall back to tg.i-c-a.su
    header('Content-Type: application/rss+xml; charset=utf-8');
    echo '<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"><channel><title>' . htmlspecialchars($channel) . '</title></channel></rss>';
    exit;
}

// Save pre-rewritten XML to cache.
// The cache directory is created on demand: this script has no other place
// that creates it, so on a fresh deployment the write would fail and every
// request would go upstream. The second is_dir() covers a concurrent request
// creating the directory first; @ keeps that harmless warning out of the feed.
if (is_dir(CACHE_DIR) || @mkdir(CACHE_DIR, 0755, true) || is_dir(CACHE_DIR)) {
    file_put_contents($cacheFile, $xml);
}

header('Content-Type: application/rss+xml; charset=utf-8');
echo $xml;
|
||||
|
||||
|
||||
/**
 * Download a channel feed from the upstream and optionally rewrite enclosures.
 *
 * @param string $channel     Channel name; appended to the upstream URL as-is,
 *                            so the caller is responsible for sanitizing it.
 * @param int    $limit       Value of the upstream "limit" query parameter.
 * @param bool   $skipRewrite When true, the upstream body is returned untouched.
 *
 * @return string|null Feed XML; null when the download fails or is empty, or
 *                     when the XML cannot be parsed or serialized.
 */
function fetchFeed(string $channel, int $limit, bool $skipRewrite): ?string {
    $url = TG_BASE . '/rss/' . $channel . '?limit=' . $limit;
    $ctx = stream_context_create(['http' => ['timeout' => 30, 'user_agent' => UA, 'follow_location' => true]]);
    // @: a failed download is reported through the null return, not a PHP warning.
    $raw = @file_get_contents($url, false, $ctx);
    if (!$raw) return null;

    if ($skipRewrite) {
        return $raw;
    }

    // Parse DOM once, rewrite enclosures, return pre-rewritten XML.
    // Parser diagnostics are collected internally instead of being emitted as
    // warnings; LIBXML_NONET forbids network access while loading the document.
    $previousSetting = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $loaded = $doc->loadXML($raw, LIBXML_NONET);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);
    if (!$loaded || !$doc->documentElement) return null;

    $upstreamHost = parse_url(TG_BASE, PHP_URL_HOST);
    foreach ($doc->getElementsByTagName('enclosure') as $enc) {
        $encUrl = $enc->getAttribute('url');
        // Rewrite only URLs that start with TG_BASE and whose host is exactly
        // the upstream host. A URL that merely contains TG_BASE somewhere
        // (e.g. in a query string) would be refused by media.php and end up
        // as a broken enclosure, so it is left untouched.
        $encParts = parse_url($encUrl);
        $pointsAtUpstream = strpos($encUrl, TG_BASE) === 0
            && is_array($encParts)
            && ($encParts['host'] ?? '') === $upstreamHost;
        if ($pointsAtUpstream) {
            $enc->setAttribute('url', PROXY_BASE . '/media.php?url=' . urlencode($encUrl));
        }
    }

    // saveXML() returns false on failure; map it to null so the caller does
    // not store an empty string as a cached feed.
    $out = $doc->saveXML();
    return $out === false ? null : $out;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue