Compare commits
No commits in common. "python" and "python-for-channel" have entirely different histories.
python
...
python-for
@ -1,39 +0,0 @@
|
|||||||
name: Actions Build Docker Image
|
|
||||||
run-name: ${{ gitea.actor }} is building new image 🚀
|
|
||||||
on: [push]
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
Explore-Gitea-Actions:
|
|
||||||
runs-on: soaska
|
|
||||||
steps:
|
|
||||||
- run: echo "🎉 The job was automatically triggered by a ${{ gitea.event_name }} event."
|
|
||||||
- name: Install Node.js
|
|
||||||
run: |
|
|
||||||
apk add --no-cache nodejs
|
|
||||||
- name: Check out repository code
|
|
||||||
uses: actions/checkout@v3
|
|
||||||
- name: List files in the repository
|
|
||||||
run: |
|
|
||||||
ls ${{ gitea.workspace }}
|
|
||||||
- run: echo "🍏 This job's status is ${{ job.status }}."
|
|
||||||
- name: Install Docker
|
|
||||||
run: |
|
|
||||||
apk add --no-cache docker
|
|
||||||
- name: Start Docker service
|
|
||||||
run: |
|
|
||||||
dockerd &
|
|
||||||
sleep 5
|
|
||||||
docker info
|
|
||||||
- name: Build Dockerfile
|
|
||||||
run: |
|
|
||||||
docker build -t $(basename ${{ github.repository }}) .
|
|
||||||
- name: Upload Docker image to soaska.ru
|
|
||||||
run: |
|
|
||||||
package_name=$(basename ${{ github.repository }})
|
|
||||||
branch_name=$(git rev-parse --abbrev-ref HEAD)
|
|
||||||
|
|
||||||
docker login -u ${{ secrets.username }} -p ${{ secrets.password }} soaska.ru
|
|
||||||
docker tag $package_name soaska.ru/soaska/$package_name:$branch_name
|
|
||||||
docker tag $package_name soaska.ru/soaska/$package_name:latest
|
|
||||||
docker push soaska.ru/soaska/$package_name:$branch_name
|
|
||||||
docker push soaska.ru/soaska/$package_name:latest
|
|
5
.gitignore
vendored
5
.gitignore
vendored
@ -4,7 +4,4 @@
|
|||||||
/.idea
|
/.idea
|
||||||
/__pycache__
|
/__pycache__
|
||||||
/logfile.log
|
/logfile.log
|
||||||
/data
|
/data
|
||||||
|
|
||||||
# Временно
|
|
||||||
/docker
|
|
@ -1,11 +1,15 @@
|
|||||||
FROM python:3.11-slim-bullseye
|
FROM python:3.11-slim-bullseye
|
||||||
|
|
||||||
WORKDIR /app/
|
WORKDIR /app/
|
||||||
|
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
|
|
||||||
RUN pip install -r requirements.txt --no-cache-dir
|
RUN pip install -r requirements.txt --no-cache-dir
|
||||||
|
|
||||||
RUN playwright install chromium
|
RUN playwright install chromium
|
||||||
|
|
||||||
RUN playwright install-deps
|
RUN playwright install-deps
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
ENTRYPOINT ["python", "main.py"]
|
|
||||||
CMD [ "start" ]
|
CMD ["python", "main.py"]
|
@ -14,8 +14,8 @@ cd eljur
|
|||||||
```
|
```
|
||||||
|
|
||||||
#### Создание .env файла
|
#### Создание .env файла
|
||||||
Используйте [sample.env](https://git.soaska.ru/sosiska/eljur/-/blob/main/sample.env?ref_type=heads), чтобы создать свой .env файл.
|
Используйте sample.env, чтобы создать свой .env файл.
|
||||||
[env.example](https://git.soaska.ru/sosiska/eljur/-/blob/main/env.example?ref_type=heads) - пример, как может выглядеть файл .env
|
env.example - пример, как может выглядеть файл .env
|
||||||
|
|
||||||
```
|
```
|
||||||
ELJUR_LOGIN=Vasya2005
|
ELJUR_LOGIN=Vasya2005
|
||||||
|
18
main.py
18
main.py
@ -39,7 +39,7 @@ def update_html(val=""):
|
|||||||
update_html(val=val)
|
update_html(val=val)
|
||||||
except:
|
except:
|
||||||
logging.critical(f"cant update data/table.html: {Exception}")
|
logging.critical(f"cant update data/table.html: {Exception}")
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
|
||||||
def get_html():
|
def get_html():
|
||||||
@ -48,8 +48,8 @@ def get_html():
|
|||||||
with open('data/table.html') as html_file:
|
with open('data/table.html') as html_file:
|
||||||
val = html_file.read()
|
val = html_file.read()
|
||||||
logging.debug("data/table.html got")
|
logging.debug("data/table.html got")
|
||||||
except Exception as error:
|
except:
|
||||||
logging.error(f"cant read data/table.html: {str(error)}")
|
logging.error(f"cant read data/table.html: {Exception}")
|
||||||
return val
|
return val
|
||||||
|
|
||||||
|
|
||||||
@ -78,7 +78,6 @@ def verify_data():
|
|||||||
|
|
||||||
|
|
||||||
def run():
|
def run():
|
||||||
global new_table, old_table
|
|
||||||
new_table = scrape()
|
new_table = scrape()
|
||||||
old_table = get_html()
|
old_table = get_html()
|
||||||
old_last_block = ""
|
old_last_block = ""
|
||||||
@ -96,19 +95,14 @@ def run():
|
|||||||
|
|
||||||
update_html(new_table)
|
update_html(new_table)
|
||||||
new_table_soup = BeautifulSoup(new_table, "lxml")
|
new_table_soup = BeautifulSoup(new_table, "lxml")
|
||||||
new_table_pretty = new_table_soup.prettify()
|
new_table = new_table_soup.prettify()
|
||||||
old_table_soup = BeautifulSoup(old_table, "lxml")
|
old_table_soup = BeautifulSoup(old_table, "lxml")
|
||||||
old_table_pretty = old_table_soup.prettify()
|
old_table = old_table_soup.prettify()
|
||||||
get_difference(old_table_pretty, new_table_pretty)
|
get_difference(old_table, new_table)
|
||||||
|
|
||||||
|
|
||||||
if old_last_block != new_last_block:
|
if old_last_block != new_last_block:
|
||||||
send_photo("data/screenshot.png")
|
send_photo("data/screenshot.png")
|
||||||
else:
|
|
||||||
send_photo("sad.png")
|
|
||||||
|
|
||||||
send_document("data/difference.html")
|
|
||||||
send_document("data/table.html")
|
|
||||||
|
|
||||||
return
|
return
|
||||||
logging.debug("same result. nothing to do")
|
logging.debug("same result. nothing to do")
|
||||||
|
96
scraper.py
96
scraper.py
@ -6,22 +6,24 @@ from playwright._impl import _api_types
|
|||||||
from os import mkdir
|
from os import mkdir
|
||||||
|
|
||||||
|
|
||||||
# Use sync version of Playwright
|
|
||||||
p = sync_playwright().start()
|
|
||||||
# Launch the browser
|
|
||||||
browser = p.chromium.launch()
|
|
||||||
logging.debug("browser opened")
|
|
||||||
|
|
||||||
# Open a new browser page
|
|
||||||
page = browser.new_page()
|
|
||||||
|
|
||||||
|
|
||||||
def scrape():
|
def scrape():
|
||||||
global page
|
# Use sync version of Playwright
|
||||||
try:
|
|
||||||
# Open our test file in the opened page
|
with sync_playwright() as p:
|
||||||
page.goto(f"https://{DOMAIN}.eljur.ru/authorize?return_uri=%2Fjournal-board-action")
|
|
||||||
try:
|
try:
|
||||||
|
# Launch the browser
|
||||||
|
browser = p.chromium.launch()
|
||||||
|
logging.debug("browser opened")
|
||||||
|
|
||||||
|
# Open a new browser page
|
||||||
|
page = browser.new_page()
|
||||||
|
|
||||||
|
# Create a URI for our test file
|
||||||
|
|
||||||
|
# Open our test file in the opened page
|
||||||
|
page.goto(f"https://{DOMAIN}.eljur.ru/authorize?return_uri=%2Fjournal-board-action")
|
||||||
|
|
||||||
# Log In
|
# Log In
|
||||||
login_field = page.locator('[type="text"]')
|
login_field = page.locator('[type="text"]')
|
||||||
password_field = page.locator('[type="password"]')
|
password_field = page.locator('[type="password"]')
|
||||||
@ -31,40 +33,36 @@ def scrape():
|
|||||||
|
|
||||||
submit_button = page.locator('[type="submit"]')
|
submit_button = page.locator('[type="submit"]')
|
||||||
submit_button.click()
|
submit_button.click()
|
||||||
except Exception as error:
|
|
||||||
logging.debug(f"Error while request: {str(error)}")
|
|
||||||
page.goto(f"https://{DOMAIN}.eljur.ru/journal-board-action?user={LOGIN}&domain={DOMAIN}")
|
|
||||||
pass
|
|
||||||
|
|
||||||
page.wait_for_url(f"https://{DOMAIN}.eljur.ru/journal-board-action?user={LOGIN}&domain={DOMAIN}")
|
|
||||||
page.wait_for_load_state("domcontentloaded")
|
|
||||||
if page.url != f"https://{DOMAIN}.eljur.ru/journal-board-action?user={LOGIN}&domain={DOMAIN}" and page.url != f"https://{DOMAIN}.eljur.ru/journal-board-action":
|
|
||||||
page.goto(f"https://{DOMAIN}.eljur.ru/journal-board-action")
|
|
||||||
page.wait_for_load_state("domcontentloaded")
|
|
||||||
|
|
||||||
page_content = page.content()
|
|
||||||
|
|
||||||
new_content = page.locator('[class="board-item active"]').first
|
page.wait_for_url(f"https://{DOMAIN}.eljur.ru/journal-board-action?user={LOGIN}&domain={DOMAIN}")
|
||||||
try:
|
# page.wait_for_load_state("domcontentloaded")
|
||||||
new_content.screenshot(path = "data/screenshot.png")
|
|
||||||
except FileNotFoundError:
|
page_content = page.content()
|
||||||
mkdir("data")
|
|
||||||
logging.info("can`t take screenshot")
|
new_content = page.locator('[class="board-item active"]').first
|
||||||
logging.info("retry")
|
try:
|
||||||
new_content.screenshot(path = "data/screenshot.png")
|
new_content.screenshot(path = "data/screenshot.png")
|
||||||
|
except FileNotFoundError:
|
||||||
|
mkdir("data")
|
||||||
|
logging.info("can`t take screenshot")
|
||||||
|
logging.info("retry")
|
||||||
|
new_content.screenshot(path = "data/screenshot.png")
|
||||||
|
except:
|
||||||
|
logging.error("can`t take screenshot")
|
||||||
|
|
||||||
|
# Close browser
|
||||||
|
browser.close()
|
||||||
|
logging.debug("browser closed")
|
||||||
|
|
||||||
|
# Process extracted content with BeautifulSoup
|
||||||
|
soup = BeautifulSoup(page_content, features="lxml")
|
||||||
|
|
||||||
|
logging.debug("content extracted")
|
||||||
|
# return '\n'.join(el.strip() for el in str(soup.get_text).split('\n') if el.strip())
|
||||||
|
return str(soup.get_text)
|
||||||
|
except _api_types.TimeoutError:
|
||||||
|
logging.error("connection timed out")
|
||||||
|
return " "
|
||||||
except:
|
except:
|
||||||
logging.error("can`t take screenshot")
|
logging.error(f"Error while request: {Exception}")
|
||||||
|
return " "
|
||||||
# Process extracted content with BeautifulSoup
|
|
||||||
soup = BeautifulSoup(page_content, features="lxml")
|
|
||||||
|
|
||||||
logging.debug("content extracted")
|
|
||||||
# return '\n'.join(el.strip() for el in str(soup.get_text).split('\n') if el.strip())
|
|
||||||
return str(soup.get_text)
|
|
||||||
|
|
||||||
except _api_types.TimeoutError:
|
|
||||||
logging.error("connection timed out")
|
|
||||||
return " "
|
|
||||||
except Exception as error:
|
|
||||||
logging.error(f"Error while request: {str(error)}")
|
|
||||||
return " "
|
|
Loading…
Reference in New Issue
Block a user