Compare commits
No commits in common. "python-for-channel" and "python" have entirely different histories.
python-for
...
python
39
.gitea/workflows/build.yaml
Normal file
39
.gitea/workflows/build.yaml
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
name: Actions Build Docker Image
|
||||||
|
run-name: ${{ gitea.actor }} is building new image 🚀
|
||||||
|
on: [push]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
Explore-Gitea-Actions:
|
||||||
|
runs-on: soaska
|
||||||
|
steps:
|
||||||
|
- run: echo "🎉 The job was automatically triggered by a ${{ gitea.event_name }} event."
|
||||||
|
- name: Install Node.js
|
||||||
|
run: |
|
||||||
|
apk add --no-cache nodejs
|
||||||
|
- name: Check out repository code
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
- name: List files in the repository
|
||||||
|
run: |
|
||||||
|
ls ${{ gitea.workspace }}
|
||||||
|
- run: echo "🍏 This job's status is ${{ job.status }}."
|
||||||
|
- name: Install Docker
|
||||||
|
run: |
|
||||||
|
apk add --no-cache docker
|
||||||
|
- name: Start Docker service
|
||||||
|
run: |
|
||||||
|
dockerd &
|
||||||
|
sleep 5
|
||||||
|
docker info
|
||||||
|
- name: Build Dockerfile
|
||||||
|
run: |
|
||||||
|
docker build -t $(basename ${{ github.repository }}) .
|
||||||
|
- name: Upload Docker image to soaska.ru
|
||||||
|
run: |
|
||||||
|
package_name=$(basename ${{ github.repository }})
|
||||||
|
branch_name=$(git rev-parse --abbrev-ref HEAD)
|
||||||
|
|
||||||
|
docker login -u ${{ secrets.username }} -p ${{ secrets.password }} soaska.ru
|
||||||
|
docker tag $package_name soaska.ru/soaska/$package_name:$branch_name
|
||||||
|
docker tag $package_name soaska.ru/soaska/$package_name:latest
|
||||||
|
docker push soaska.ru/soaska/$package_name:$branch_name
|
||||||
|
docker push soaska.ru/soaska/$package_name:latest
|
3
.gitignore
vendored
3
.gitignore
vendored
@ -5,3 +5,6 @@
|
|||||||
/__pycache__
|
/__pycache__
|
||||||
/logfile.log
|
/logfile.log
|
||||||
/data
|
/data
|
||||||
|
|
||||||
|
# Временно
|
||||||
|
/docker
|
@ -1,15 +1,11 @@
|
|||||||
FROM python:3.11-slim-bullseye
|
FROM python:3.11-slim-bullseye
|
||||||
|
|
||||||
WORKDIR /app/
|
WORKDIR /app/
|
||||||
|
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
|
|
||||||
RUN pip install -r requirements.txt --no-cache-dir
|
RUN pip install -r requirements.txt --no-cache-dir
|
||||||
|
|
||||||
RUN playwright install chromium
|
RUN playwright install chromium
|
||||||
|
|
||||||
RUN playwright install-deps
|
RUN playwright install-deps
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
ENTRYPOINT ["python", "main.py"]
|
||||||
CMD ["python", "main.py"]
|
CMD [ "start" ]
|
@ -14,8 +14,8 @@ cd eljur
|
|||||||
```
|
```
|
||||||
|
|
||||||
#### Создание .env файла
|
#### Создание .env файла
|
||||||
Используйте sample.env, чтобы создать свой .env файл.
|
Используйте [sample.env](https://git.soaska.ru/sosiska/eljur/-/blob/main/sample.env?ref_type=heads), чтобы создать свой .env файл.
|
||||||
env.example - пример, как может выглядеть файл .env
|
[env.example](https://git.soaska.ru/sosiska/eljur/-/blob/main/env.example?ref_type=heads) - пример, как может выглядеть файл .env
|
||||||
|
|
||||||
```
|
```
|
||||||
ELJUR_LOGIN=Vasya2005
|
ELJUR_LOGIN=Vasya2005
|
||||||
|
16
main.py
16
main.py
@ -48,8 +48,8 @@ def get_html():
|
|||||||
with open('data/table.html') as html_file:
|
with open('data/table.html') as html_file:
|
||||||
val = html_file.read()
|
val = html_file.read()
|
||||||
logging.debug("data/table.html got")
|
logging.debug("data/table.html got")
|
||||||
except:
|
except Exception as error:
|
||||||
logging.error(f"cant read data/table.html: {Exception}")
|
logging.error(f"cant read data/table.html: {str(error)}")
|
||||||
return val
|
return val
|
||||||
|
|
||||||
|
|
||||||
@ -78,6 +78,7 @@ def verify_data():
|
|||||||
|
|
||||||
|
|
||||||
def run():
|
def run():
|
||||||
|
global new_table, old_table
|
||||||
new_table = scrape()
|
new_table = scrape()
|
||||||
old_table = get_html()
|
old_table = get_html()
|
||||||
old_last_block = ""
|
old_last_block = ""
|
||||||
@ -95,14 +96,19 @@ def run():
|
|||||||
|
|
||||||
update_html(new_table)
|
update_html(new_table)
|
||||||
new_table_soup = BeautifulSoup(new_table, "lxml")
|
new_table_soup = BeautifulSoup(new_table, "lxml")
|
||||||
new_table = new_table_soup.prettify()
|
new_table_pretty = new_table_soup.prettify()
|
||||||
old_table_soup = BeautifulSoup(old_table, "lxml")
|
old_table_soup = BeautifulSoup(old_table, "lxml")
|
||||||
old_table = old_table_soup.prettify()
|
old_table_pretty = old_table_soup.prettify()
|
||||||
get_difference(old_table, new_table)
|
get_difference(old_table_pretty, new_table_pretty)
|
||||||
|
|
||||||
|
|
||||||
if old_last_block != new_last_block:
|
if old_last_block != new_last_block:
|
||||||
send_photo("data/screenshot.png")
|
send_photo("data/screenshot.png")
|
||||||
|
else:
|
||||||
|
send_photo("sad.png")
|
||||||
|
|
||||||
|
send_document("data/difference.html")
|
||||||
|
send_document("data/table.html")
|
||||||
|
|
||||||
return
|
return
|
||||||
logging.debug("same result. nothing to do")
|
logging.debug("same result. nothing to do")
|
||||||
|
92
scraper.py
92
scraper.py
@ -6,24 +6,22 @@ from playwright._impl import _api_types
|
|||||||
from os import mkdir
|
from os import mkdir
|
||||||
|
|
||||||
|
|
||||||
|
# Use sync version of Playwright
|
||||||
|
p = sync_playwright().start()
|
||||||
|
# Launch the browser
|
||||||
|
browser = p.chromium.launch()
|
||||||
|
logging.debug("browser opened")
|
||||||
|
|
||||||
|
# Open a new browser page
|
||||||
|
page = browser.new_page()
|
||||||
|
|
||||||
|
|
||||||
def scrape():
|
def scrape():
|
||||||
# Use sync version of Playwright
|
global page
|
||||||
|
try:
|
||||||
with sync_playwright() as p:
|
# Open our test file in the opened page
|
||||||
|
page.goto(f"https://{DOMAIN}.eljur.ru/authorize?return_uri=%2Fjournal-board-action")
|
||||||
try:
|
try:
|
||||||
# Launch the browser
|
|
||||||
browser = p.chromium.launch()
|
|
||||||
logging.debug("browser opened")
|
|
||||||
|
|
||||||
# Open a new browser page
|
|
||||||
page = browser.new_page()
|
|
||||||
|
|
||||||
# Create a URI for our test file
|
|
||||||
|
|
||||||
# Open our test file in the opened page
|
|
||||||
page.goto(f"https://{DOMAIN}.eljur.ru/authorize?return_uri=%2Fjournal-board-action")
|
|
||||||
|
|
||||||
# Log In
|
# Log In
|
||||||
login_field = page.locator('[type="text"]')
|
login_field = page.locator('[type="text"]')
|
||||||
password_field = page.locator('[type="password"]')
|
password_field = page.locator('[type="password"]')
|
||||||
@ -33,36 +31,40 @@ def scrape():
|
|||||||
|
|
||||||
submit_button = page.locator('[type="submit"]')
|
submit_button = page.locator('[type="submit"]')
|
||||||
submit_button.click()
|
submit_button.click()
|
||||||
|
except Exception as error:
|
||||||
|
logging.debug(f"Error while request: {str(error)}")
|
||||||
|
page.goto(f"https://{DOMAIN}.eljur.ru/journal-board-action?user={LOGIN}&domain={DOMAIN}")
|
||||||
|
pass
|
||||||
|
|
||||||
page.wait_for_url(f"https://{DOMAIN}.eljur.ru/journal-board-action?user={LOGIN}&domain={DOMAIN}")
|
page.wait_for_url(f"https://{DOMAIN}.eljur.ru/journal-board-action?user={LOGIN}&domain={DOMAIN}")
|
||||||
# page.wait_for_load_state("domcontentloaded")
|
page.wait_for_load_state("domcontentloaded")
|
||||||
|
if page.url != f"https://{DOMAIN}.eljur.ru/journal-board-action?user={LOGIN}&domain={DOMAIN}" and page.url != f"https://{DOMAIN}.eljur.ru/journal-board-action":
|
||||||
|
page.goto(f"https://{DOMAIN}.eljur.ru/journal-board-action")
|
||||||
|
page.wait_for_load_state("domcontentloaded")
|
||||||
|
|
||||||
page_content = page.content()
|
page_content = page.content()
|
||||||
|
|
||||||
new_content = page.locator('[class="board-item active"]').first
|
new_content = page.locator('[class="board-item active"]').first
|
||||||
try:
|
try:
|
||||||
new_content.screenshot(path = "data/screenshot.png")
|
new_content.screenshot(path = "data/screenshot.png")
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
mkdir("data")
|
mkdir("data")
|
||||||
logging.info("can`t take screenshot")
|
logging.info("can`t take screenshot")
|
||||||
logging.info("retry")
|
logging.info("retry")
|
||||||
new_content.screenshot(path = "data/screenshot.png")
|
new_content.screenshot(path = "data/screenshot.png")
|
||||||
except:
|
|
||||||
logging.error("can`t take screenshot")
|
|
||||||
|
|
||||||
# Close browser
|
|
||||||
browser.close()
|
|
||||||
logging.debug("browser closed")
|
|
||||||
|
|
||||||
# Process extracted content with BeautifulSoup
|
|
||||||
soup = BeautifulSoup(page_content, features="lxml")
|
|
||||||
|
|
||||||
logging.debug("content extracted")
|
|
||||||
# return '\n'.join(el.strip() for el in str(soup.get_text).split('\n') if el.strip())
|
|
||||||
return str(soup.get_text)
|
|
||||||
except _api_types.TimeoutError:
|
|
||||||
logging.error("connection timed out")
|
|
||||||
return " "
|
|
||||||
except:
|
except:
|
||||||
logging.error(f"Error while request: {Exception}")
|
logging.error("can`t take screenshot")
|
||||||
return " "
|
|
||||||
|
# Process extracted content with BeautifulSoup
|
||||||
|
soup = BeautifulSoup(page_content, features="lxml")
|
||||||
|
|
||||||
|
logging.debug("content extracted")
|
||||||
|
# return '\n'.join(el.strip() for el in str(soup.get_text).split('\n') if el.strip())
|
||||||
|
return str(soup.get_text)
|
||||||
|
|
||||||
|
except _api_types.TimeoutError:
|
||||||
|
logging.error("connection timed out")
|
||||||
|
return " "
|
||||||
|
except Exception as error:
|
||||||
|
logging.error(f"Error while request: {str(error)}")
|
||||||
|
return " "
|
Loading…
Reference in New Issue
Block a user