Compare commits
No commits in common. "python-for-channel" and "python" have entirely different histories.
python-for
...
python
39
.gitea/workflows/build.yaml
Normal file
39
.gitea/workflows/build.yaml
Normal file
@ -0,0 +1,39 @@
|
||||
# Gitea Actions workflow: on every push, build the repository's Dockerfile and
# push the resulting image to the soaska.ru registry, tagged both with the
# current branch name and with `latest`.
name: Actions Build Docker Image
run-name: ${{ gitea.actor }} is building new image 🚀
on: [push]

jobs:
  Explore-Gitea-Actions:
    runs-on: soaska
    steps:
      - run: echo "🎉 The job was automatically triggered by a ${{ gitea.event_name }} event."

      # Presumably installed first because the checkout action below needs a
      # Node.js runtime on the (apk-based) runner image -- confirm.
      - name: Install Node.js
        run: |
          apk add --no-cache nodejs

      - name: Check out repository code
        uses: actions/checkout@v3

      - name: List files in the repository
        run: |
          ls "${{ gitea.workspace }}"

      - run: echo "🍏 This job's status is ${{ job.status }}."

      - name: Install Docker
        run: |
          apk add --no-cache docker

      - name: Start Docker service
        run: |
          dockerd &
          # Poll until the daemon answers instead of relying on a fixed sleep,
          # which is either too short (flaky) or needlessly long.
          for attempt in $(seq 1 30); do
            if docker info >/dev/null 2>&1; then
              break
            fi
            sleep 1
          done
          # Fails the step (and prints diagnostics) if the daemon never came up.
          docker info

      - name: Build Dockerfile
        run: |
          docker build -t "$(basename "${{ github.repository }}")" .

      - name: Upload Docker image to soaska.ru
        # Secrets are handed to the script through the environment rather than
        # being interpolated into the shell text.
        env:
          REGISTRY_USERNAME: ${{ secrets.username }}
          REGISTRY_PASSWORD: ${{ secrets.password }}
        run: |
          package_name=$(basename "${{ github.repository }}")
          branch_name=$(git rev-parse --abbrev-ref HEAD)

          # --password-stdin keeps the password off the process command line.
          printf '%s' "$REGISTRY_PASSWORD" | docker login -u "$REGISTRY_USERNAME" --password-stdin soaska.ru
          docker tag "$package_name" "soaska.ru/soaska/$package_name:$branch_name"
          docker tag "$package_name" "soaska.ru/soaska/$package_name:latest"
          docker push "soaska.ru/soaska/$package_name:$branch_name"
          docker push "soaska.ru/soaska/$package_name:latest"
|
3
.gitignore
vendored
3
.gitignore
vendored
@ -5,3 +5,6 @@
|
||||
/__pycache__
|
||||
/logfile.log
|
||||
/data
|
||||
|
||||
# Временно
|
||||
/docker
|
@ -1,15 +1,11 @@
|
||||
FROM python:3.11-slim-bullseye
|
||||
|
||||
WORKDIR /app/
|
||||
|
||||
COPY requirements.txt .
|
||||
|
||||
RUN pip install -r requirements.txt --no-cache-dir
|
||||
|
||||
RUN playwright install chromium
|
||||
|
||||
RUN playwright install-deps
|
||||
|
||||
COPY . .
|
||||
|
||||
CMD ["python", "main.py"]
|
||||
ENTRYPOINT ["python", "main.py"]
|
||||
CMD [ "start" ]
|
@ -14,8 +14,8 @@ cd eljur
|
||||
```
|
||||
|
||||
#### Создание .env файла
|
||||
Используйте sample.env, чтобы создать свой .env файл.
|
||||
env.example - пример, как может выглядеть файл .env
|
||||
Используйте [sample.env](https://git.soaska.ru/sosiska/eljur/-/blob/main/sample.env?ref_type=heads), чтобы создать свой .env файл.
|
||||
[env.example](https://git.soaska.ru/sosiska/eljur/-/blob/main/env.example?ref_type=heads) - пример, как может выглядеть файл .env
|
||||
|
||||
```
|
||||
ELJUR_LOGIN=Vasya2005
|
||||
|
16
main.py
16
main.py
@ -48,8 +48,8 @@ def get_html():
|
||||
with open('data/table.html') as html_file:
|
||||
val = html_file.read()
|
||||
logging.debug("data/table.html got")
|
||||
except:
|
||||
logging.error(f"cant read data/table.html: {Exception}")
|
||||
except Exception as error:
|
||||
logging.error(f"cant read data/table.html: {str(error)}")
|
||||
return val
|
||||
|
||||
|
||||
@ -78,6 +78,7 @@ def verify_data():
|
||||
|
||||
|
||||
def run():
|
||||
global new_table, old_table
|
||||
new_table = scrape()
|
||||
old_table = get_html()
|
||||
old_last_block = ""
|
||||
@ -95,14 +96,19 @@ def run():
|
||||
|
||||
update_html(new_table)
|
||||
new_table_soup = BeautifulSoup(new_table, "lxml")
|
||||
new_table = new_table_soup.prettify()
|
||||
new_table_pretty = new_table_soup.prettify()
|
||||
old_table_soup = BeautifulSoup(old_table, "lxml")
|
||||
old_table = old_table_soup.prettify()
|
||||
get_difference(old_table, new_table)
|
||||
old_table_pretty = old_table_soup.prettify()
|
||||
get_difference(old_table_pretty, new_table_pretty)
|
||||
|
||||
|
||||
if old_last_block != new_last_block:
|
||||
send_photo("data/screenshot.png")
|
||||
else:
|
||||
send_photo("sad.png")
|
||||
|
||||
send_document("data/difference.html")
|
||||
send_document("data/table.html")
|
||||
|
||||
return
|
||||
logging.debug("same result. nothing to do")
|
||||
|
92
scraper.py
92
scraper.py
@ -6,24 +6,22 @@ from playwright._impl import _api_types
|
||||
from os import mkdir
|
||||
|
||||
|
||||
# Use sync version of Playwright
|
||||
p = sync_playwright().start()
|
||||
# Launch the browser
|
||||
browser = p.chromium.launch()
|
||||
logging.debug("browser opened")
|
||||
|
||||
# Open a new browser page
|
||||
page = browser.new_page()
|
||||
|
||||
|
||||
def scrape():
|
||||
# Use sync version of Playwright
|
||||
|
||||
with sync_playwright() as p:
|
||||
|
||||
global page
|
||||
try:
|
||||
# Open our test file in the opened page
|
||||
page.goto(f"https://{DOMAIN}.eljur.ru/authorize?return_uri=%2Fjournal-board-action")
|
||||
try:
|
||||
# Launch the browser
|
||||
browser = p.chromium.launch()
|
||||
logging.debug("browser opened")
|
||||
|
||||
# Open a new browser page
|
||||
page = browser.new_page()
|
||||
|
||||
# Create a URI for our test file
|
||||
|
||||
# Open our test file in the opened page
|
||||
page.goto(f"https://{DOMAIN}.eljur.ru/authorize?return_uri=%2Fjournal-board-action")
|
||||
|
||||
# Log In
|
||||
login_field = page.locator('[type="text"]')
|
||||
password_field = page.locator('[type="password"]')
|
||||
@ -33,36 +31,40 @@ def scrape():
|
||||
|
||||
submit_button = page.locator('[type="submit"]')
|
||||
submit_button.click()
|
||||
except Exception as error:
|
||||
logging.debug(f"Error while request: {str(error)}")
|
||||
page.goto(f"https://{DOMAIN}.eljur.ru/journal-board-action?user={LOGIN}&domain={DOMAIN}")
|
||||
pass
|
||||
|
||||
page.wait_for_url(f"https://{DOMAIN}.eljur.ru/journal-board-action?user={LOGIN}&domain={DOMAIN}")
|
||||
# page.wait_for_load_state("domcontentloaded")
|
||||
page.wait_for_url(f"https://{DOMAIN}.eljur.ru/journal-board-action?user={LOGIN}&domain={DOMAIN}")
|
||||
page.wait_for_load_state("domcontentloaded")
|
||||
if page.url != f"https://{DOMAIN}.eljur.ru/journal-board-action?user={LOGIN}&domain={DOMAIN}" and page.url != f"https://{DOMAIN}.eljur.ru/journal-board-action":
|
||||
page.goto(f"https://{DOMAIN}.eljur.ru/journal-board-action")
|
||||
page.wait_for_load_state("domcontentloaded")
|
||||
|
||||
page_content = page.content()
|
||||
page_content = page.content()
|
||||
|
||||
new_content = page.locator('[class="board-item active"]').first
|
||||
try:
|
||||
new_content.screenshot(path = "data/screenshot.png")
|
||||
except FileNotFoundError:
|
||||
mkdir("data")
|
||||
logging.info("can`t take screenshot")
|
||||
logging.info("retry")
|
||||
new_content.screenshot(path = "data/screenshot.png")
|
||||
except:
|
||||
logging.error("can`t take screenshot")
|
||||
|
||||
# Close browser
|
||||
browser.close()
|
||||
logging.debug("browser closed")
|
||||
|
||||
# Process extracted content with BeautifulSoup
|
||||
soup = BeautifulSoup(page_content, features="lxml")
|
||||
|
||||
logging.debug("content extracted")
|
||||
# return '\n'.join(el.strip() for el in str(soup.get_text).split('\n') if el.strip())
|
||||
return str(soup.get_text)
|
||||
except _api_types.TimeoutError:
|
||||
logging.error("connection timed out")
|
||||
return " "
|
||||
new_content = page.locator('[class="board-item active"]').first
|
||||
try:
|
||||
new_content.screenshot(path = "data/screenshot.png")
|
||||
except FileNotFoundError:
|
||||
mkdir("data")
|
||||
logging.info("can`t take screenshot")
|
||||
logging.info("retry")
|
||||
new_content.screenshot(path = "data/screenshot.png")
|
||||
except:
|
||||
logging.error(f"Error while request: {Exception}")
|
||||
return " "
|
||||
logging.error("can`t take screenshot")
|
||||
|
||||
# Process extracted content with BeautifulSoup
|
||||
soup = BeautifulSoup(page_content, features="lxml")
|
||||
|
||||
logging.debug("content extracted")
|
||||
# return '\n'.join(el.strip() for el in str(soup.get_text).split('\n') if el.strip())
|
||||
return str(soup.get_text)
|
||||
|
||||
except _api_types.TimeoutError:
|
||||
logging.error("connection timed out")
|
||||
return " "
|
||||
except Exception as error:
|
||||
logging.error(f"Error while request: {str(error)}")
|
||||
return " "
|
Loading…
Reference in New Issue
Block a user